Example #1
File: ENV.py Project: Zeii2024/RL
    def env(self):
        # obs builder
        obs_builder_object = self.obs_builder_dict[self.obs_builder]

        env = RailEnv(
            width=self.width,  # width and height are the size of the grid (number of cells)
            height=self.height,
            rail_generator=sparse_rail_generator(
                max_num_cities=self.max_num_cities,
                # Number of cities in map (where train stations are)
                seed=19,  # Random seed
                grid_mode=True,
                max_rails_between_cities=2,
                max_rails_in_city=2,
            ),
            schedule_generator=sparse_schedule_generator(
                self.speed_ration_map),
            number_of_agents=self.number_of_agents,
            malfunction_generator_and_process_data=malfunction_from_params(
                self.stochastic_data),
            # Malfunction data generator
            obs_builder_object=obs_builder_object,
            remove_agents_at_target=False,
            record_steps=True)
        return env
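The snippet above relies on the usual Flatland 2.x factory functions. A minimal import header that such a file would typically need is sketched below (module paths are from Flatland 2.x and are an assumption, not taken from the Zeii2024/RL project):

from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
from flatland.envs.malfunction_generators import malfunction_from_params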
Example #2
def create_rail_env(env_params, tree_observation):
    n_agents = env_params.n_agents
    x_dim = env_params.x_dim
    y_dim = env_params.y_dim
    n_cities = env_params.n_cities
    max_rails_between_cities = env_params.max_rails_between_cities
    max_rails_in_city = env_params.max_rails_in_city
    seed = env_params.seed

    # Break agents from time to time
    malfunction_parameters = MalfunctionParameters(
        malfunction_rate=env_params.malfunction_rate,
        min_duration=20,
        max_duration=50)

    return RailEnv(
        width=x_dim,
        height=y_dim,
        rail_generator=sparse_rail_generator(
            max_num_cities=n_cities,
            grid_mode=False,
            max_rails_between_cities=max_rails_between_cities,
            max_rails_in_city=max_rails_in_city),
        schedule_generator=sparse_schedule_generator(),
        number_of_agents=n_agents,
        malfunction_generator_and_process_data=malfunction_from_params(
            malfunction_parameters),
        obs_builder_object=tree_observation,
        random_seed=seed)
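A minimal driver sketch for the helper above, assuming Flatland 2.x and packing the parameters into a plain namespace (the concrete values and the SimpleNamespace bundle are illustrative, not taken from the source project):

from types import SimpleNamespace
from flatland.envs.observations import TreeObsForRailEnv

# Hypothetical parameter bundle; the real env_params comes from the caller's config.
env_params = SimpleNamespace(n_agents=5, x_dim=30, y_dim=30, n_cities=3,
                             max_rails_between_cities=2, max_rails_in_city=3,
                             seed=42, malfunction_rate=1. / 500)
env = create_rail_env(env_params, TreeObsForRailEnv(max_depth=2))
obs, info = env.reset(regenerate_rail=True, regenerate_schedule=True)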
Example #3
def _create_single_agent_environment(seed, x_dim, y_dim, n_agents, n_cities,
                                     timed, max_rails_between_cities,
                                     max_rails_in_city, observation_builder):

    # Set the seeds
    random.seed(seed)
    np.random.seed(seed)

    # Setup the environment
    env = RailEnv(width=x_dim,
                  height=y_dim,
                  rail_generator=sparse_rail_generator(
                      max_num_cities=n_cities,
                      seed=seed,
                      grid_mode=False,
                      max_rails_between_cities=max_rails_between_cities,
                      max_rails_in_city=max_rails_in_city),
                  schedule_generator=sparse_schedule_generator(timed=timed),
                  number_of_agents=n_agents,
                  obs_builder_object=observation_builder,
                  random_seed=seed)

    # Compute the maximum number of steps allowed.
    max_steps = int(4 * 2 * (env.height + env.width + (n_agents / n_cities)))

    # Return produced environment
    return env, max_steps, x_dim, y_dim
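As a quick sanity check of the step-limit formula above: with x_dim = y_dim = 25, n_agents = 1 and n_cities = 2, max_steps = int(4 * 2 * (25 + 25 + 1 / 2)) = int(404.0) = 404.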
Example #4
def train_validate_env_generator_params(train_set,
                                        n_agents,
                                        x_dim,
                                        y_dim,
                                        observation,
                                        stochastic_data,
                                        speed_ration_map,
                                        seed=1):
    if train_set:
        random_seed = np.random.randint(1000)
    else:
        random_seed = np.random.randint(1000, 2000)
    random.seed(random_seed)
    np.random.seed(random_seed)

    env = RailEnv(
        width=x_dim,
        height=y_dim,
        rail_generator=sparse_rail_generator(
            max_num_cities=3,
            # Number of cities in map (where train stations are)
            seed=seed,  # Random seed
            grid_mode=False,
            max_rails_between_cities=2,
            max_rails_in_city=3),
        schedule_generator=sparse_schedule_generator(speed_ration_map),
        number_of_agents=n_agents,
        malfunction_generator_and_process_data=malfunction_from_params(
            stochastic_data),
        # Malfunction data generator
        obs_builder_object=observation)
    return env, random_seed
Example #5
 def get_rail_generator(self):
     rail_generator = sparse_rail_generator(
         seed=self._config['seed'],
         max_num_cities=self._config['max_num_cities'],
         grid_mode=self._config['grid_mode'],
         max_rails_between_cities=self._config['max_rails_between_cities'],
         max_rails_in_city=self._config['max_rails_in_city'])
     return rail_generator
Example #6
 def test_static_env(self):
     rail_generator = sparse_rail_generator(seed=42,
                                            max_num_cities=3,
                                            grid_mode=True,
                                            max_rails_between_cities=4,
                                            max_rails_in_city=4)
     rail = rail_generator(25, 25, 5, 0, RandomState(None))[0]
     rail_from_grid_transition_map(rail)
Example #7
def create_rail_env(args, load_env=""):
    '''
    Build a RailEnv object with the specified parameters,
    as described in the .yml file
    '''
    # Check if an environment file is provided
    if load_env:
        rail_generator = rail_from_file(load_env)
    else:
        rail_generator = sparse_rail_generator(
            max_num_cities=args.env.max_cities,
            grid_mode=args.env.grid,
            max_rails_between_cities=args.env.max_rails_between_cities,
            max_rails_in_city=args.env.max_rails_in_cities,
            seed=args.env.seed)

    # Build predictor and observator
    obs_type = args.policy.type.get_true_key()
    if PREDICTORS[obs_type] is ShortestDeviationPathPredictor:
        predictor = PREDICTORS[obs_type](
            max_depth=args.observator.max_depth,
            max_deviations=args.predictor.max_depth)
    else:
        predictor = PREDICTORS[obs_type](max_depth=args.predictor.max_depth)
    observator = OBSERVATORS[obs_type](args.observator.max_depth, predictor)

    # Initialize malfunctions
    malfunctions = None
    if args.env.malfunctions.enabled:
        malfunctions = ParamMalfunctionGen(
            MalfunctionParameters(
                malfunction_rate=args.env.malfunctions.rate,
                min_duration=args.env.malfunctions.min_duration,
                max_duration=args.env.malfunctions.max_duration))

    # Initialize agents speeds
    speed_map = None
    if args.env.variable_speed:
        speed_map = {1.: 0.25, 1. / 2.: 0.25, 1. / 3.: 0.25, 1. / 4.: 0.25}
    schedule_generator = sparse_schedule_generator(speed_map,
                                                   seed=args.env.seed)

    # Build the environment
    return RailEnvWrapper(params=args,
                          width=args.env.width,
                          height=args.env.height,
                          rail_generator=rail_generator,
                          schedule_generator=schedule_generator,
                          number_of_agents=args.env.num_trains,
                          obs_builder_object=observator,
                          malfunction_generator=malfunctions,
                          remove_agents_at_target=True,
                          random_seed=args.env.seed)
Example #8
    def _launch(self):
        rail_generator = sparse_rail_generator(
            seed=self._config['seed'],
            max_num_cities=self._config['max_num_cities'],
            grid_mode=self._config['grid_mode'],
            max_rails_between_cities=self._config['max_rails_between_cities'],
            max_rails_in_city=self._config['max_rails_in_city'])

        malfunction_generator = no_malfunction_generator()
        if {
                'malfunction_rate', 'malfunction_min_duration',
                'malfunction_max_duration'
        } <= self._config.keys():
            stochastic_data = {
                'malfunction_rate': self._config['malfunction_rate'],
                'min_duration': self._config['malfunction_min_duration'],
                'max_duration': self._config['malfunction_max_duration']
            }
            malfunction_generator = malfunction_from_params(stochastic_data)

        speed_ratio_map = None
        if 'speed_ratio_map' in self._config:
            speed_ratio_map = {
                float(k): float(v)
                for k, v in self._config['speed_ratio_map'].items()
            }
        schedule_generator = sparse_schedule_generator(speed_ratio_map)

        env = None
        try:
            env = RailEnv(
                width=self._config['width'],
                height=self._config['height'],
                rail_generator=rail_generator,
                schedule_generator=schedule_generator,
                number_of_agents=self._config['number_of_agents'],
                malfunction_generator_and_process_data=malfunction_generator,
                obs_builder_object=self._observation.builder(),
                remove_agents_at_target=False,
                random_seed=self._config['seed'],
                # Note: the line below may be better left commented out, since this env
                # tries different configs; keeping the renderer open can be wasteful, and
                # moreover the renderer has to be closed afterwards.
                use_renderer=self._env_config.get('render'))

            env.reset()
        except ValueError as e:
            logging.error("=" * 50)
            logging.error(f"Error while creating env: {e}")
            logging.error("=" * 50)

        return env
Example #9
    def _launch(self):
        print("NEW ENV LAUNCHED")
        n_agents, n_cities, dim = get_round_2_env()

        rail_generator = sparse_rail_generator(
            seed=self._config['seed'],
            max_num_cities=n_cities,
            grid_mode=self._config['grid_mode'],
            max_rails_between_cities=self._config['max_rails_between_cities'],
            max_rails_in_city=self._config['max_rails_in_city'])

        malfunction_generator = NoMalfunctionGen()
        if {
                'malfunction_rate', 'malfunction_min_duration',
                'malfunction_max_duration'
        } <= self._config.keys():
            stochastic_data = {
                'malfunction_rate': self._config['malfunction_rate'],
                'min_duration': self._config['malfunction_min_duration'],
                'max_duration': self._config['malfunction_max_duration']
            }
            malfunction_generator = ParamMalfunctionGen(stochastic_data)

        speed_ratio_map = None
        if 'speed_ratio_map' in self._config:
            speed_ratio_map = {
                float(k): float(v)
                for k, v in self._config['speed_ratio_map'].items()
            }
        schedule_generator = sparse_schedule_generator(speed_ratio_map)

        env = None
        try:
            env = RailEnv(width=dim,
                          height=dim,
                          rail_generator=rail_generator,
                          schedule_generator=schedule_generator,
                          number_of_agents=n_agents,
                          malfunction_generator=malfunction_generator,
                          obs_builder_object=self._observation.builder(),
                          remove_agents_at_target=False,
                          random_seed=self._config['seed'],
                          use_renderer=self._env_config.get('render'))

            env.reset()
        except ValueError as e:
            logging.error("=" * 50)
            logging.error(f"Error while creating env: {e}")
            logging.error("=" * 50)

        return env
Example #10
def create_random_railways(project_root):
    speed_ration_map = {
        1 / 1: 1.0,  # Fast passenger train
        1 / 2.: 0.0,  # Fast freight train
        1 / 3.: 0.0,  # Slow commuter train
        1 / 4.: 0.0
    }  # Slow freight train

    rail_generator = sparse_rail_generator(grid_mode=False,
                                           max_num_cities=3,
                                           max_rails_between_cities=2,
                                           max_rails_in_city=3)
    schedule_generator = sparse_schedule_generator(speed_ration_map)
    return rail_generator, schedule_generator
Example #11
 def _launch(self, env_params, observation):
     return RailEnv(
         width=env_params.x_dim,
         height=env_params.y_dim,
         rail_generator=sparse_rail_generator(
             max_num_cities=env_params.n_cities,
             grid_mode=False,
             max_rails_between_cities=env_params.max_rails_between_cities,
             max_rails_in_city=env_params.max_rails_in_city,
             seed=env_params.seed
         ),
         schedule_generator=sparse_schedule_generator(env_params.speed_profiles),
         number_of_agents=env_params.n_agents,
         malfunction_generator_and_process_data=malfunction_from_params(env_params.malfunction_parameters),
         obs_builder_object=observation,
         random_seed=env_params.seed
     )
Example #12
def random_sparse_env_small(random_seed, max_width, max_height, observation_builder):
    random.seed(random_seed)
    size = random.randint(0, 5)
    width = 20 + size * 5
    height = 20 + size * 5
    nr_cities = 2 + size // 2 + random.randint(0, 2)
    nr_trains = min(nr_cities * 5, 5 + random.randint(0, 5))  # , 10 + random.randint(0, 10))
    max_rails_between_cities = 2
    max_rails_in_cities = 3 + random.randint(0, size)
    malfunction_rate = 30 + random.randint(0, 100)
    malfunction_min_duration = 3 + random.randint(0, 7)
    malfunction_max_duration = 20 + random.randint(0, 80)

    rail_generator = sparse_rail_generator(max_num_cities=nr_cities, seed=random_seed, grid_mode=False,
                                           max_rails_between_cities=max_rails_between_cities,
                                           max_rails_in_city=max_rails_in_cities)

    # new version:
    # stochastic_data = MalfunctionParameters(malfunction_rate, malfunction_min_duration, malfunction_max_duration)

    stochastic_data = {'malfunction_rate': malfunction_rate, 'min_duration': malfunction_min_duration,
                       'max_duration': malfunction_max_duration}

    schedule_generator = sparse_schedule_generator({1.: 0.25, 1. / 2.: 0.25, 1. / 3.: 0.25, 1. / 4.: 0.25})

    while width <= max_width and height <= max_height:
        try:
            env = RailEnv(width=width, height=height, rail_generator=rail_generator,
                          schedule_generator=schedule_generator, number_of_agents=nr_trains,
                          malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
                          obs_builder_object=observation_builder, remove_agents_at_target=False)

            print("[{}] {}x{} {} cities {} trains, max {} rails between cities, max {} rails in cities. Malfunction rate {}, {} to {} steps.".format(
                random_seed, width, height, nr_cities, nr_trains, max_rails_between_cities,
                max_rails_in_cities, malfunction_rate, malfunction_min_duration, malfunction_max_duration
            ))

            return env
        except ValueError as e:
            logging.error(f"Error: {e}")
            width += 5
            height += 5
            logging.info("Trying again with a larger env: (w, h) = (%d, %d)", width, height)
    logging.error(f"Unable to generate env with seed={random_seed}, max_width={max_width}, max_height={max_height}")
    return None
Example #13
def test_schedule_from_file_sparse():
    """
    Test to see that all parameters are loaded as expected
    Returns
    -------

    """
    # Different agent types (trains) with different speeds.
    speed_ration_map = {
        1.: 0.25,  # Fast passenger train
        1. / 2.: 0.25,  # Fast freight train
        1. / 3.: 0.25,  # Slow commuter train
        1. / 4.: 0.25
    }  # Slow freight train

    # Generate Sparse test env
    rail_generator = sparse_rail_generator(
        max_num_cities=5,
        seed=1,
        grid_mode=False,
        max_rails_between_cities=3,
        max_rails_in_city=6,
    )
    schedule_generator = sparse_schedule_generator(speed_ration_map)

    create_and_save_env(file_name="./sparse_env_test.pkl",
                        rail_generator=rail_generator,
                        schedule_generator=schedule_generator)

    # Sparse generator
    rail_generator = rail_from_file("./sparse_env_test.pkl")
    schedule_generator = schedule_from_file("./sparse_env_test.pkl")
    sparse_env_from_file = RailEnv(width=1,
                                   height=1,
                                   rail_generator=rail_generator,
                                   schedule_generator=schedule_generator)
    sparse_env_from_file.reset(True, True)

    # Assert loaded agent number is correct
    assert sparse_env_from_file.get_num_agents() == 10

    # Assert max steps is correct
    assert sparse_env_from_file._max_episode_steps == 500
Example #14
    def _thunk():
        env_Orig = RailEnvWrapper(
            width=x_dim,
            height=y_dim,
            rail_generator=sparse_rail_generator(
                max_num_cities=3,
                # Number of cities in map (where train stations are)
                seed=1,  # Random seed
                grid_mode=False,
                max_rails_between_cities=2,
                max_rails_in_city=3),
            schedule_generator=sparse_schedule_generator(speed_ration_map),
            number_of_agents=n_agents,
            stochastic_data=stochastic_data,  # Malfunction data generator
            obs_builder_object=TreeObservation)

        env = copy.deepcopy(env_Orig)

        # After training we want to render the results so we also load a renderer
        #env_renderer = RenderTool(env, gl="PILSVG", )

        return env
Example #15
def get_env(config=None, rl=False):
    n_agents = 16
    schedule_generator = sparse_schedule_generator(None)

    rail_generator = sparse_rail_generator(
        seed=seed,
        max_num_cities=3,
        grid_mode=False,
        max_rails_between_cities=2,
        max_rails_in_city=4,
    )

    if rl:
        obs_builder = make_obs("combined", {
            "path": None,
            "simple_meta": None
        }).builder()
    else:
        obs_builder = DummyObs()

    params = MalfunctionParameters(malfunction_rate=1 / 1000,
                                   max_duration=50,
                                   min_duration=20)
    malfunction_generator = ParamMalfunctionGen(params)

    env = RailEnv(
        width=28,
        height=28,
        rail_generator=rail_generator,
        schedule_generator=schedule_generator,
        number_of_agents=n_agents,
        malfunction_generator=malfunction_generator,
        obs_builder_object=obs_builder,
        remove_agents_at_target=True,
        random_seed=seed,
    )

    return env
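A short rollout sketch using the factory above (random actions; it assumes the module-level seed that get_env references is defined, and uses Flatland's standard reset/step API):

import numpy as np

env = get_env(rl=False)
obs, info = env.reset()
done = {'__all__': False}
while not done['__all__']:
    # One random action (0-4) per agent handle
    action_dict = {a: np.random.randint(0, 5) for a in env.get_agent_handles()}
    obs, rewards, done, info = env.step(action_dict)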
Example #16
def demo_lpg_planing():
    from flatland.envs.rail_generators import sparse_rail_generator
    from flatland.envs.schedule_generators import sparse_schedule_generator
    from flatland.envs.observations import TreeObsForRailEnv
    n_agents = 1
    x_dim = 25
    y_dim = 25
    n_cities = 4
    max_rails_between_cities = 2
    max_rails_in_city = 3
    seed = 42
    # Observation parameters
    observation_tree_depth = 2

    domain_file = "./pddl/flatland.pddl"
    problem_dir = "./pddl/flatland"
    num_problems = 6

    tree_observation = TreeObsForRailEnv(max_depth=observation_tree_depth)

    env = PDDLFlatlandEnv(
        width=x_dim,
        height=y_dim,
        rail_generator=sparse_rail_generator(
            max_num_cities=n_cities,
            seed=seed,
            grid_mode=False,
            max_rails_between_cities=max_rails_between_cities,
            max_rails_in_city=max_rails_in_city),
        schedule_generator=sparse_schedule_generator(),
        number_of_agents=n_agents,
        obs_builder_object=tree_observation,
        domain_file=domain_file,
        problem_dir=problem_dir)

    for problem_index in range(num_problems):
        env.fix_problem_index(problem_index)
        run_planning_flatland_demo(env, 'lpg')
Example #17
def get_env(config=None, rl=False):
    n_agents = 32
    schedule_generator = sparse_schedule_generator(None)

    rail_generator = sparse_rail_generator(
        seed=seed,
        max_num_cities=4,
        grid_mode=False,
        max_rails_between_cities=2,
        max_rails_in_city=4,
    )

    if rl:
        obs_builder = make_obs(
            config["env_config"]['observation'],
            config["env_config"].get('observation_config')).builder()
    else:
        obs_builder = DummyObs()

    params = MalfunctionParameters(malfunction_rate=1 / 1000,
                                   max_duration=50,
                                   min_duration=20)
    malfunction_generator = ParamMalfunctionGen(params)

    env = RailEnv(
        width=32,
        height=32,
        rail_generator=rail_generator,
        schedule_generator=schedule_generator,
        number_of_agents=n_agents,
        malfunction_generator=malfunction_generator,
        obs_builder_object=obs_builder,
        remove_agents_at_target=True,
        random_seed=seed,
    )

    return env
Example #18
    def __init__(self,
                 n_cars=3,
                 n_acts=5,
                 min_obs=-1,
                 max_obs=1,
                 n_nodes=2,
                 ob_radius=10,
                 x_dim=36,
                 y_dim=36,
                 feats='all'):

        self.tree_obs = tree_observation.TreeObservation(n_nodes)
        self.n_cars = n_cars
        self.n_nodes = n_nodes
        self.ob_radius = ob_radius
        self.feats = feats

        rail_gen = sparse_rail_generator(max_num_cities=3,
                                         seed=666,
                                         grid_mode=False,
                                         max_rails_between_cities=2,
                                         max_rails_in_city=3)

        self._rail_env = RailEnv(
            width=x_dim,
            height=y_dim,
            rail_generator=rail_gen,
            schedule_generator=sparse_schedule_generator(speed_ration_map),
            number_of_agents=n_cars,
            malfunction_generator_and_process_data=malfunction_from_params(
                stochastic_data),
            obs_builder_object=self.tree_obs)

        self.renderer = RenderTool(self._rail_env, gl="PILSVG")
        self.action_dict = dict()
        self.info = dict()
        self.old_obs = dict()
Example #19
    def update_env_with_params(self,
                               width,
                               height,
                               num_agents,
                               max_steps,
                               rail_type,
                               rail_gen_params,
                               seed=-1):
        if seed == -1:
            seed = random.randint(0, 100000)

        self.num_agents = num_agents
        self.max_steps = max_steps

        if rail_type == 'complex':
            self.rail_gen = complex_rail_generator(
                nr_start_goal=rail_gen_params['nr_start_goal'],
                nr_extra=rail_gen_params['nr_extra'],
                min_dist=rail_gen_params['min_dist'],
                max_dist=rail_gen_params['max_dist'],
                seed=seed)

            #self.schedule_gen = complex_schedule_generator()
        elif rail_type == 'sparse':
            self.rail_gen = sparse_rail_generator(
                max_num_cities=rail_gen_params['num_cities'],
                seed=seed,
                grid_mode=rail_gen_params['grid_mode'],
                max_rails_between_cities=rail_gen_params[
                    'max_rails_between_cities'],
                max_rails_in_city=rail_gen_params['max_rails_in_city'])

        else:
            raise ValueError(
                'Please specify either "complex" or "sparse" as rail_type')

        self.generate_env(width, height)
Example #20
def create_save_env(path,
                    width,
                    height,
                    num_trains,
                    max_cities,
                    max_rails_between_cities,
                    max_rails_in_cities,
                    grid=False,
                    seed=0):
    '''
    Create a RailEnv environment with the given settings and save it as pickle
    '''
    rail_generator = sparse_rail_generator(
        max_num_cities=max_cities,
        seed=seed,
        grid_mode=grid,
        max_rails_between_cities=max_rails_between_cities,
        max_rails_in_city=max_rails_in_cities,
    )
    env = RailEnv(width=width,
                  height=height,
                  rail_generator=rail_generator,
                  number_of_agents=num_trains)
    env.save(path)
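The pickle written by env.save can later be fed back through the file-based generators, as Example #13 does; a sketch (file name assumed, using the rail_from_file / schedule_from_file helpers shown there):

rail_generator = rail_from_file('./saved_env.pkl')
schedule_generator = schedule_from_file('./saved_env.pkl')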
Example #21
def create_multi_agent_rail_env(seed, timed):
    n_agents = 4
    # Environment parameters
    x_dim = 25
    y_dim = 25
    n_cities = 4
    max_rails_between_cities = 2
    max_rails_in_city = 3

    # Default observation parameters
    observation_tree_depth = 2
    observation_max_path_depth = 30

    # Default (tree) observation builder
    predictor = ShortestPathPredictorForRailEnv(observation_max_path_depth)
    tree_observation = TreeObsForRailEnvExtended(
        max_depth=observation_tree_depth, predictor=predictor)

    random.seed(seed)
    np.random.seed(seed)

    env = RailEnv(width=x_dim,
                  height=y_dim,
                  rail_generator=sparse_rail_generator(
                      max_num_cities=n_cities,
                      seed=seed,
                      grid_mode=False,
                      max_rails_between_cities=max_rails_between_cities,
                      max_rails_in_city=max_rails_in_city),
                  schedule_generator=sparse_schedule_generator(timed=timed),
                  number_of_agents=n_agents,
                  malfunction_generator_and_process_data=None,
                  obs_builder_object=tree_observation,
                  random_seed=seed)
    max_steps = int(4 * 2 * (env.height + env.width + (n_agents / n_cities)))
    return env, max_steps, x_dim, y_dim, observation_tree_depth, observation_max_path_depth
Example #22
                   }

# Custom observation builder
TreeObservation = TreeObsForRailEnv(max_depth=2)

# Different agent types (trains) with different speeds.
speed_ration_map = {1.: 1.,  # Fast passenger train
                    1. / 2.: 0.0,  # Fast freight train
                    1. / 3.: 0.0,  # Slow commuter train
                    1. / 4.: 0.0}  # Slow freight train

env = RailEnv(width=x_dim,
              height=y_dim,
              rail_generator=sparse_rail_generator(max_num_cities=3,
                                                   # Number of cities in map (where train stations are)
                                                   seed=1,  # Random seed
                                                   grid_mode=False,
                                                   max_rails_between_cities=2,
                                                   max_rails_in_city=4),
              schedule_generator=sparse_schedule_generator(speed_ration_map),
              number_of_agents=n_agents,
              malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
              obs_builder_object=TreeObservation)
env.reset()

env_renderer = RenderTool(env, gl="PILSVG", )
num_features_per_node = env.obs_builder.observation_dim

tree_depth = 2
nr_nodes = 0
for i in range(tree_depth + 1):
    nr_nodes += np.power(4, i)
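The flattened observation size would then typically follow from these two quantities, mirroring what Example #27 computes below (a sketch, not part of this excerpt):

state_size = num_features_per_node * nr_nodes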
Example #23
                                     predictor=predictor)

# Fraction of trains with each speed
speed_profiles = {
    1.: 1.0,  # Fast passenger train
    1. / 2.: 0.0,  # Fast freight train
    1. / 3.: 0.0,  # Slow commuter train
    1. / 4.: 0.0  # Slow freight train
}

# Setup the environment
env = RailEnv(width=x_dim,
              height=y_dim,
              rail_generator=sparse_rail_generator(
                  max_num_cities=n_cities,
                  grid_mode=False,
                  max_rails_between_cities=max_rails_between_cities,
                  max_rails_in_city=max_rails_in_city),
              schedule_generator=sparse_schedule_generator(speed_profiles),
              number_of_agents=n_agents,
              malfunction_generator_and_process_data=malfunction_from_params(
                  malfunction_parameters),
              obs_builder_object=tree_observation,
              random_seed=seed)

env.reset(regenerate_schedule=True, regenerate_rail=True)

# Setup renderer

env_renderer = RenderTool(env)
'''
Example #24
######### Set parameters for rail map generation #########
width = 16 * 4  # 16 * 7  # Width of map
height = 9 * 4  # 9 * 7  # Height of map
nr_trains = 10  # nr_trains = 50  # Number of trains that have an assigned task in the env
cities_in_map = 6  # 20  # Number of cities where agents can start or end
seed = 14  # Random seed
grid_distribution_of_cities = False  # Type of city distribution, if False cities are randomly placed
max_rails_between_cities = 2  # Max number of tracks allowed between cities. This is the number of entry points to a city
max_rail_in_cities = 6  # Max number of parallel tracks within a city, representing a realistic train station

######### Initialize railway #########
rail_generator = sparse_rail_generator(
    max_num_cities=cities_in_map,
    seed=seed,
    grid_mode=grid_distribution_of_cities,
    max_rails_between_cities=max_rails_between_cities,
    max_rails_in_city=max_rail_in_cities,
)
# Different agent types (trains) with different speeds.
speed_ration_map = {
    1.: 0.25,  # Fast passenger train
    1. / 2.: 0.25,  # Fast freight train
    1. / 3.: 0.25,  # Slow commuter train
    1. / 4.: 0.25
}  # Slow freight train
schedule_generator = sparse_schedule_generator(speed_ration_map)
stochastic_data = {
    "malfunction_rate": 10000,  # Rate of malfunction occurence
    "min_duration": 15,  # Minimal duration of malfunction
    "max_duration": 50  # Max duration of malfunction
Example #25
def main(args, dir):
    '''
    :param args:
    :return:
    Episodes to debug (set a breakpoint in the episode loop to debug):
    - ep = 3: agent 1 spawns in front of agent 3, blocking its path; agents 0 and 2 are in a deadlock since they have the same priority
    - ep = 4: agents stop because of wrong priorities even though the conflict zone wasn't entered
    - ep = 14,
    '''
    rail_generator = sparse_rail_generator(
        max_num_cities=args.max_num_cities,
        seed=args.seed,
        grid_mode=args.grid_mode,
        max_rails_between_cities=args.max_rails_between_cities,
        max_rails_in_city=args.max_rails_in_city,
    )

    # Maps speeds to % of appearance in the env
    speed_ration_map = {
        1.: 0.25,  # Fast passenger train
        1. / 2.: 0.25,  # Fast freight train
        1. / 3.: 0.25,  # Slow commuter train
        1. / 4.: 0.25
    }  # Slow freight train

    observation_builder = GraphObsForRailEnv(
        predictor=ShortestPathPredictorForRailEnv(
            max_depth=args.prediction_depth),
        bfs_depth=4)

    env = RailEnv(
        width=args.width,
        height=args.height,
        rail_generator=rail_generator,
        schedule_generator=sparse_schedule_generator(speed_ration_map),
        number_of_agents=args.num_agents,
        obs_builder_object=observation_builder,
        malfunction_generator_and_process_data=malfunction_from_params(
            parameters={
                'malfunction_rate':
                args.malfunction_rate,  # Rate of malfunction occurrence
                'min_duration':
                args.min_duration,  # Minimal duration of malfunction
                'max_duration':
                args.max_duration  # Max duration of malfunction
            }))

    if args.render:
        env_renderer = RenderTool(env,
                                  agent_render_variant=AgentRenderVariant.
                                  AGENT_SHOWS_OPTIONS_AND_BOX,
                                  show_debug=True)

    sm = stateMachine()
    tb = TestBattery(env, observation_builder)

    state_machine_action_dict = {}
    railenv_action_dict = {}
    # max_time_steps = env.compute_max_episode_steps(args.width, args.height)
    max_time_steps = 200
    T_rewards = []  # List of episodes rewards
    T_Qs = []  # List of q values
    T_num_done_agents = []  # List of number of done agents for each episode
    T_all_done = []  # If all agents completed in each episode
    T_episodes = []  # Time taken for each episode

    if args.save_image and not os.path.isdir("image_dump"):
        os.makedirs("image_dump")

    step_taken = 0
    total_step_taken = 0
    total_episodes = 0
    step_times = []  # Time taken for each step

    for ep in range(args.num_episodes):
        # Reset info at the beginning of an episode
        start_time = time.time()  # Take time of one episode

        if args.generate_baseline:
            if not os.path.isdir("image_dump/" + str(dir)) and args.save_image:
                os.makedirs("image_dump/" + str(dir))
        else:
            if not os.path.isdir("image_dump/" + str(ep)) and args.save_image:
                os.makedirs("image_dump/" + str(ep))

        state, info = env.reset()
        tb.reset()

        if args.render:
            env_renderer.reset()
        reward_sum, all_done = 0, False  # reward_sum accumulates the total reward obtained over the episode's steps
        num_done_agents = 0

        state_machine_action = {}
        for i in range(env.number_of_agents):
            state_machine_action[i] = 0

        for step in range(max_time_steps):
            start_step_time = time.time()

            #if step % 10 == 0:
            #	print(step)

            # Test battery
            # see test_battery.py
            triggers = tb.tests(state, args.prediction_depth,
                                state_machine_action)
            # state machine based on triggers of test battery
            # see state_machine.py
            state_machine_action = sm.act(
                triggers)  # State machine picks action

            for a in range(env.get_num_agents()):
                #if info['action_required'][a]:
                #	#railenv_action = observation_builder.choose_railenv_action(a, state_machine_action[a])
                #	railenv_action = observation_builder.choose_railenv_action(a, state_machine_action[a])
                #	state_machine_action_dict.update({a: state_machine_action})
                #	railenv_action_dict.update({a: railenv_action})
                # railenv_action = observation_builder.choose_railenv_action(a, state_machine_action[a])
                railenv_action = observation_builder.choose_railenv_action(
                    a, state_machine_action[a])
                state_machine_action_dict.update({a: state_machine_action})
                railenv_action_dict.update({a: railenv_action})

            state, reward, done, info = env.step(
                railenv_action_dict)  # Env step

            if args.generate_baseline:
                #env_renderer.render_env(show=True, show_observations=False, show_predictions=True)
                env_renderer.render_env(show=False,
                                        show_observations=False,
                                        show_predictions=True)
            else:
                env_renderer.render_env(show=True,
                                        show_observations=False,
                                        show_predictions=True)

            if args.generate_baseline:
                if args.save_image:
                    env_renderer.save_image("image_dump/" + str(dir) +
                                            "/image_" + str(step) + "_.png")
            else:
                if args.save_image:
                    env_renderer.save_image("image_dump/" + str(ep) +
                                            "/image_" + str(step) + "_.png")

            if args.debug:
                for a in range(env.get_num_agents()):
                    log('\n\n#########################################')
                    log('\nInfo for agent {}'.format(a))
                    #log('\npath : {}'.format(state[a]["path"]))
                    log('\noverlap : {}'.format(state[a]["overlap"]))
                    log('\ndirection : {}'.format(state[a]["direction"]))
                    log('\nOccupancy, first layer: {}'.format(
                        state[a]["occupancy"]))
                    log('\nOccupancy, second layer: {}'.format(
                        state[a]["conflict"]))
                    log('\nForks: {}'.format(state[a]["forks"]))
                    log('\nTarget: {}'.format(state[a]["target"]))
                    log('\nPriority: {}'.format(state[a]["priority"]))
                    log('\nMax priority encountered: {}'.format(
                        state[a]["max_priority"]))
                    log('\nNum malfunctioning agents (globally): {}'.format(
                        state[a]["n_malfunction"]))
                    log('\nNum agents ready to depart (globally): {}'.format(
                        state[a]["ready_to_depart"]))
                    log('\nStatus: {}'.format(info['status'][a]))
                    log('\nPosition: {}'.format(env.agents[a].position))
                    log('\nTarget: {}'.format(env.agents[a].target))
                    log('\nMoving? {} at speed: {}'.format(
                        env.agents[a].moving, info['speed'][a]))
                    log('\nAction required? {}'.format(
                        info['action_required'][a]))
                    log('\nState machine action: {}'.format(
                        state_machine_action_dict[a]))
                    log('\nRailenv action: {}'.format(railenv_action_dict[a]))
                    log('\nRewards: {}'.format(reward[a]))
                    log('\n\n#########################################')

            reward_sum += sum(reward[a] for a in range(env.get_num_agents()))

            step_taken = step
            time_taken_step = time.time() - start_step_time
            step_times.append(time_taken_step)

            if done['__all__']:
                all_done = True
                break

        total_step_taken += step_taken

        time_taken = time.time() - start_time  # Time taken for one episode
        total_episodes = ep

        # Time metrics - too precise
        avg_time_step = sum(step_times) / step_taken
        #print("Avg time step: " + str(avg_time_step))

        # No need to close the renderer since env parameter sizes stay the same
        T_rewards.append(reward_sum)
        # Compute num of agents that reached their target
        for a in range(env.get_num_agents()):
            if done[a]:
                num_done_agents += 1
        percentage_done_agents = num_done_agents / env.get_num_agents()
        log("\nDone agents in episode: {}".format(percentage_done_agents))
        T_num_done_agents.append(
            percentage_done_agents)  # In proportion to total
        T_all_done.append(all_done)

    # Average number of agents that reached their target
    avg_done_agents = sum(T_num_done_agents) / len(T_num_done_agents) if len(
        T_num_done_agents) > 0 else 0
    avg_reward = sum(T_rewards) / len(T_rewards) if len(T_rewards) > 0 else 0
    avg_norm_reward = avg_reward / (max_time_steps / env.get_num_agents())

    avg_ep_time = sum(T_episodes) / args.num_episodes

    if total_episodes == 0:
        total_episodes = 1

    log("\nSeed: " + str(args.seed) \
      + "\t | Avg_done_agents: " + str(avg_done_agents)\
      + "\t | Avg_reward: " + str(avg_reward)\
      + "\t | Avg_norm_reward: " + str(avg_norm_reward)\
      + "\t | Max_num_time_steps: " + str(max_time_steps)\
      + "\t | Avg_num_time_steps: " + str(total_step_taken/total_episodes)
            + "\t | Avg episode time: " + str(avg_ep_time))
Example #26
def test(args, T, ep, dqn, val_mem, metrics, results_dir, evaluate=False):

    # Init env and set in evaluation mode
    # Maps speeds to % of appearance in the env
    speed_ration_map = {
        1.: 0.25,  # Fast passenger train
        1. / 2.: 0.25,  # Fast freight train
        1. / 3.: 0.25,  # Slow commuter train
        1. / 4.: 0.25
    }  # Slow freight train

    schedule_generator = sparse_schedule_generator(speed_ration_map)

    observation_builder = GraphObsForRailEnv(
        predictor=ShortestPathPredictorForRailEnv(
            max_depth=args.prediction_depth))

    env = RailEnv(
        width=args.width,
        height=args.height,
        rail_generator=sparse_rail_generator(
            max_num_cities=args.max_num_cities,
            seed=ep,  # Use the episode index as seed when evaluation is performed during training
            grid_mode=args.grid_mode,
            max_rails_between_cities=args.max_rails_between_cities,
            max_rails_in_city=args.max_rails_in_city,
        ),
        schedule_generator=schedule_generator,
        number_of_agents=args.num_agents,
        obs_builder_object=observation_builder,
        malfunction_generator_and_process_data=malfunction_from_params(
            parameters={
                'malfunction_rate': args.malfunction_rate,
                'min_duration': args.min_duration,
                'max_duration': args.max_duration
            }),
    )

    if args.render:
        env_renderer = RenderTool(env,
                                  gl="PILSVG",
                                  agent_render_variant=AgentRenderVariant.
                                  AGENT_SHOWS_OPTIONS_AND_BOX,
                                  show_debug=True,
                                  screen_height=1080,
                                  screen_width=1920)

    #max_time_steps = env.compute_max_episode_steps(env.width, env.height)
    max_time_steps = 200  # TODO Debug
    # metrics['steps'].append(T)
    metrics['episodes'].append(ep)
    T_rewards = []  # List of episodes rewards
    T_Qs = []  # List
    T_num_done_agents = []  # List of number of done agents for each episode
    T_all_done = []  # If all agents completed in each episode
    network_action_dict = dict()
    railenv_action_dict = dict()
    qvalues = {}

    # Test performance over several episodes
    for ep in range(args.evaluation_episodes):
        # Reset info
        state, info = env.reset()
        reward_sum, all_done = 0, False  # reward_sum accumulates the total reward obtained over the episode's steps
        num_done_agents = 0
        if args.render:
            env_renderer.reset()

        # Choose first action - decide entering of agents into the environment
        for a in range(env.get_num_agents()):
            action = np.random.choice((0, 2))
            railenv_action_dict.update({a: action})
        state, reward, done, info = env.step(railenv_action_dict)  # Env step
        reward_sum += sum(reward[a] for a in range(env.get_num_agents()))

        if args.render:
            env_renderer.render_env(show=True,
                                    show_observations=False,
                                    show_predictions=True)

        for step in range(max_time_steps - 1):
            # Choose actions
            for a in range(env.get_num_agents()):
                if info['action_required'][a]:
                    network_action = dqn.act(
                        state[a]
                    )  # Choose an action greedily (with noisy weights)
                    # network_action = 0
                    railenv_action = observation_builder.choose_railenv_action(
                        a, network_action)
                    qvalues.update({a: dqn.get_q_values(state[a])})
                else:
                    network_action = 0
                    railenv_action = 0
                    qvalues.update({a: [0, 0]})  # '0' if wasn't updated

                railenv_action_dict.update({a: railenv_action})
                network_action_dict.update({a: network_action})

            if args.debug:
                for a in range(env.get_num_agents()):
                    print('#########################################')
                    print('Info for agent {}'.format(a))
                    print('Occupancy, first layer: {}'.format(
                        state[a][:args.prediction_depth]))
                    print('Occupancy, second layer: {}'.format(
                        state[a][args.prediction_depth:args.prediction_depth *
                                 2]))
                    print('Forks: {}'.format(
                        state[a][args.prediction_depth *
                                 2:args.prediction_depth * 3]))
                    print('Target: {}'.format(
                        state[a][args.prediction_depth *
                                 3:args.prediction_depth * 4]))
                    print('Priority: {}'.format(
                        state[a][args.prediction_depth * 4]))
                    print('Max priority encountered: {}'.format(
                        state[a][args.prediction_depth * 4 + 1]))
                    print('Num malfunctioning agents (globally): {}'.format(
                        state[a][args.prediction_depth * 4 + 2]))
                    print('Num agents ready to depart (globally): {}'.format(
                        state[a][args.prediction_depth * 4 + 3]))
                    print('Status: {}'.format(info['status'][a]))
                    print('Position: {}'.format(env.agents[a].position))
                    print('Moving? {} at speed: {}'.format(
                        env.agents[a].moving, info['speed'][a]))
                    print('Action required? {}'.format(
                        info['action_required'][a]))
                    print('Network action: {}'.format(network_action_dict[a]))
                    print('Railenv action: {}'.format(railenv_action_dict[a]))
                    print('Q values: {}'.format(qvalues[a]))
                    # print('QValues: {}'.format(qvalues))
                    print('Rewards: {}'.format(reward[a]))

            # Breakpoint for debugging here
            state, reward, done, info = env.step(
                railenv_action_dict)  # Env step
            if args.render:
                env_renderer.render_env(show=True,
                                        show_observations=False,
                                        show_predictions=True)

            reward_sum += sum(reward[a] for a in range(env.get_num_agents()))

            if done['__all__']:
                all_done = True
                break
        # No need to close the renderer since env parameter sizes stay the same
        T_rewards.append(reward_sum)
        # Compute num of agents that reached their target
        for a in range(env.get_num_agents()):
            if done[a]:
                num_done_agents += 1
        T_num_done_agents.append(
            num_done_agents / env.get_num_agents())  # In proportion to total
        T_all_done.append(all_done)

    # Test Q-values over validation memory
    for state in val_mem:  # Iterate over valid states
        T_Qs.append(dqn.evaluate_q(state))
    if args.debug:
        print('T_Qs: {}'.format(T_Qs))  # These are Qs from a single agent TODO

    avg_done_agents = sum(T_num_done_agents) / len(
        T_num_done_agents
    )  # Average number of agents that reached their target
    avg_reward = sum(T_rewards) / len(T_rewards)
    avg_norm_reward = avg_reward / (max_time_steps / env.get_num_agents())

    # avg_reward, avg_Q = sum(T_rewards) / len(T_rewards), sum(T_Qs) / len(T_Qs)
    if not evaluate:
        # Save model parameters if improved
        if avg_done_agents > metrics['best_avg_done_agents']:
            metrics['best_avg_done_agents'] = avg_done_agents
            dqn.save(results_dir)

        # Append to results and save metrics
        metrics['rewards'].append(T_rewards)
        metrics['Qs'].append(T_Qs)
        torch.save(metrics, os.path.join(results_dir, 'metrics.pth'))

        # Plot HTML
        _plot_line(metrics['episodes'],
                   metrics['rewards'],
                   'Reward',
                   path=results_dir)  # Plot rewards in episodes
        _plot_line(metrics['episodes'], metrics['Qs'], 'Q', path=results_dir)

    # Return average number of done agents (in proportion) and average reward
    return avg_done_agents, avg_reward, avg_norm_reward
Example #27
def train(env):
    n_agents = env["n_agents"]
    x_dim = env["x_dim"]
    y_dim = env["y_dim"]
    n_cities = env["n_cities"]
    max_rails_between_cities = env["max_rails_between_cities"]
    max_rails_in_city = env["max_rails_in_city"]
    seed = 0
    use_fast_tree_obs = False

    # Observation parameters
    observation_tree_depth = 4
    observation_radius = 10
    observation_max_path_depth = 30

    # Set the seeds
    random.seed(seed)
    np.random.seed(seed)

    # Break agents from time to time
    malfunction_parameters = MalfunctionParameters(
        malfunction_rate=1. / 10000,  # Rate of malfunctions
        min_duration=15,  # Minimal duration
        max_duration=50  # Max duration
    )

    # Observation builder
    predictor = ShortestPathPredictorForRailEnv(observation_max_path_depth)
    tree_observation = None

    if use_fast_tree_obs:
        tree_observation = FastTreeObs(max_depth=observation_tree_depth)
        print("Using FastTreeObs")
    else:
        tree_observation = TreeObsForRailEnv(max_depth=observation_tree_depth,
                                             predictor=predictor)
        print("Using StandardTreeObs")

    speed_profiles = {
        1.: 1.0,  # Fast passenger train
        1. / 2.: 0.0,  # Fast freight train
        1. / 3.: 0.0,  # Slow commuter train
        1. / 4.: 0.0  # Slow freight train
    }

    env = RailEnv(
        width=x_dim,
        height=y_dim,
        rail_generator=sparse_rail_generator(
            max_num_cities=n_cities,
            grid_mode=False,
            max_rails_between_cities=max_rails_between_cities,
            max_rails_in_city=max_rails_in_city),
        schedule_generator=sparse_schedule_generator(speed_profiles),
        number_of_agents=n_agents,
        malfunction_generator_and_process_data=malfunction_from_params(
            malfunction_parameters),
        obs_builder_object=tree_observation,
        random_seed=seed)

    rewards = []
    obs, info = env.reset()

    if use_fast_tree_obs:
        state_size = tree_observation.observation_dim
    else:
        # Calculate the state size given the depth of the tree observation and the
        # number of features
        n_features_per_node = env.obs_builder.observation_dim
        n_nodes = 0
        for i in range(observation_tree_depth + 1):
            n_nodes += np.power(4, i)

        state_size = n_features_per_node * n_nodes

    action_size = 5

    DEVICE = 'cpu'
    # if torch.cuda.is_available():
    # 	DEVICE = 'gpu'

    buffer_length = 10000
    steps_to_save_model = 10
    step_size = 100
    num_steps = 100  # update every 100 steps
    avg_steps = 20  # num steps to average and plot rewards
    reward_q = []
    batch_size = 100

    agent_obs = np.array([None] * env.get_num_agents())

    max_steps = int(4 * 2 * (env.height + env.width + (n_agents / n_cities)))
    num_episodes = 100000

    agent_init_params = []
    sa_size = []

    for i in range(n_agents):
        agent_init_params.append({
            'num_in_pol': state_size,
            'num_out_pol': action_size,
            'init_weights': 'model.pt'
        })
        sa_size.append((state_size, action_size))

    hyperparams = {
        "tau": 0.01,
        "pi_lr": 0.00001,
        "q_lr": 0.00005,
        "pol_hidden_dim": 256,
        "critic_hidden_dim": 256,
        "attend_heads": 8
    }

    model = AttentionSAC(agent_init_params=agent_init_params,
                         sa_size=sa_size,
                         tau=hyperparams["tau"],
                         pi_lr=hyperparams["pi_lr"],
                         q_lr=hyperparams["q_lr"],
                         pol_hidden_dim=hyperparams["pol_hidden_dim"],
                         critic_hidden_dim=hyperparams["critic_hidden_dim"],
                         attend_heads=hyperparams["attend_heads"])
    model.init_dict = {}

    replay_buffer = ReplayBuffer(buffer_length, n_agents,
                                 [state_size for i in range(n_agents)],
                                 [action_size for i in range(n_agents)])

    print("MAX STEPS: " + str(max_steps))
    print("NUM EPISODES: ", num_episodes)
    print("HYPERPARAMS: ")
    print(hyperparams)

    start_time = time.time()

    for ep in range(num_episodes):
        print("Episode " + str(ep) + ":", flush=True)
        obs, info = env.reset(True, True)
        model.prep_rollouts(device=DEVICE)
        reward_sum_for_this_episode = 0

        for steps in range(max_steps):
            if steps % step_size == 0:
                print("=", end="", flush=True)
            for agent in env.get_agent_handles():
                if obs[agent] is not None:
                    if use_fast_tree_obs:
                        agent_obs[agent] = obs[agent]
                    else:
                        agent_obs[agent] = normalize_observation(
                            obs[agent],
                            observation_tree_depth,
                            observation_radius=observation_radius)
                else:
                    agent_obs[agent] = np.array([0.] * state_size)

            action_dict = {}
            agent_actions = []

            torch_obs = [
                Variable(torch.Tensor([agent_obs[i]]), requires_grad=False)
                for i in range(n_agents)
            ]
            torch_agent_actions = model.step(torch_obs, explore=True)
            agent_actions = [ac.data.numpy() for ac in torch_agent_actions]

            for i in range(n_agents):
                dist = torch_agent_actions[i][0]
                idx = -1
                for j in range(action_size):
                    if dist[j] != 0:
                        idx = j
                        break
                action_dict[i] = idx

            next_obs, all_rewards, done, info = env.step(action_dict)

            rewards = []
            dones = []

            next_agent_obs = np.array([None] * env.get_num_agents())

            for agent in env.get_agent_handles():
                if next_obs[agent] is not None:
                    if use_fast_tree_obs:
                        next_agent_obs[agent] = next_obs[agent]
                    else:
                        next_agent_obs[agent] = normalize_observation(
                            obs[agent],
                            observation_tree_depth,
                            observation_radius=observation_radius)
                else:
                    next_agent_obs[agent] = np.array([0.] * state_size)

            for i in range(n_agents):
                reward_sum_for_this_episode += all_rewards[i]
                rewards.append(all_rewards[i])
                all_rewards[i] += augment_reward(agent_obs[agent])
                dones.append(done[i])

            replay_buffer.push(np.array([agent_obs]), np.array(agent_actions),
                               np.array([rewards]), np.array([next_agent_obs]),
                               np.array([dones]))

            if steps % num_steps == 0:
                model.prep_training(device=DEVICE)
                sample = replay_buffer.sample(batch_size, norm_rews=False)
                #print(sample)
                model.update_critic(sample)
                model.update_policies(sample)
                model.update_all_targets()
                model.prep_rollouts(device=DEVICE)

        reward_sum_for_this_episode /= n_agents
        reward_q.append(reward_sum_for_this_episode)

        if len(reward_q) == avg_steps:
            wandb.log({'reward': np.mean(reward_q)})
            reward_q = []

        print()

        if ep % steps_to_save_model == 0:
            print("\nSaving model")
            model.save(os.getcwd() + "/model.pt")
            cur_time = time.time()
            time_elapsed = (cur_time - start_time) // 60
            print("Time Elapsed: " + str(time_elapsed) + "\n")
def main(argv):
    try:
        opts, args = getopt.getopt(argv, "n:", ["n_trials="])
    except getopt.GetoptError:
        print('test_navigation_single_agent.py -n <n_trials>')
        sys.exit(2)
    for opt, arg in opts:
        if opt in ('-n', '--n_trials'):
            n_trials = int(arg)

    random.seed(1)
    np.random.seed(1)

    ######## TEST SET SELECTION - PARAMETERS ########
    
    test_multi_agent_setup = 1             # 1 for Medium size test, 2 for Big size test
    test_n_agents = 5                      # Number of agents to test (3 - 5 - 7 for Medium, 5 - 7 - 10 for Big)
    test_malfunctions_enabled = True       # Malfunctions enabled?
    test_agents_one_speed = True           # Test agents with the same speed (1) or with 4 different speeds?

    #################################################

    # Medium size
    if test_multi_agent_setup == 1:
        x_dim = 16*3
        y_dim = 9*3
        max_num_cities = 5
        max_rails_between_cities = 2
        max_rails_in_city = 3

    # Big size
    if test_multi_agent_setup == 2:
        x_dim = 16*4
        y_dim = 9*4
        max_num_cities = 9
        max_rails_between_cities = 5
        max_rails_in_city = 5


    stochastic_data = {'malfunction_rate': 80,  # Rate of malfunction occurrence for a single agent
                       'min_duration': 15,  # Minimal duration of malfunction
                       'max_duration': 50  # Max duration of malfunction
                       }
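    # Note: the meaning of 'malfunction_rate' differs across flatland versions. The
    # dict-based malfunction_from_params typically treats it as the average number of
    # steps between malfunctions, while the newer MalfunctionParameters API (used in
    # other examples on this page) expects a per-step rate such as 1/10000. Check the
    # installed flatland version before tuning this value.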

    # Custom observation builder
    tree_depth = 2
    TreeObservation = TreeObsForRailEnv(max_depth=tree_depth, predictor = ShortestPathPredictorForRailEnv(20))

    np.savetxt(fname=path.join('NetsTest' , 'info.txt'), X=[x_dim,y_dim,test_n_agents,max_num_cities,max_rails_between_cities,max_rails_in_city,tree_depth],delimiter=';')

    # Different agent types (trains) with different speeds.
    if test_agents_one_speed:
        speed_ration_map = {1.: 1.,  # Fast passenger train
                            1. / 2.: 0.0,  # Fast freight train
                            1. / 3.: 0.0,  # Slow commuter train
                            1. / 4.: 0.0}  # Slow freight train
    else:
        speed_ration_map = {1.: 0.25,  # Fast passenger train
                            1. / 2.: 0.25,  # Fast freight train
                            1. / 3.: 0.25,  # Slow commuter train
                            1. / 4.: 0.25}  # Slow freight train

    
    if test_malfunctions_enabled:
        env = RailEnv(width=x_dim,
                      height=y_dim,
                      rail_generator=sparse_rail_generator(max_num_cities=max_num_cities,
                                                           # Number of cities in map (where train stations are)
                                                           seed=14,  # Random seed
                                                           grid_mode=False,
                                                           max_rails_between_cities=max_rails_between_cities,
                                                           max_rails_in_city=max_rails_in_city),
                      schedule_generator=sparse_schedule_generator(speed_ration_map),
                      malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
                      number_of_agents=test_n_agents,
                      obs_builder_object=TreeObservation)
    else:
        env = RailEnv(width=x_dim,
                      height=y_dim,
                      rail_generator=sparse_rail_generator(max_num_cities=max_num_cities,
                                                           # Number of cities in map (where train stations are)
                                                           seed=14,  # Random seed
                                                           grid_mode=False,
                                                           max_rails_between_cities=max_rails_between_cities,
                                                           max_rails_in_city=max_rails_in_city),
                      schedule_generator=sparse_schedule_generator(speed_ration_map),
                      number_of_agents=test_n_agents,
                      obs_builder_object=TreeObservation)
    
    env.reset()

    #env_renderer = RenderTool(env, gl="PILSVG", )
    env_renderer = RenderTool(env, gl="PILSVG",
                          agent_render_variant=AgentRenderVariant.AGENT_SHOWS_OPTIONS_AND_BOX,
                          show_debug=False,
                          screen_height=(1080*0.8),  # Adjust these parameters to fit your resolution
                          screen_width=(1920*0.8))
    num_features_per_node = env.obs_builder.observation_dim

    
    nr_nodes = 0
    for i in range(tree_depth + 1):
        nr_nodes += np.power(4, i)
    state_size = num_features_per_node * nr_nodes
    action_size = 5
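    # Worked example: with tree_depth = 2 the tree has 4**0 + 4**1 + 4**2 = 21 nodes,
    # so assuming the usual TreeObsForRailEnv feature count of 11 per node,
    # state_size = 11 * 21 = 231. The 5 actions are flatland's DO_NOTHING,
    # MOVE_LEFT, MOVE_FORWARD, MOVE_RIGHT and STOP_MOVING.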

    # We set the number of episodes we would like to train on
    if 'n_trials' not in locals():
        n_trials = 15000
    
    # max_steps computation
    speed_weighted_mean = 0

    for key in speed_ration_map.keys():
        speed_weighted_mean += key * speed_ration_map[key]
    
    #max_steps = int(3 * (env.height + env.width))
    max_steps = int((1/speed_weighted_mean) * 3 * (env.height + env.width))
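    # Worked example: for the medium setup (48 x 27 grid) with all agents at speed 1.0,
    # speed_weighted_mean = 1.0 and max_steps = int(3 * (27 + 48)) = 225.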
    #eps = 1.
    #eps_end = 0.005
    #eps_decay = 0.9995

    # And some variables to keep track of the performance
    action_dict = dict()
    final_action_dict = dict()
    action_prob_list = []
    scores_window = deque(maxlen=100)
    done_window = deque(maxlen=100)
    scores = []
    scores_list = []
    deadlock_list = []
    dones_list_window = []
    dones_list = []
    action_prob = [0] * action_size
    agent_obs = [None] * env.get_num_agents()
    agent_next_obs = [None] * env.get_num_agents()  # Not used in this test script
    agent = Agent(state_size, action_size)
    
    # LOAD MODEL WEIGHTS TO TEST
    agent.qnetwork_local.load_state_dict(torch.load(path.join('NetsTest' , 'navigator_checkpoint3800_multi10_deadlock_global10.pth')))
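    # If testing on a CPU-only machine while the checkpoint was saved from a GPU run,
    # loading may require torch.load(..., map_location='cpu').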

    record_images = False
    frame_step = 0

    for trials in range(1, n_trials + 1):

        # Reset environment
        obs, info = env.reset()#(True, True)
        env_renderer.reset()
        # Build agent specific observations
        for a in range(env.get_num_agents()):
            agent_obs[a] = normalize_observation(obs[a], tree_depth, observation_radius=10)
        # Reset score and done
        score = 0
        env_done = 0

        # Run episode
        for step in range(max_steps):

            # Action
            for a in range(env.get_num_agents()):
                if info['action_required'][a]:
                    action = agent.act(agent_obs[a], eps=0.)
                    action_prob[action] += 1

                else:
                    action = 0

                action_dict.update({a: action})
            # Environment step
            obs, all_rewards, done, deadlocks, info = env.step(action_dict)

            env_renderer.render_env(show=True, show_predictions=True, show_observations=False)
            # Build agent specific observations and normalize
            for a in range(env.get_num_agents()):
                if obs[a]:
                    agent_obs[a] = normalize_observation(obs[a], tree_depth, observation_radius=10)

                score += all_rewards[a] / env.get_num_agents()


            if done['__all__']:
                break

        # Collect information about training
        tasks_finished = 0
        for _idx in range(env.get_num_agents()):
            if done[_idx] == 1:
                tasks_finished += 1
        done_window.append(tasks_finished / max(1, env.get_num_agents()))
        scores_window.append(score / max_steps)  # save most recent score
        scores.append(np.mean(scores_window))
        dones_list.append(tasks_finished / max(1, env.get_num_agents()))
        dones_list_window.append((np.mean(done_window)))
        scores_list.append(score / max_steps)
        deadlock_list.append(deadlocks.count(1)/max(1, env.get_num_agents()))

        if (np.sum(action_prob) == 0):
            action_prob_normalized = [0] * action_size
        else:
            action_prob_normalized = action_prob / np.sum(action_prob)



        print(
                '\rTesting {} Agents on ({},{}).\t Episode {}\t Score: {:.3f}\tDones: {:.2f}%\tDeadlocks: {:.2f}\t Action Probabilities: \t {}'.format(
                    env.get_num_agents(), x_dim, y_dim,
                    trials,
                    score / max_steps,
                    100 * tasks_finished / max(1, env.get_num_agents()),
                    deadlocks.count(1)/max(1, env.get_num_agents()),
                    action_prob_normalized), end=" ")

        #if trials % 100 == 0:
        action_prob_list.append(action_prob_normalized)
        action_prob = [0] * action_size

        if trials % 50 == 0:

            np.savetxt(fname=path.join('NetsTest' , 'test_metrics.csv'), X=np.transpose(np.asarray([scores_list,scores,dones_list,dones_list_window,deadlock_list])), delimiter=';',newline='\n')
            np.savetxt(fname=path.join('NetsTest' , 'test_action_prob.csv'), X=np.asarray(action_prob_list), delimiter=';',newline='\n')
Ejemplo n.º 29
0
def train_agent(env_params, train_params):
    # Environment parameters
    n_agents = env_params.n_agents
    x_dim = env_params.x_dim
    y_dim = env_params.y_dim
    n_cities = env_params.n_cities
    max_rails_between_cities = env_params.max_rails_between_cities
    max_rails_in_city = env_params.max_rails_in_city
    seed = env_params.seed

    # Observation parameters
    observation_tree_depth = env_params.observation_tree_depth
    observation_radius = env_params.observation_radius
    observation_max_path_depth = env_params.observation_max_path_depth

    # Training parameters
    eps_start = train_params.eps_start
    eps_end = train_params.eps_end
    eps_decay = train_params.eps_decay
    n_episodes = train_params.n_episodes
    checkpoint_interval = train_params.checkpoint_interval
    n_eval_episodes = train_params.n_evaluation_episodes

    # Set the seeds
    random.seed(seed)
    np.random.seed(seed)

    # Break agents from time to time
    malfunction_parameters = MalfunctionParameters(
        malfunction_rate=1. / 10000,  # Rate of malfunctions
        min_duration=15,  # Minimal duration
        max_duration=50  # Max duration
    )

    # Observation builder
    predictor = ShortestPathPredictorForRailEnv(observation_max_path_depth)
    tree_observation = TreeObsForRailEnv(max_depth=observation_tree_depth,
                                         predictor=predictor)

    # Fraction of trains with each speed profile
    speed_profiles = {
        1.: 1.0,  # Fast passenger train
        1. / 2.: 0.0,  # Fast freight train
        1. / 3.: 0.0,  # Slow commuter train
        1. / 4.: 0.0  # Slow freight train
    }

    # Setup the environment
    env = RailEnv(
        width=x_dim,
        height=y_dim,
        rail_generator=sparse_rail_generator(
            max_num_cities=n_cities,
            grid_mode=False,
            max_rails_between_cities=max_rails_between_cities,
            max_rails_in_city=max_rails_in_city),
        schedule_generator=sparse_schedule_generator(speed_profiles),
        number_of_agents=n_agents,
        malfunction_generator_and_process_data=malfunction_from_params(
            malfunction_parameters),
        obs_builder_object=tree_observation,
        random_seed=seed)

    env.reset(regenerate_schedule=True, regenerate_rail=True)

    # Setup renderer
    if train_params.render:
        env_renderer = RenderTool(env, gl="PGL")

    # Calculate the state size given the depth of the tree observation and the number of features
    n_features_per_node = env.obs_builder.observation_dim
    n_nodes = 0
    for i in range(observation_tree_depth + 1):
        n_nodes += np.power(4, i)
    state_size = n_features_per_node * n_nodes

    # The action space of flatland is 5 discrete actions
    action_size = 5

    # Max number of steps per episode
    # This is the official formula used during evaluations
    # See details in flatland.envs.schedule_generators.sparse_schedule_generator
    max_steps = int(4 * 2 * (env.height + env.width + (n_agents / n_cities)))
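    # Illustrative example: with a 25x25 grid, 5 agents and 2 cities this gives
    # max_steps = int(4 * 2 * (25 + 25 + 5 / 2)) = int(8 * 52.5) = 420.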

    action_count = [0] * action_size
    action_dict = dict()
    agent_obs = [None] * env.get_num_agents()
    agent_prev_obs = [None] * env.get_num_agents()
    agent_prev_action = [2] * env.get_num_agents()
    update_values = False
    smoothed_normalized_score = -1.0
    smoothed_eval_normalized_score = -1.0
    smoothed_completion = 0.0
    smoothed_eval_completion = 0.0

    # Double Dueling DQN policy
    policy = DDDQNPolicy(state_size, action_size, train_params)

    # TensorBoard writer
    writer = SummaryWriter()
    writer.add_hparams(vars(train_params), {})
    writer.add_hparams(vars(env_params), {})

    training_timer = Timer()
    training_timer.start()

    print(
        "\n🚉 Training {} trains on {}x{} grid for {} episodes, evaluating on {} episodes every {} episodes.\n"
        .format(env.get_num_agents(), x_dim, y_dim, n_episodes,
                n_eval_episodes, checkpoint_interval))

    for episode_idx in range(n_episodes + 1):
        # Timers
        step_timer = Timer()
        reset_timer = Timer()
        learn_timer = Timer()
        preproc_timer = Timer()

        # Reset environment
        reset_timer.start()
        obs, info = env.reset(regenerate_rail=True, regenerate_schedule=True)
        reset_timer.end()

        if train_params.render:
            env_renderer.set_new_rail()

        score = 0
        nb_steps = 0
        actions_taken = []

        # Build agent specific observations
        for agent in env.get_agent_handles():
            if obs[agent]:
                agent_obs[agent] = normalize_observation(
                    obs[agent],
                    observation_tree_depth,
                    observation_radius=observation_radius)
                agent_prev_obs[agent] = agent_obs[agent].copy()

        # Run episode
        for step in range(max_steps - 1):
            for agent in env.get_agent_handles():
                if info['action_required'][agent]:
                    # If an action is required, we want to store the obs at that step as well as the action
                    update_values = True
                    action = policy.act(agent_obs[agent], eps=eps_start)
                    action_count[action] += 1
                    actions_taken.append(action)
                else:
                    update_values = False
                    action = 0
                action_dict.update({agent: action})

            # Environment step
            step_timer.start()
            next_obs, all_rewards, done, info = env.step(action_dict)
            step_timer.end()

            if train_params.render and episode_idx % checkpoint_interval == 0:
                env_renderer.render_env(show=True,
                                        frames=False,
                                        show_observations=False,
                                        show_predictions=False)

            for agent in range(env.get_num_agents()):
                # Update replay buffer and train agent
                # Only update the values when we are done or when an action was taken and thus relevant information is present
                if update_values or done[agent]:
                    learn_timer.start()
                    policy.step(agent_prev_obs[agent],
                                agent_prev_action[agent], all_rewards[agent],
                                agent_obs[agent], done[agent])
                    learn_timer.end()

                    agent_prev_obs[agent] = agent_obs[agent].copy()
                    agent_prev_action[agent] = action_dict[agent]

                # Preprocess the new observations
                if next_obs[agent]:
                    preproc_timer.start()
                    agent_obs[agent] = normalize_observation(
                        next_obs[agent],
                        observation_tree_depth,
                        observation_radius=observation_radius)
                    preproc_timer.end()

                score += all_rewards[agent]

            nb_steps = step

            if done['__all__']:
                break

        # Epsilon decay
        eps_start = max(eps_end, eps_decay * eps_start)

        # Collect information about training
        tasks_finished = sum(done[idx] for idx in env.get_agent_handles())
        completion = tasks_finished / max(1, env.get_num_agents())
        normalized_score = score / (max_steps * env.get_num_agents())
        action_probs = action_count / np.sum(action_count)
        action_count = [1] * action_size
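        # Resetting the counts to 1 (rather than 0) presumably keeps the next
        # episode's action_probs division above from ever dividing by zero.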

        # Smoothed values for terminal display and for more stable hyper-parameter tuning
        smoothing = 0.99
        smoothed_normalized_score = smoothed_normalized_score * smoothing + normalized_score * (
            1.0 - smoothing)
        smoothed_completion = smoothed_completion * smoothing + completion * (
            1.0 - smoothing)

        # Print logs
        if episode_idx % checkpoint_interval == 0:
            torch.save(
                policy.qnetwork_local,
                './checkpoints/origin_multi-' + str(episode_idx) + '.pth')
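            # torch.save assumes a ./checkpoints directory already exists; creating it
            # beforehand (e.g. os.makedirs('./checkpoints', exist_ok=True)) avoids an
            # error on the first checkpoint.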
            if train_params.render:
                env_renderer.close_window()

        print('\r🚂 Episode {}'
              '\t 🏆 Score: {:.3f}'
              ' Avg: {:.3f}'
              '\t 💯 Done: {:.2f}%'
              ' Avg: {:.2f}%'
              '\t 🎲 Epsilon: {:.2f} '
              '\t 🔀 Action Probs: {}'.format(episode_idx, normalized_score,
                                             smoothed_normalized_score,
                                             100 * completion,
                                             100 * smoothed_completion,
                                             eps_start,
                                             format_action_prob(action_probs)),
              end=" ")

        # Evaluate policy
        if episode_idx % train_params.checkpoint_interval == 0:
            scores, completions, nb_steps_eval = eval_policy(
                env, policy, n_eval_episodes, max_steps)
            writer.add_scalar("evaluation/scores_min", np.min(scores),
                              episode_idx)
            writer.add_scalar("evaluation/scores_max", np.max(scores),
                              episode_idx)
            writer.add_scalar("evaluation/scores_mean", np.mean(scores),
                              episode_idx)
            writer.add_scalar("evaluation/scores_std", np.std(scores),
                              episode_idx)
            writer.add_histogram("evaluation/scores", np.array(scores),
                                 episode_idx)
            writer.add_scalar("evaluation/completions_min",
                              np.min(completions), episode_idx)
            writer.add_scalar("evaluation/completions_max",
                              np.max(completions), episode_idx)
            writer.add_scalar("evaluation/completions_mean",
                              np.mean(completions), episode_idx)
            writer.add_scalar("evaluation/completions_std",
                              np.std(completions), episode_idx)
            writer.add_histogram("evaluation/completions",
                                 np.array(completions), episode_idx)
            writer.add_scalar("evaluation/nb_steps_min", np.min(nb_steps_eval),
                              episode_idx)
            writer.add_scalar("evaluation/nb_steps_max", np.max(nb_steps_eval),
                              episode_idx)
            writer.add_scalar("evaluation/nb_steps_mean",
                              np.mean(nb_steps_eval), episode_idx)
            writer.add_scalar("evaluation/nb_steps_std", np.std(nb_steps_eval),
                              episode_idx)
            writer.add_histogram("evaluation/nb_steps",
                                 np.array(nb_steps_eval), episode_idx)

            smoothing = 0.9
            smoothed_eval_normalized_score = smoothed_eval_normalized_score * smoothing + np.mean(
                scores) * (1.0 - smoothing)
            smoothed_eval_completion = smoothed_eval_completion * smoothing + np.mean(
                completions) * (1.0 - smoothing)
            writer.add_scalar("evaluation/smoothed_score",
                              smoothed_eval_normalized_score, episode_idx)
            writer.add_scalar("evaluation/smoothed_completion",
                              smoothed_eval_completion, episode_idx)

        # Save logs to tensorboard
        writer.add_scalar("training/score", normalized_score, episode_idx)
        writer.add_scalar("training/smoothed_score", smoothed_normalized_score,
                          episode_idx)
        writer.add_scalar("training/completion", np.mean(completion),
                          episode_idx)
        writer.add_scalar("training/smoothed_completion",
                          np.mean(smoothed_completion), episode_idx)
        writer.add_scalar("training/nb_steps", nb_steps, episode_idx)
        writer.add_histogram("actions/distribution", np.array(actions_taken),
                             episode_idx)
        writer.add_scalar("actions/nothing",
                          action_probs[RailEnvActions.DO_NOTHING], episode_idx)
        writer.add_scalar("actions/left",
                          action_probs[RailEnvActions.MOVE_LEFT], episode_idx)
        writer.add_scalar("actions/forward",
                          action_probs[RailEnvActions.MOVE_FORWARD],
                          episode_idx)
        writer.add_scalar("actions/right",
                          action_probs[RailEnvActions.MOVE_RIGHT], episode_idx)
        writer.add_scalar("actions/stop",
                          action_probs[RailEnvActions.STOP_MOVING],
                          episode_idx)
        writer.add_scalar("training/epsilon", eps_start, episode_idx)
        writer.add_scalar("training/buffer_size", len(policy.memory),
                          episode_idx)
        writer.add_scalar("training/loss", policy.loss, episode_idx)
        writer.add_scalar("timer/reset", reset_timer.get(), episode_idx)
        writer.add_scalar("timer/step", step_timer.get(), episode_idx)
        writer.add_scalar("timer/learn", learn_timer.get(), episode_idx)
        writer.add_scalar("timer/preproc", preproc_timer.get(), episode_idx)
        writer.add_scalar("timer/total", training_timer.get_current(),
                          episode_idx)
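
# --- Usage sketch (illustrative, not part of the original file) ---
# The Namespace fields below are inferred from the attributes that train_agent()
# reads; DDDQNPolicy typically needs further hyper-parameters in train_params
# (buffer size, batch size, learning rate, ...), so treat these values as a
# starting point rather than a tested configuration.
from argparse import Namespace

example_env_params = Namespace(
    n_agents=5, x_dim=35, y_dim=35, n_cities=4,
    max_rails_between_cities=2, max_rails_in_city=3, seed=42,
    observation_tree_depth=2, observation_radius=10,
    observation_max_path_depth=30)

example_train_params = Namespace(
    eps_start=1.0, eps_end=0.01, eps_decay=0.998,
    n_episodes=2500, checkpoint_interval=100,
    n_evaluation_episodes=25, render=False)

# train_agent(example_env_params, example_train_params)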
Ejemplo n.º 30
0
scores = []
losses = []
env_count = 0
best_reward = -np.inf

for episode_count in range(n_episodes):
    # print("EPISODE: ",episode_count)

    env_params = get_env(env_count)
    env = RailEnv(
        width=env_params['x_dim'],
        height=env_params['y_dim'],
        rail_generator=sparse_rail_generator(
            max_num_cities=env_params['n_cities'],
            grid_mode=False,
            max_rails_between_cities=max_rails_between_cities,
            max_rails_in_city=env_params['max_rails_in_city']),
        schedule_generator=sparse_schedule_generator(speed_profiles),
        number_of_agents=env_params['n_agents'],
        malfunction_generator_and_process_data=malfunction_from_params(
            malfunction_parameters),
        obs_builder_object=tree_observation,
        random_seed=42)
    env_count += 1

    obs, info = env.reset(True, True)

    max_steps = int(4 * 2 *
                    (env.height + env.width +
                     (env_params['n_agents'] / env_params['n_cities'])))