예제 #1
0
def fine_tune(config, run, env: RailEnv):
    """
    Fine-tune a restored agent on one fixed environment at evaluation time.

    The environment is first written to ``CURRENT_ENV_PATH``; the env creator
    registered as ``"flatland_sparse"`` passes that path to ``FlatlandSparse``.
    Training is restored from ``run["checkpoint_path"]`` and stops once
    ``time_since_restore`` reaches the value returned by ``get_tune_time``.

    Args:
        config: Trainer configuration. ``num_workers``, ``num_envs_per_worker``
            and ``lr`` are overwritten in place; ``config["env"]`` is used when
            building the returned agent.
        run: Mapping with the keys ``"agent"`` (passed to ``ray.tune.run``)
            and ``"checkpoint_path"`` (passed as ``restore``).
        env: Environment to fine-tune on.

    Returns:
        An agent created from the first trial's trainable class and restored
        from the first checkpoint path reported for that trial.
    """
    RailEnvPersister.save(env, CURRENT_ENV_PATH)
    n_agents = env.get_num_agents()
    time_budget = get_tune_time(n_agents)

    def env_creator(env_config):
        # The step limit is tied to the number of agents in the saved env.
        step_limit = n_agents * 100
        return FlatlandSparse(
            env_config,
            fine_tune_env_path=CURRENT_ENV_PATH,
            max_steps=step_limit,
        )

    register_env("flatland_sparse", env_creator)

    # Overrides applied on top of the caller's configuration.
    config['num_workers'] = 3
    config['num_envs_per_worker'] = 1
    config['lr'] = 0.00001 * n_agents

    score_key = "episode_reward_mean"
    analysis = ray.tune.run(
        run["agent"],
        reuse_actors=True,
        verbose=1,
        stop={"time_since_restore": time_budget},
        checkpoint_freq=1,
        keep_checkpoints_num=1,
        checkpoint_score_attr=score_key,
        config=config,
        restore=run["checkpoint_path"],
    )

    first_trial = analysis.trials[0]
    # The returned agent is built without rollout workers.
    first_trial.config['num_workers'] = 0
    trainable_cls = first_trial.get_trainable_cls()
    agent = trainable_cls(env=config["env"], config=first_trial.config)

    checkpoint_paths = analysis.get_trial_checkpoints_paths(
        first_trial, metric=score_key)
    first_path = checkpoint_paths[0][0]
    agent.restore(first_path)
    return agent
예제 #2
0
def parallel_plan(planning_function: Callable, env: RailEnv, **kwargs):
    """
    Run ``planning_function`` once per CPU core in parallel and pick the best plan.

    ``env`` is saved to ``CURRENT_ENV_PATH`` before the workers are started
    (presumably so the worker processes can load it from there -- confirm
    against the planning functions in use).

    Args:
        planning_function: Callable submitted to a ``ProcessPoolExecutor`` as
            ``planning_function(**kwargs)``. It must return a tuple
            ``(best_actions, best_pc, best_return)``.
        env: Environment to persist before planning.
        **kwargs: Forwarded unchanged to every ``planning_function`` call.

    Returns:
        The ``best_actions`` of the first finished worker whose ``best_pc``
        equals 1.0; otherwise the ``best_actions`` of the worker with the
        highest ``best_pc`` (the first one on ties).
    """
    RailEnvPersister.save(env, CURRENT_ENV_PATH)
    n_workers = cpu_count()

    with concurrent.futures.ProcessPoolExecutor(
            max_workers=n_workers) as worker_pool:
        futures = [
            worker_pool.submit(planning_function, **kwargs)
            for _ in range(n_workers)
        ]

        best_action_results = []
        best_pc_results = []

        for future in concurrent.futures.as_completed(futures):
            best_actions, best_pc, best_return = future.result()
            if best_pc == 1.0:
                print(f'MAX PC: {best_pc}, MAX RETURN: {best_return}\n')
                # A perfect plan was found: drop any work that has not started.
                for pending in futures:
                    pending.cancel()
                return best_actions

            best_action_results.append(best_actions)
            best_pc_results.append(best_pc)

        # Every future has completed here, so there is nothing left to cancel.
        # Select over ALL collected scores; the previous code called argmax on
        # the last scalar ``best_pc`` and therefore always returned index 0.
        return best_action_results[int(np.argmax(best_pc_results))]
예제 #3
0
def test_save_load():
    """
    Save a 10x10 two-agent env to ``tmp/test_save.pkl`` and load it back.

    The loaded env must report the same size and agent count, and each agent's
    position, direction and target must equal the values recorded before saving.
    """
    tracked_attrs = ("position", "direction", "target")

    env = RailEnv(
        width=10,
        height=10,
        rail_generator=complex_rail_generator(nr_start_goal=2, nr_extra=5, min_dist=6, seed=1),
        schedule_generator=complex_schedule_generator(),
        number_of_agents=2,
    )
    env.reset()

    # Record the agent state before anything is written to disk.
    before_save = [
        {name: getattr(env.agents[idx], name) for name in tracked_attrs}
        for idx in range(2)
    ]

    os.makedirs("tmp", exist_ok=True)

    # Save through the persister and through the env's own save method.
    RailEnvPersister.save(env, "tmp/test_save.pkl")
    env.save("tmp/test_save_2.pkl")

    env, _ = RailEnvPersister.load_new("tmp/test_save.pkl")
    assert env.width == 10
    assert env.height == 10
    assert len(env.agents) == 2

    for idx, snapshot in enumerate(before_save):
        for name, value in snapshot.items():
            assert value == getattr(env.agents[idx], name)
예제 #4
0
 def save(self, path):
     '''
     Save this environment through RailEnvPersister.

     The file is written inside ``path`` and is named
     ``<width>x<height>-<random_seed>.pkl``.
     '''
     pickle_name = f"{self.width}x{self.height}-{self.random_seed}.pkl"
     target = os.path.join(path, pickle_name)
     RailEnvPersister.save(self, target)
예제 #5
0
def load_env(env_dict, obs_builder_object=None):
    """
    Build a RailEnv and overwrite its state with ``env_dict``.

    A 4x4 placeholder env is created and reset without regenerating the rail
    or the schedule; ``RailEnvPersister.set_full_state`` then applies
    ``env_dict`` to it.

    Args:
        env_dict: State passed to ``RailEnvPersister.set_full_state``.
        obs_builder_object: Observation builder for the new env. When omitted
            (or ``None``), a new ``GlobalObsForRailEnv`` is created for this
            call. The previous default was evaluated once at definition time,
            so every env built with the default shared one builder instance.

    Returns:
        The env after the state has been applied.
    """
    if obs_builder_object is None:
        obs_builder_object = GlobalObsForRailEnv()

    env = RailEnv(height=4, width=4, obs_builder_object=obs_builder_object)
    env.reset(regenerate_rail=False, regenerate_schedule=False)
    RailEnvPersister.set_full_state(env, env_dict)
    return env
예제 #6
0
def test_rail_env_reset():
    """
    Save an env and check that reloading it reproduces the rail grid and agents.

    Three ways of restoring are compared with the original env: loading with
    ``RailEnvPersister.load_new``, and building a fresh env from the file
    generators and resetting it with two different flag combinations.
    """
    file_name = "test_rail_env_reset.pkl"

    def make_obs_builder():
        # Every env gets its own observation builder and predictor.
        return TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv())

    rail, rail_map = make_simple_rail()

    env = RailEnv(
        width=rail_map.shape[1],
        height=rail_map.shape[0],
        rail_generator=rail_from_grid_transition_map(rail),
        schedule_generator=random_schedule_generator(),
        number_of_agents=3,
        obs_builder_object=make_obs_builder(),
    )
    env.reset()

    RailEnvPersister.save(env, file_name)

    # The shape itself is not checked; the distance map is still queried here.
    np.shape(env.distance_map.get())
    rails_initial = env.rail.grid
    agents_initial = env.agents

    def assert_matches_initial(candidate):
        assert np.all(np.array_equal(rails_initial, candidate.rail.grid))
        assert agents_initial == candidate.agents

    # Direct load through the persister.
    loaded_env, _ = RailEnvPersister.load_new(file_name)
    assert_matches_initial(loaded_env)

    # Envs rebuilt from the file generators, reset with different flags.
    for reset_flags in ((False, True, False), (True, False, False)):
        file_env = RailEnv(
            width=1,
            height=1,
            rail_generator=rail_from_file(file_name),
            schedule_generator=schedule_from_file(file_name),
            number_of_agents=1,
            obs_builder_object=make_obs_builder(),
        )
        file_env.reset(*reset_flags)
        assert_matches_initial(file_env)
def test_malfanction_to_and_from_file():
    """
    Save an env with malfunction parameters and compare malfunction settings.

    An env configured from ``MalfunctionParameters`` is saved, the malfunction
    data is read back from the file, and a second env built from the same
    parameters is checked against the first one and against the literal values.
    """
    pickle_path = "./malfunction_saving_loading_tests.pkl"

    stochastic_data = MalfunctionParameters(
        malfunction_rate=1000,  # Rate of malfunction occurrence
        min_duration=2,  # Minimal duration of malfunction
        max_duration=5,  # Max duration of malfunction
    )

    rail, rail_map = make_simple_rail2()

    def build_env():
        # Generators are created anew for each env.
        return RailEnv(
            width=25,
            height=30,
            rail_generator=rail_from_grid_transition_map(rail),
            schedule_generator=random_schedule_generator(),
            number_of_agents=10,
            malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
        )

    env = build_env()
    env.reset()
    RailEnvPersister.save(env, pickle_path)

    # NOTE(review): the loaded generator/process data are not handed to env2
    # below, so the assertions do not depend on what was read from the file.
    # Confirm whether env2 was meant to be built from these values.
    loaded_generator, loaded_process_data = malfunction_from_file(pickle_path)

    env2 = build_env()
    env2.reset()

    process_data = env2.malfunction_process_data
    assert process_data == env.malfunction_process_data
    assert process_data.malfunction_rate == 1000
    assert process_data.min_duration == 2
    assert process_data.max_duration == 5
예제 #8
0
def test_load_env():
    """
    Load ``test-10x10.mpk`` from the ``env_data.tests`` resources, add one
    agent, and check that the env then reports exactly one agent.
    """
    loaded_env, _ = RailEnvPersister.load_resource("env_data.tests", "test-10x10.mpk")

    loaded_env.add_agent(EnvAgent((0, 0), 2, (5, 5), False))

    agent_count = loaded_env.get_num_agents()
    assert agent_count == 1
예제 #9
0
    def load(self):
        """
        Load the env stored at ``self.env_filename`` and refresh the editor.

        If the file does not exist, this is logged together with the working
        directory and nothing else happens. If the loaded env's size differs
        from the stored regeneration size, the size is updated, the env is
        regenerated and the file is loaded a second time.
        """
        if not os.path.exists(self.env_filename):
            self.log("File does not exist:", self.env_filename, " Working directory: ", os.getcwd())
            return

        self.log("load file: ", self.env_filename)
        RailEnvPersister.load(self.env, self.env_filename)

        if self.regen_size_height != self.env.height or self.regen_size_width != self.env.width:
            # Adopt the loaded dimensions, regenerate, then load again.
            self.regen_size_height = self.env.height
            self.regen_size_width = self.env.width
            self.regenerate(None, 0, self.env)
            RailEnvPersister.load(self.env, self.env_filename)

        self.env.reset_agents()
        self.env.reset(False, False)
        self.view.oRT.update_background()
        self.fix_env()
        self.set_env(self.env)
        self.redraw()
예제 #10
0
def test_save_load_mpk():
    """
    Save a 10x10 two-agent env to ``tmp/test_save.mpk`` and load it back.

    The loaded env must match the original in width, height and agent count,
    and paired agents must agree on position, direction and target.
    """
    save_path = "tmp/test_save.mpk"

    original = RailEnv(
        width=10,
        height=10,
        rail_generator=complex_rail_generator(nr_start_goal=2, nr_extra=5, min_dist=6, seed=1),
        schedule_generator=complex_schedule_generator(),
        number_of_agents=2,
    )
    original.reset()

    os.makedirs("tmp", exist_ok=True)
    RailEnvPersister.save(original, save_path)

    restored, _ = RailEnvPersister.load_new(save_path)
    assert original.width == restored.width
    assert original.height == restored.height
    assert len(restored.agents) == len(original.agents)

    for saved_agent, loaded_agent in zip(original.agents, restored.agents):
        for attr in ("position", "direction", "target"):
            assert getattr(saved_agent, attr) == getattr(loaded_agent, attr)
예제 #11
0
def create_and_save_env(file_name: str, schedule_generator: ScheduleGenerator,
                        rail_generator: RailGenerator):
    """
    Create a 30x30 env with 10 agents and malfunctions, reset it and save it.

    Args:
        file_name: Destination handed to ``RailEnvPersister.save``.
        schedule_generator: Schedule generator passed to ``RailEnv``.
        rail_generator: Rail generator passed to ``RailEnv``.
    """
    malfunction_params = MalfunctionParameters(
        malfunction_rate=1000,  # Rate of malfunction occurrence
        min_duration=15,  # Minimal duration of malfunction
        max_duration=50,  # Max duration of malfunction
    )

    env_settings = dict(
        width=30,
        height=30,
        rail_generator=rail_generator,
        schedule_generator=schedule_generator,
        number_of_agents=10,
        malfunction_generator_and_process_data=malfunction_from_params(malfunction_params),
        remove_agents_at_target=True,
    )
    new_env = RailEnv(**env_settings)
    new_env.reset(True, True)

    RailEnvPersister.save(new_env, file_name)
def test_get_shortest_paths_agent_handle():
    """
    Check the shortest path computed for agent handle 6 on the
    ``Level_distance_map_shortest_path.mpk`` env against a fixed waypoint list.
    """
    env, _ = RailEnvPersister.load_new("Level_distance_map_shortest_path.mpk",
                                       "env_data.tests")
    env.reset()
    paths = get_shortest_paths(env.distance_map, agent_handle=6)

    print(paths, file=sys.stderr)

    # (position, direction) pairs in path order.
    expected_steps = {
        6: [
            ((5, 5), 0),
            ((4, 5), 0),
            ((3, 5), 0),
            ((2, 5), 0),
            ((1, 5), 0),
            ((0, 5), 0),
            ((0, 6), 1),
            ((0, 7), 1),
            ((0, 8), 1),
            ((0, 9), 1),
            ((0, 10), 1),
            ((1, 10), 2),
            ((2, 10), 2),
            ((3, 10), 2),
            ((4, 10), 2),
            ((5, 10), 2),
            ((6, 10), 2),
            ((7, 10), 2),
            ((8, 10), 2),
            ((9, 10), 2),
            ((10, 10), 2),
            ((11, 10), 2),
            ((12, 10), 2),
            ((13, 10), 2),
            ((14, 10), 2),
            ((15, 10), 2),
            ((16, 10), 2),
            ((17, 10), 2),
            ((18, 10), 2),
            ((19, 10), 2),
            ((20, 10), 2),
            ((20, 9), 3),
            ((20, 8), 3),
            ((21, 8), 2),
            ((21, 7), 3),
            ((21, 6), 3),
            ((21, 5), 3),
        ],
    }
    expected = {
        handle: [Waypoint(position=pos, direction=heading) for pos, heading in steps]
        for handle, steps in expected_steps.items()
    }

    for handle, wanted in expected.items():
        assert np.array_equal(paths[handle], wanted), \
            "[{}] actual={},expected={}".format(handle, paths[handle], wanted)
예제 #13
0
    def _launch(self):
        """
        Create the RailEnv for this wrapper, or load it from the fine-tune path.

        Malfunctions are enabled only when the config holds all three
        ``malfunction_*`` keys. When ``self._fine_tune_env_path`` is set, the
        env is loaded from that path and given a new observation builder;
        otherwise a new env is built from the config and reset.

        Returns:
            The env. A ``ValueError`` raised during creation is logged rather
            than propagated, so the result can be ``None``.
        """
        rail_gen = self.get_rail_generator()

        malfunction_gen = NoMalfunctionGen()
        malfunction_keys = {'malfunction_rate', 'malfunction_min_duration', 'malfunction_max_duration'}
        if malfunction_keys.issubset(self._config.keys()):
            print("MALFUNCTIONS POSSIBLE")
            malfunction_gen = ParamMalfunctionGen(
                MalfunctionParameters(
                    # The config value is inverted before being passed on.
                    malfunction_rate=1 / self._config['malfunction_rate'],
                    max_duration=self._config['malfunction_max_duration'],
                    min_duration=self._config['malfunction_min_duration'],
                )
            )

        speed_ratios = None
        if 'speed_ratio_map' in self._config:
            # Keys and values are coerced to float.
            speed_ratios = dict(
                (float(speed), float(ratio))
                for speed, ratio in self._config['speed_ratio_map'].items()
            )

        use_sequential = self._gym_env_class == SequentialFlatlandGymEnv
        sched_gen = (
            SequentialSparseSchedGen(speed_ratios, seed=1)
            if use_sequential
            else sparse_schedule_generator(speed_ratios)
        )

        env = None
        try:
            if self._fine_tune_env_path is not None:
                env, _ = RailEnvPersister.load_new(self._fine_tune_env_path)
                env.reset(regenerate_rail=False, regenerate_schedule=False)
                env.obs_builder = self._observation.builder()
                env.obs_builder.set_env(env)
            else:
                env = RailEnv(
                    width=self._config['width'],
                    height=self._config['height'],
                    rail_generator=rail_gen,
                    schedule_generator=sched_gen,
                    number_of_agents=self._config['number_of_agents'],
                    malfunction_generator=malfunction_gen,
                    obs_builder_object=self._observation.builder(),
                    remove_agents_at_target=True,
                    random_seed=self._config['seed'],
                    use_renderer=self._env_config.get('render')
                )
                env.reset()
        except ValueError as e:
            separator = "=" * 50
            logging.error(separator)
            logging.error(f"Error while creating env: {e}")
            logging.error(separator)

        return env
def test_get_shortest_paths_max_depth():
    """
    With ``max_depth=2`` on the ``test_002.mpk`` env, each agent's shortest
    path must consist of the two expected waypoints.
    """
    env, _ = RailEnvPersister.load_new("test_002.mpk", "env_data.tests")
    env.reset()
    paths = get_shortest_paths(env.distance_map, max_depth=2)

    # (position, direction) pairs in path order.
    expected_steps = {
        0: [((1, 1), 1), ((1, 2), 1)],
        1: [((3, 18), 3), ((3, 17), 3)],
    }
    expected = {
        handle: [Waypoint(position=pos, direction=heading) for pos, heading in steps]
        for handle, steps in expected_steps.items()
    }

    for handle, wanted in expected.items():
        assert np.array_equal(paths[handle], wanted), \
            "[{}] actual={},expected={}".format(handle, paths[handle], wanted)
def test_get_shortest_paths():
    """
    Check the full shortest paths of both agents on the ``test_002.mpk`` env
    against fixed waypoint lists.
    """
    env, _ = RailEnvPersister.load_new("test_002.mpk", "env_data.tests")

    env.reset()
    paths = get_shortest_paths(env.distance_map)

    assert len(paths) == 2, "get_shortest_paths should return a dict of length 2"

    # (position, direction) pairs in path order.
    expected_steps = {
        0: [
            ((1, 1), 1),
            ((1, 2), 1),
            ((1, 3), 1),
            ((2, 3), 2),
            ((2, 4), 1),
            ((2, 5), 1),
            ((2, 6), 1),
            ((2, 7), 1),
            ((2, 8), 1),
            ((2, 9), 1),
            ((2, 10), 1),
            ((2, 11), 1),
            ((2, 12), 1),
            ((2, 13), 1),
            ((2, 14), 1),
            ((2, 15), 1),
            ((2, 16), 1),
            ((2, 17), 1),
            ((2, 18), 1),
        ],
        1: [
            ((3, 18), 3),
            ((3, 17), 3),
            ((3, 16), 3),
            ((2, 16), 0),
            ((2, 15), 3),
            ((2, 14), 3),
            ((2, 13), 3),
            ((2, 12), 3),
            ((2, 11), 3),
            ((2, 10), 3),
            ((2, 9), 3),
            ((2, 8), 3),
            ((2, 7), 3),
            ((2, 6), 3),
            ((2, 5), 3),
            ((2, 4), 3),
            ((2, 3), 3),
            ((2, 2), 3),
            ((2, 1), 3),
        ],
    }
    expected = {
        handle: [Waypoint(position=pos, direction=heading) for pos, heading in steps]
        for handle, steps in expected_steps.items()
    }

    for handle, wanted in expected.items():
        assert np.array_equal(paths[handle], wanted), \
            "[{}] actual={},expected={}".format(handle, paths[handle], wanted)
예제 #16
0
def test_rail_environment_single_agent(show=False):
    """
    Drive a single agent with random actions on the env loaded from
    ``test_env_loop.pkl`` and check that it keeps moving and finishes.

    The outer loop runs 5 rounds. In each round the env is reset, the agent's
    direction is forced to 0, and random actions are applied until the agent's
    position has changed 6 times (failing if that takes 100 steps). Then,
    still inside the same round, the env is reset 10 times and random actions
    are applied until ``dones['__all__']`` is set (failing at 100 steps).

    Args:
        show: When true, print agent details and render the env while
            stepping through the first phase of each round.
    """
    # We instantiate the following map on a 3x3 grid
    #  _  _
    # / \/ \
    # | |  |
    # \_/\_/

    transitions = RailEnvTransitions()



    # The hand-built map below is disabled; the env is loaded from file instead.
    if False:
        # This env creation doesn't quite work right.
        cells = transitions.transition_list
        vertical_line = cells[1]
        south_symmetrical_switch = cells[6]
        north_symmetrical_switch = transitions.rotate_transition(south_symmetrical_switch, 180)
        south_east_turn = int('0100000000000010', 2)
        south_west_turn = transitions.rotate_transition(south_east_turn, 90)
        north_east_turn = transitions.rotate_transition(south_east_turn, 270)
        north_west_turn = transitions.rotate_transition(south_east_turn, 180)

        rail_map = np.array([[south_east_turn, south_symmetrical_switch,
                            south_west_turn],
                            [vertical_line, vertical_line, vertical_line],
                            [north_east_turn, north_symmetrical_switch,
                            north_west_turn]],
                            dtype=np.uint16)

        rail = GridTransitionMap(width=3, height=3, transitions=transitions)
        rail.grid = rail_map
        rail_env = RailEnv(width=3, height=3, rail_generator=rail_from_grid_transition_map(rail),
                        schedule_generator=random_schedule_generator(), number_of_agents=1,
                        obs_builder_object=GlobalObsForRailEnv())
    else:
        rail_env, env_dict = RailEnvPersister.load_new("test_env_loop.pkl", "env_data.tests")
        rail_map = rail_env.rail.grid

    rail_env._max_episode_steps = 1000

    _ = rail_env.reset(False, False, True)

    # Integer values of all available actions, used for random sampling below.
    liActions = [int(a) for a in RailEnvActions]

    env_renderer = RenderTool(rail_env)

    #RailEnvPersister.save(rail_env, "test_env_figure8.pkl")

    for _ in range(5):

        #rail_env.agents[0].initial_position = (1,2)
        _ = rail_env.reset(False, False, True)

        # We do not care about target for the moment
        agent = rail_env.agents[0]
        agent.target = [-1, -1]

        # Check that trains are always initialized at a consistent position
        # or direction.
        # They should always be able to go somewhere.
        if show:
            print("After reset - agent pos:", agent.position, "dir: ", agent.direction)
            print(transitions.get_transitions(rail_map[agent.position], agent.direction))

        #assert (transitions.get_transitions(
        #    rail_map[agent.position],
        #    agent.direction) != (0, 0, 0, 0))

        # HACK - force the direction to one we know is good.
        #agent.initial_position = agent.position = (2,3)
        agent.initial_direction = agent.direction = 0

        if show:
            print ("handle:", agent.handle)
        #agent.initial_position = initial_pos = agent.position

        # Counts the steps after which the agent's position has changed.
        valid_active_actions_done = 0
        pos = agent.position

        if show:
            env_renderer.render_env(show=show, show_agents=True)
            time.sleep(0.01)

        iStep = 0
        while valid_active_actions_done < 6:
            # We randomly select an action
            action = np.random.choice(liActions)
            #action = RailEnvActions.MOVE_FORWARD

            _, _, dict_done, _ = rail_env.step({0: action})

            prev_pos = pos
            pos = agent.position  # rail_env.agents_position[0]

            print("action:", action, "pos:", agent.position, "prev:", prev_pos, agent.direction)
            print(dict_done)
            if prev_pos != pos:
                valid_active_actions_done += 1
            iStep += 1

            if show:
                env_renderer.render_env(show=show, show_agents=True, step=iStep)
                time.sleep(0.01)
            assert iStep < 100, "valid actions should have been performed by now - hung agent"

        # After 6 movements on this railway network, the train should be back
        # to its original height on the map.
        #assert (initial_pos[0] == agent.position[0])

        # We check that the train always attains its target after some time
        # NOTE(review): this loop is nested inside the 5-round loop above and
        # reuses `_` as its loop variable - confirm the nesting is intended.
        for _ in range(10):
            _ = rail_env.reset()

            rail_env.agents[0].direction = 0

            # JW - to avoid problem with random_schedule_generator.
            #rail_env.agents[0].position = (1,2)

            iStep = 0
            while iStep < 100:
                # We randomly select an action
                action = np.random.choice(liActions)

                _, _, dones, _ = rail_env.step({0: action})
                done = dones['__all__']
                if done:
                    break
                iStep +=1
                # Fails once the step counter reaches 100 without finishing.
                assert iStep < 100, "agent should have finished by now"
                env_renderer.render_env(show=show)
예제 #17
0
 def save(self):
     """Log the destination, then save ``self.env`` to ``self.env_filename``."""
     destination = self.env_filename
     self.log("save to ", destination, " working dir: ", os.getcwd())
     RailEnvPersister.save(self.env, destination)
예제 #18
0
        print(f"Cuda initialised: {torch.cuda.is_initialized()}")
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    model = LinearModel(device=device,
                        input_size=231,
                        layer_sizes=[5],
                        output_size=1)

    replay_buffer: ReplayBuffer = SimpleReplayBuffer(buffer_size=10000,
                                                     batch_size=16)

    base_env = env_creator()
    base_env.reset()
    base_env_dict = RailEnvPersister.get_full_state(env=base_env)

    controller_arguments = {
        "model": model,
        "action_size": 5,
    }

    controller_creator = partial(DQNController, **controller_arguments)
    master_controller = controller_creator()

    if multiprocess:
        try:
            print(f"Distributed available: {distributed.is_available()}")
            set_start_method("spawn")
            master_controller.model.share_memory()
        except Exception as e: