def fine_tune(config, run, env: RailEnv):
    """Fine-tune the agent on a static env at evaluation time."""
    # Save the evaluation env so the tuning workers can reload it.
    RailEnvPersister.save(env, CURRENT_ENV_PATH)
    num_agents = env.get_num_agents()
    tune_time = get_tune_time(num_agents)

    def env_creator(env_config):
        return FlatlandSparse(env_config,
                              fine_tune_env_path=CURRENT_ENV_PATH,
                              max_steps=num_agents * 100)

    register_env("flatland_sparse", env_creator)
    config['num_workers'] = 3
    config['num_envs_per_worker'] = 1
    config['lr'] = 0.00001 * num_agents

    exp_an = ray.tune.run(run["agent"],
                          reuse_actors=True,
                          verbose=1,
                          stop={"time_since_restore": tune_time},
                          checkpoint_freq=1,
                          keep_checkpoints_num=1,
                          checkpoint_score_attr="episode_reward_mean",
                          config=config,
                          restore=run["checkpoint_path"])

    # Rebuild a single-worker agent from the tuned trial and restore its best checkpoint.
    trial: Trial = exp_an.trials[0]
    agent_config = trial.config
    agent_config['num_workers'] = 0
    agent = trial.get_trainable_cls()(env=config["env"], config=trial.config)
    checkpoint = exp_an.get_trial_checkpoints_paths(trial,
                                                    metric="episode_reward_mean")
    agent.restore(checkpoint[0][0])
    return agent

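# Hedged usage sketch (not from the original code): assumes ray.init() has been called
# elsewhere, `config` is the RLlib config used during training, and `run` carries
# exactly the two keys consumed above.
#
#   run = {"agent": "PPO", "checkpoint_path": "/path/to/training/checkpoint"}
#   tuned_agent = fine_tune(config, run, env=evaluation_env)
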
def parallel_plan(planning_function: Callable, env: RailEnv, **kwargs):
    """Run `planning_function` once per CPU core and return the best action plan."""
    RailEnvPersister.save(env, CURRENT_ENV_PATH)
    with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count()) as worker_pool:
        best_action_results = []
        best_pc_results = []
        best_return_results = []
        futures = [
            worker_pool.submit(planning_function, **kwargs)
            for _ in range(cpu_count())
        ]
        for future in concurrent.futures.as_completed(futures):
            best_actions, best_pc, best_return = future.result()
            if best_pc == 1.0:
                # A plan with full completion cannot be beaten: cancel the rest.
                print(f'MAX PC: {best_pc}, MAX RETURN: {best_return}\n')
                for f in futures:
                    f.cancel()
                return best_actions
            best_action_results.append(best_actions)
            best_pc_results.append(best_pc)
            best_return_results.append(best_return)
        for f in futures:
            f.cancel()
        # Pick the plan with the highest percentage complete across all workers.
        return best_action_results[int(np.argmax(best_pc_results))]

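# Hedged usage sketch: `random_rollout_plan` is a hypothetical planning function (not
# part of this codebase) with the return signature parallel_plan expects, namely
# (best_actions, best_pc, best_return). It reloads the env saved at CURRENT_ENV_PATH.
def random_rollout_plan(budget: int = 10):
    env, _ = RailEnvPersister.load_new(CURRENT_ENV_PATH)
    env.reset(regenerate_rail=False, regenerate_schedule=False)
    # ... evaluate `budget` random rollouts here and keep the best one ...
    best_actions, best_pc, best_return = {}, 0.0, 0.0  # placeholders
    return best_actions, best_pc, best_return

# Example call (assumes `env` is the RailEnv to plan on):
#   best_actions = parallel_plan(random_rollout_plan, env, budget=10)
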
def test_save_load():
    env = RailEnv(width=10, height=10,
                  rail_generator=complex_rail_generator(nr_start_goal=2, nr_extra=5,
                                                        min_dist=6, seed=1),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=2)
    env.reset()

    agent_1_pos = env.agents[0].position
    agent_1_dir = env.agents[0].direction
    agent_1_tar = env.agents[0].target
    agent_2_pos = env.agents[1].position
    agent_2_dir = env.agents[1].direction
    agent_2_tar = env.agents[1].target

    os.makedirs("tmp", exist_ok=True)
    RailEnvPersister.save(env, "tmp/test_save.pkl")
    env.save("tmp/test_save_2.pkl")

    #env.load("test_save.dat")
    env, env_dict = RailEnvPersister.load_new("tmp/test_save.pkl")

    assert env.width == 10
    assert env.height == 10
    assert len(env.agents) == 2
    assert agent_1_pos == env.agents[0].position
    assert agent_1_dir == env.agents[0].direction
    assert agent_1_tar == env.agents[0].target
    assert agent_2_pos == env.agents[1].position
    assert agent_2_dir == env.agents[1].direction
    assert agent_2_tar == env.agents[1].target

def save(self, path):
    """Save the given RailEnv environment as pickle."""
    filename = os.path.join(
        path, f"{self.width}x{self.height}-{self.random_seed}.pkl")
    RailEnvPersister.save(self, filename)

def load_env(env_dict, obs_builder_object=GlobalObsForRailEnv()):
    """Rebuild a RailEnv from a full-state dict produced by RailEnvPersister."""
    env = RailEnv(height=4, width=4, obs_builder_object=obs_builder_object)
    env.reset(regenerate_rail=False, regenerate_schedule=False)
    RailEnvPersister.set_full_state(env, env_dict)
    return env

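# Hedged round-trip sketch: RailEnvPersister.get_full_state (used elsewhere in this
# codebase) turns a live env into a dict, and load_env rebuilds it. `some_env` is
# assumed to be an already-reset RailEnv.
#
#   env_dict = RailEnvPersister.get_full_state(env=some_env)
#   restored = load_env(env_dict)
#   assert len(restored.agents) == len(some_env.agents)
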
def test_rail_env_reset():
    file_name = "test_rail_env_reset.pkl"

    # Test to save and load file.
    rail, rail_map = make_simple_rail()
    env = RailEnv(width=rail_map.shape[1], height=rail_map.shape[0],
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=3,
                  obs_builder_object=TreeObsForRailEnv(max_depth=2,
                                                       predictor=ShortestPathPredictorForRailEnv()))
    env.reset()

    #env.save(file_name)
    RailEnvPersister.save(env, file_name)

    dist_map_shape = np.shape(env.distance_map.get())
    rails_initial = env.rail.grid
    agents_initial = env.agents

    #env2 = RailEnv(width=1, height=1, rail_generator=rail_from_file(file_name),
    #               schedule_generator=schedule_from_file(file_name), number_of_agents=1,
    #               obs_builder_object=TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv()))
    #env2.reset(False, False, False)
    env2, env2_dict = RailEnvPersister.load_new(file_name)

    rails_loaded = env2.rail.grid
    agents_loaded = env2.agents

    assert np.all(np.array_equal(rails_initial, rails_loaded))
    assert agents_initial == agents_loaded

    env3 = RailEnv(width=1, height=1, rail_generator=rail_from_file(file_name),
                   schedule_generator=schedule_from_file(file_name), number_of_agents=1,
                   obs_builder_object=TreeObsForRailEnv(max_depth=2,
                                                        predictor=ShortestPathPredictorForRailEnv()))
    env3.reset(False, True, False)
    rails_loaded = env3.rail.grid
    agents_loaded = env3.agents

    assert np.all(np.array_equal(rails_initial, rails_loaded))
    assert agents_initial == agents_loaded

    env4 = RailEnv(width=1, height=1, rail_generator=rail_from_file(file_name),
                   schedule_generator=schedule_from_file(file_name), number_of_agents=1,
                   obs_builder_object=TreeObsForRailEnv(max_depth=2,
                                                        predictor=ShortestPathPredictorForRailEnv()))
    env4.reset(True, False, False)
    rails_loaded = env4.rail.grid
    agents_loaded = env4.agents

    assert np.all(np.array_equal(rails_initial, rails_loaded))
    assert agents_initial == agents_loaded

def test_malfunction_to_and_from_file():
    """Test saving malfunction parameters with an env and loading them back from file."""
    stochastic_data = MalfunctionParameters(
        malfunction_rate=1000,  # Rate of malfunction occurrence
        min_duration=2,         # Minimal duration of malfunction
        max_duration=5          # Max duration of malfunction
    )

    rail, rail_map = make_simple_rail2()

    env = RailEnv(
        width=25,
        height=30,
        rail_generator=rail_from_grid_transition_map(rail),
        schedule_generator=random_schedule_generator(),
        number_of_agents=10,
        malfunction_generator_and_process_data=malfunction_from_params(stochastic_data))
    env.reset()

    #env.save("./malfunction_saving_loading_tests.pkl")
    RailEnvPersister.save(env, "./malfunction_saving_loading_tests.pkl")

    malfunction_generator, malfunction_process_data = malfunction_from_file(
        "./malfunction_saving_loading_tests.pkl")

    # Build the second env from the malfunction data loaded from file.
    env2 = RailEnv(
        width=25,
        height=30,
        rail_generator=rail_from_grid_transition_map(rail),
        schedule_generator=random_schedule_generator(),
        number_of_agents=10,
        malfunction_generator_and_process_data=(malfunction_generator,
                                                malfunction_process_data))
    env2.reset()

    assert env2.malfunction_process_data == env.malfunction_process_data
    assert env2.malfunction_process_data.malfunction_rate == 1000
    assert env2.malfunction_process_data.min_duration == 2
    assert env2.malfunction_process_data.max_duration == 5

def test_load_env():
    #env = RailEnv(10, 10)
    #env.reset()
    #env.load_resource('env_data.tests', 'test-10x10.mpk')
    env, env_dict = RailEnvPersister.load_resource("env_data.tests", "test-10x10.mpk")
    #env, env_dict = RailEnvPersister.load_new("./env_data/tests/test-10x10.mpk")

    agent_static = EnvAgent((0, 0), 2, (5, 5), False)
    env.add_agent(agent_static)
    assert env.get_num_agents() == 1

def load(self):
    if os.path.exists(self.env_filename):
        self.log("load file: ", self.env_filename)
        #self.env.load(self.env_filename)
        RailEnvPersister.load(self.env, self.env_filename)

        if self.regen_size_height != self.env.height or self.regen_size_width != self.env.width:
            self.regen_size_height = self.env.height
            self.regen_size_width = self.env.width
            self.regenerate(None, 0, self.env)
            RailEnvPersister.load(self.env, self.env_filename)

        self.env.reset_agents()
        self.env.reset(False, False)
        self.view.oRT.update_background()
        self.fix_env()
        self.set_env(self.env)
        self.redraw()
    else:
        self.log("File does not exist:", self.env_filename,
                 " Working directory: ", os.getcwd())

def test_save_load_mpk():
    env = RailEnv(width=10, height=10,
                  rail_generator=complex_rail_generator(nr_start_goal=2, nr_extra=5,
                                                        min_dist=6, seed=1),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=2)
    env.reset()

    os.makedirs("tmp", exist_ok=True)
    RailEnvPersister.save(env, "tmp/test_save.mpk")

    #env.load("test_save.dat")
    env2, env_dict = RailEnvPersister.load_new("tmp/test_save.mpk")

    assert env.width == env2.width
    assert env.height == env2.height
    assert len(env2.agents) == len(env.agents)

    for agent1, agent2 in zip(env.agents, env2.agents):
        assert agent1.position == agent2.position
        assert agent1.direction == agent2.direction
        assert agent1.target == agent2.target

def create_and_save_env(file_name: str, schedule_generator: ScheduleGenerator,
                        rail_generator: RailGenerator):
    stochastic_data = MalfunctionParameters(
        malfunction_rate=1000,  # Rate of malfunction occurrence
        min_duration=15,        # Minimal duration of malfunction
        max_duration=50         # Max duration of malfunction
    )

    env = RailEnv(
        width=30,
        height=30,
        rail_generator=rail_generator,
        schedule_generator=schedule_generator,
        number_of_agents=10,
        malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
        remove_agents_at_target=True)
    env.reset(True, True)

    #env.save(file_name)
    RailEnvPersister.save(env, file_name)

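# Hedged example call (illustrative file name and generator settings; the generators
# mirror the ones used in the save/load tests above):
#
#   create_and_save_env("tmp/complex_30x30.pkl",
#                       schedule_generator=complex_schedule_generator(),
#                       rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=5,
#                                                             min_dist=8, seed=1))
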
def test_get_shortest_paths_agent_handle():
    #env = load_flatland_environment_from_file('Level_distance_map_shortest_path.pkl', 'env_data.tests')
    env, _ = RailEnvPersister.load_new("Level_distance_map_shortest_path.mpk", "env_data.tests")
    env.reset()
    actual = get_shortest_paths(env.distance_map, agent_handle=6)
    print(actual, file=sys.stderr)

    expected = {
        6: [
            Waypoint(position=(5, 5), direction=0), Waypoint(position=(4, 5), direction=0),
            Waypoint(position=(3, 5), direction=0), Waypoint(position=(2, 5), direction=0),
            Waypoint(position=(1, 5), direction=0), Waypoint(position=(0, 5), direction=0),
            Waypoint(position=(0, 6), direction=1), Waypoint(position=(0, 7), direction=1),
            Waypoint(position=(0, 8), direction=1), Waypoint(position=(0, 9), direction=1),
            Waypoint(position=(0, 10), direction=1), Waypoint(position=(1, 10), direction=2),
            Waypoint(position=(2, 10), direction=2), Waypoint(position=(3, 10), direction=2),
            Waypoint(position=(4, 10), direction=2), Waypoint(position=(5, 10), direction=2),
            Waypoint(position=(6, 10), direction=2), Waypoint(position=(7, 10), direction=2),
            Waypoint(position=(8, 10), direction=2), Waypoint(position=(9, 10), direction=2),
            Waypoint(position=(10, 10), direction=2), Waypoint(position=(11, 10), direction=2),
            Waypoint(position=(12, 10), direction=2), Waypoint(position=(13, 10), direction=2),
            Waypoint(position=(14, 10), direction=2), Waypoint(position=(15, 10), direction=2),
            Waypoint(position=(16, 10), direction=2), Waypoint(position=(17, 10), direction=2),
            Waypoint(position=(18, 10), direction=2), Waypoint(position=(19, 10), direction=2),
            Waypoint(position=(20, 10), direction=2), Waypoint(position=(20, 9), direction=3),
            Waypoint(position=(20, 8), direction=3), Waypoint(position=(21, 8), direction=2),
            Waypoint(position=(21, 7), direction=3), Waypoint(position=(21, 6), direction=3),
            Waypoint(position=(21, 5), direction=3)
        ]
    }

    for agent_handle in expected:
        assert np.array_equal(actual[agent_handle], expected[agent_handle]), \
            "[{}] actual={},expected={}".format(agent_handle, actual[agent_handle],
                                                expected[agent_handle])

def _launch(self):
    rail_generator = self.get_rail_generator()

    malfunction_generator = NoMalfunctionGen()
    if {'malfunction_rate', 'malfunction_min_duration', 'malfunction_max_duration'} <= self._config.keys():
        print("MALFUNCTIONS POSSIBLE")
        params = MalfunctionParameters(malfunction_rate=1 / self._config['malfunction_rate'],
                                       max_duration=self._config['malfunction_max_duration'],
                                       min_duration=self._config['malfunction_min_duration'])
        malfunction_generator = ParamMalfunctionGen(params)

    speed_ratio_map = None
    if 'speed_ratio_map' in self._config:
        speed_ratio_map = {
            float(k): float(v)
            for k, v in self._config['speed_ratio_map'].items()
        }
    if self._gym_env_class == SequentialFlatlandGymEnv:
        schedule_generator = SequentialSparseSchedGen(speed_ratio_map, seed=1)
    else:
        schedule_generator = sparse_schedule_generator(speed_ratio_map)

    env = None
    try:
        if self._fine_tune_env_path is None:
            env = RailEnv(
                width=self._config['width'],
                height=self._config['height'],
                rail_generator=rail_generator,
                schedule_generator=schedule_generator,
                number_of_agents=self._config['number_of_agents'],
                malfunction_generator=malfunction_generator,
                obs_builder_object=self._observation.builder(),
                remove_agents_at_target=True,
                random_seed=self._config['seed'],
                use_renderer=self._env_config.get('render')
            )
            env.reset()
        else:
            env, _ = RailEnvPersister.load_new(self._fine_tune_env_path)
            env.reset(regenerate_rail=False, regenerate_schedule=False)
            env.obs_builder = self._observation.builder()
            env.obs_builder.set_env(env)
    except ValueError as e:
        logging.error("=" * 50)
        logging.error(f"Error while creating env: {e}")
        logging.error("=" * 50)
    return env

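# Hedged sketch of the _config keys this launcher reads (values are illustrative
# placeholders, not project defaults):
#
#   example_config = {
#       "width": 30, "height": 30, "number_of_agents": 5, "seed": 0,
#       # optional: all three keys must be present to enable ParamMalfunctionGen
#       "malfunction_rate": 1000, "malfunction_min_duration": 2, "malfunction_max_duration": 5,
#       # optional: speed -> fraction of agents, e.g. every agent at full speed
#       "speed_ratio_map": {"1.0": 1.0},
#   }
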
def test_get_shortest_paths_max_depth():
    #env = load_flatland_environment_from_file('test_002.pkl', 'env_data.tests')
    env, _ = RailEnvPersister.load_new("test_002.mpk", "env_data.tests")
    env.reset()
    actual = get_shortest_paths(env.distance_map, max_depth=2)

    expected = {
        0: [
            Waypoint(position=(1, 1), direction=1),
            Waypoint(position=(1, 2), direction=1)
        ],
        1: [
            Waypoint(position=(3, 18), direction=3),
            Waypoint(position=(3, 17), direction=3),
        ]
    }

    for agent_handle in expected:
        assert np.array_equal(actual[agent_handle], expected[agent_handle]), \
            "[{}] actual={},expected={}".format(agent_handle, actual[agent_handle],
                                                expected[agent_handle])

def test_get_shortest_paths():
    #env = load_flatland_environment_from_file('test_002.mpk', 'env_data.tests')
    env, env_dict = RailEnvPersister.load_new("test_002.mpk", "env_data.tests")

    #print("env len(agents): ", len(env.agents))
    #print(env.distance_map)
    #print("env number_of_agents:", env.number_of_agents)
    #print("env agents:", env.agents)

    #env.distance_map.reset(env.agents, env.rail)

    #actual = get_shortest_paths(env.distance_map)
    #print("shortest paths:", actual)

    #print(env.distance_map)
    #print("Dist map agents:", env.distance_map.agents)

    #print("\nenv reset()")
    env.reset()
    actual = get_shortest_paths(env.distance_map)
    #print("env agents: ", len(env.agents))
    #print("env number_of_agents: ", env.number_of_agents)

    assert len(actual) == 2, "get_shortest_paths should return a dict of length 2"

    expected = {
        0: [
            Waypoint(position=(1, 1), direction=1), Waypoint(position=(1, 2), direction=1),
            Waypoint(position=(1, 3), direction=1), Waypoint(position=(2, 3), direction=2),
            Waypoint(position=(2, 4), direction=1), Waypoint(position=(2, 5), direction=1),
            Waypoint(position=(2, 6), direction=1), Waypoint(position=(2, 7), direction=1),
            Waypoint(position=(2, 8), direction=1), Waypoint(position=(2, 9), direction=1),
            Waypoint(position=(2, 10), direction=1), Waypoint(position=(2, 11), direction=1),
            Waypoint(position=(2, 12), direction=1), Waypoint(position=(2, 13), direction=1),
            Waypoint(position=(2, 14), direction=1), Waypoint(position=(2, 15), direction=1),
            Waypoint(position=(2, 16), direction=1), Waypoint(position=(2, 17), direction=1),
            Waypoint(position=(2, 18), direction=1)
        ],
        1: [
            Waypoint(position=(3, 18), direction=3), Waypoint(position=(3, 17), direction=3),
            Waypoint(position=(3, 16), direction=3), Waypoint(position=(2, 16), direction=0),
            Waypoint(position=(2, 15), direction=3), Waypoint(position=(2, 14), direction=3),
            Waypoint(position=(2, 13), direction=3), Waypoint(position=(2, 12), direction=3),
            Waypoint(position=(2, 11), direction=3), Waypoint(position=(2, 10), direction=3),
            Waypoint(position=(2, 9), direction=3), Waypoint(position=(2, 8), direction=3),
            Waypoint(position=(2, 7), direction=3), Waypoint(position=(2, 6), direction=3),
            Waypoint(position=(2, 5), direction=3), Waypoint(position=(2, 4), direction=3),
            Waypoint(position=(2, 3), direction=3), Waypoint(position=(2, 2), direction=3),
            Waypoint(position=(2, 1), direction=3)
        ]
    }

    for agent_handle in expected:
        assert np.array_equal(actual[agent_handle], expected[agent_handle]), \
            "[{}] actual={},expected={}".format(agent_handle, actual[agent_handle],
                                                expected[agent_handle])

def test_rail_environment_single_agent(show=False):
    # We instantiate the following map on a 3x3 grid
    #  _  _
    # / \/ \
    # | |  |
    # \_/\_/
    transitions = RailEnvTransitions()

    if False:
        # This env creation doesn't quite work right.
        cells = transitions.transition_list
        vertical_line = cells[1]
        south_symmetrical_switch = cells[6]
        north_symmetrical_switch = transitions.rotate_transition(south_symmetrical_switch, 180)
        south_east_turn = int('0100000000000010', 2)
        south_west_turn = transitions.rotate_transition(south_east_turn, 90)
        north_east_turn = transitions.rotate_transition(south_east_turn, 270)
        north_west_turn = transitions.rotate_transition(south_east_turn, 180)

        rail_map = np.array([[south_east_turn, south_symmetrical_switch, south_west_turn],
                             [vertical_line, vertical_line, vertical_line],
                             [north_east_turn, north_symmetrical_switch, north_west_turn]],
                            dtype=np.uint16)

        rail = GridTransitionMap(width=3, height=3, transitions=transitions)
        rail.grid = rail_map
        rail_env = RailEnv(width=3, height=3,
                           rail_generator=rail_from_grid_transition_map(rail),
                           schedule_generator=random_schedule_generator(),
                           number_of_agents=1,
                           obs_builder_object=GlobalObsForRailEnv())
    else:
        rail_env, env_dict = RailEnvPersister.load_new("test_env_loop.pkl", "env_data.tests")
        rail_map = rail_env.rail.grid

    rail_env._max_episode_steps = 1000
    _ = rail_env.reset(False, False, True)

    liActions = [int(a) for a in RailEnvActions]

    env_renderer = RenderTool(rail_env)

    #RailEnvPersister.save(rail_env, "test_env_figure8.pkl")

    for _ in range(5):
        #rail_env.agents[0].initial_position = (1,2)
        _ = rail_env.reset(False, False, True)

        # We do not care about target for the moment
        agent = rail_env.agents[0]
        agent.target = [-1, -1]

        # Check that trains are always initialized at a consistent position
        # or direction. They should always be able to go somewhere.
        if show:
            print("After reset - agent pos:", agent.position, "dir: ", agent.direction)
            print(transitions.get_transitions(rail_map[agent.position], agent.direction))

        #assert (transitions.get_transitions(
        #    rail_map[agent.position],
        #    agent.direction) != (0, 0, 0, 0))

        # HACK - force the direction to one we know is good.
        #agent.initial_position = agent.position = (2,3)
        agent.initial_direction = agent.direction = 0

        if show:
            print("handle:", agent.handle)
        #agent.initial_position = initial_pos = agent.position

        valid_active_actions_done = 0
        pos = agent.position

        if show:
            env_renderer.render_env(show=show, show_agents=True)
            time.sleep(0.01)

        iStep = 0
        while valid_active_actions_done < 6:
            # We randomly select an action
            action = np.random.choice(liActions)
            #action = RailEnvActions.MOVE_FORWARD
            _, _, dict_done, _ = rail_env.step({0: action})

            prev_pos = pos
            pos = agent.position  # rail_env.agents_position[0]

            print("action:", action, "pos:", agent.position, "prev:", prev_pos, agent.direction)
            print(dict_done)
            if prev_pos != pos:
                valid_active_actions_done += 1
            iStep += 1

            if show:
                env_renderer.render_env(show=show, show_agents=True, step=iStep)
                time.sleep(0.01)
            assert iStep < 100, "valid actions should have been performed by now - hung agent"

        # After 6 movements on this railway network, the train should be back
        # to its original height on the map.
        #assert (initial_pos[0] == agent.position[0])

    # We check that the train always attains its target after some time
    for _ in range(10):
        _ = rail_env.reset()
        rail_env.agents[0].direction = 0  # JW - to avoid problem with random_schedule_generator.
        #rail_env.agents[0].position = (1,2)

        iStep = 0
        while iStep < 100:
            # We randomly select an action
            action = np.random.choice(liActions)
            _, _, dones, _ = rail_env.step({0: action})
            done = dones['__all__']
            if done:
                break
            iStep += 1
        assert iStep < 100, "agent should have finished by now"
        env_renderer.render_env(show=show)

def save(self):
    self.log("save to ", self.env_filename, " working dir: ", os.getcwd())
    #self.env.save(self.env_filename)
    RailEnvPersister.save(self.env, self.env_filename)

print(f"Cuda initialised: {torch.cuda.is_initialized()}") device = torch.device("cuda") else: device = torch.device("cpu") model = LinearModel(device=device, input_size=231, layer_sizes=[5], output_size=1) replay_buffer: ReplayBuffer = SimpleReplayBuffer(buffer_size=10000, batch_size=16) base_env = env_creator() base_env.reset() base_env_dict = RailEnvPersister.get_full_state(env=base_env) controller_arguments = { "model": model, "action_size": 5, } controller_creator = partial(DQNController, **controller_arguments) master_controller = controller_creator() if multiprocess: try: print(f"Distributed available: {distributed.is_available()}") set_start_method("spawn") master_controller.model.share_memory() except Exception as e: