def test_save_load():
    env = RailEnv(width=10, height=10,
                  rail_generator=complex_rail_generator(nr_start_goal=2, nr_extra=5, min_dist=6, seed=1),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=2)
    env.reset()
    agent_1_pos = env.agents[0].position
    agent_1_dir = env.agents[0].direction
    agent_1_tar = env.agents[0].target
    agent_2_pos = env.agents[1].position
    agent_2_dir = env.agents[1].direction
    agent_2_tar = env.agents[1].target

    os.makedirs("tmp", exist_ok=True)

    RailEnvPersister.save(env, "tmp/test_save.pkl")
    env.save("tmp/test_save_2.pkl")

    # env.load("test_save.dat")
    env, env_dict = RailEnvPersister.load_new("tmp/test_save.pkl")

    assert env.width == 10
    assert env.height == 10
    assert len(env.agents) == 2
    assert agent_1_pos == env.agents[0].position
    assert agent_1_dir == env.agents[0].direction
    assert agent_1_tar == env.agents[0].target
    assert agent_2_pos == env.agents[1].position
    assert agent_2_dir == env.agents[1].direction
    assert agent_2_tar == env.agents[1].target

def test_save_load():
    env = RailEnv(width=10, height=10,
                  rail_generator=complex_rail_generator(nr_start_goal=2, nr_extra=5, min_dist=6, seed=1),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=2)
    env.reset()
    agent_1_pos = env.agents[0].position
    agent_1_dir = env.agents[0].direction
    agent_1_tar = env.agents[0].target
    agent_2_pos = env.agents[1].position
    agent_2_dir = env.agents[1].direction
    agent_2_tar = env.agents[1].target

    env.save("test_save.dat")
    env.load("test_save.dat")

    assert env.width == 10
    assert env.height == 10
    assert len(env.agents) == 2
    assert agent_1_pos == env.agents[0].position
    assert agent_1_dir == env.agents[0].direction
    assert agent_1_tar == env.agents[0].target
    assert agent_2_pos == env.agents[1].position
    assert agent_2_dir == env.agents[1].direction
    assert agent_2_tar == env.agents[1].target

def create_testfiles(parameters, test_nr=0, nr_trials_per_test=100):
    # Parameter initialization
    print('Creating {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(
        test_nr, parameters[0], parameters[1], parameters[2]))

    # Reset environment
    random.seed(parameters[3])
    np.random.seed(parameters[3])
    nr_paths = max(4, parameters[2] + int(0.5 * parameters[2]))
    min_dist = int(min([parameters[0], parameters[1]]) * 0.75)
    env = RailEnv(width=parameters[0], height=parameters[1],
                  rail_generator=complex_rail_generator(nr_start_goal=nr_paths, nr_extra=5,
                                                        min_dist=min_dist, max_dist=99999,
                                                        seed=parameters[3]),
                  schedule_generator=complex_schedule_generator(),
                  obs_builder_object=TreeObsForRailEnv(max_depth=2),
                  number_of_agents=parameters[2])

    printProgressBar(0, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
    for trial in range(nr_trials_per_test):
        # Reset the env
        env.reset(True, True)
        env.save("./Tests/{}/Level_{}.pkl".format(test_nr, trial))
        printProgressBar(trial + 1, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
    return

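# `printProgressBar` is called in create_testfiles (and in run_test below) but is
# not defined in these snippets. A minimal sketch of a compatible console progress
# bar, assuming only the (iteration, total, prefix, suffix, length) call signature
# used above; the rendering of the original helper may differ.
def printProgressBar(iteration, total, prefix='', suffix='', length=20, fill='#'):
    """Print an in-place console progress bar (hypothetical reimplementation)."""
    percent = "{0:.1f}".format(100 * (iteration / float(total)))
    filled_length = int(length * iteration // total)
    bar = fill * filled_length + '-' * (length - filled_length)
    print('\r{} |{}| {}% {}'.format(prefix, bar, percent, suffix), end='\r')
    if iteration == total:
        print()  # move to a new line once the bar is complete
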
def demo(args=None):
    """Demo script to check installation"""
    env = RailEnv(width=15, height=15,
                  rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=8,
                                                        max_dist=99999),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=5)
    env._max_episode_steps = int(15 * (env.width + env.height))
    env_renderer = RenderTool(env)

    while True:
        obs, info = env.reset()
        _done = False
        # Run a single episode here
        step = 0
        while not _done:
            # Compute Action
            _action = {}
            for _idx, _ in enumerate(env.agents):
                _action[_idx] = np.random.randint(0, 5)
            obs, all_rewards, done, _ = env.step(_action)
            _done = done['__all__']
            step += 1
            env_renderer.render_env(show=True, frames=False,
                                    show_observations=False, show_predictions=False)
            time.sleep(0.3)
    return 0

def gen_env(number_agents, width, height, n_start_goal, seed):
    speed_ratio_map = {
        1.: 0.25,       # Fast passenger train
        1. / 2.: 0.25,  # Fast freight train
        1. / 3.: 0.25,  # Slow commuter train
        1. / 4.: 0.25,  # Slow freight train
    }
    env = RailEnv(width=width, height=height,
                  rail_generator=complex_rail_generator(nr_start_goal=n_start_goal, nr_extra=3,
                                                        min_dist=6, max_dist=99999, seed=seed),
                  schedule_generator=complex_schedule_generator(speed_ratio_map=speed_ratio_map),
                  number_of_agents=number_agents,
                  obs_builder_object=TreeObsForRailEnv(max_depth=5))
    env.reset()
    env.step(dict(zip(range(number_agents), [2] * number_agents)))
    return env

def test_normalize_features():
    random.seed(1)
    np.random.seed(1)
    max_depth = 4

    for i in range(10):
        tree_observer = TreeObsForRailEnv(max_depth=max_depth)
        next_rand_number = random.randint(0, 100)
        env = RailEnv(width=10, height=10,
                      rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=8,
                                                            max_dist=99999, seed=next_rand_number),
                      schedule_generator=complex_schedule_generator(),
                      number_of_agents=1,
                      obs_builder_object=tree_observer)
        env.reset()  # the env must be initialised via reset before the first step
        obs, all_rewards, done, _ = env.step({0: 0})
        obs_new = tree_observer.get()
        # data, distance, agent_data = split_tree(tree=np.array(obs_old), num_features_per_node=11)
        data_normalized = normalize_observation(obs_new, max_depth, observation_radius=10)

        filename = 'testdata/test_array_{}.csv'.format(i)
        data_loaded = np.loadtxt(filename, delimiter=',')
        assert np.allclose(data_loaded, data_normalized)

def create_env(number_agents, width, height, n_start_goal, seed):
    env = RailEnv(width=width, height=height,
                  rail_generator=complex_rail_generator(nr_start_goal=n_start_goal, nr_extra=1,
                                                        min_dist=6, max_dist=99999, seed=seed),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=number_agents)
    return env

def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o == "--sleep-for-animation":  # fixed: `o in ("--...")` was a substring test, not a tuple test
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    # Initiate the Predictor
    custom_predictor = ShortestPathPredictorForRailEnv(10)

    # Pass the Predictor to the observation builder
    custom_obs_builder = ObservePredictions(custom_predictor)

    # Initiate Environment
    env = RailEnv(width=10, height=10,
                  rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=1, min_dist=8,
                                                        max_dist=99999, seed=1),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=3,
                  obs_builder_object=custom_obs_builder)

    obs, info = env.reset()
    env_renderer = RenderTool(env, gl="PILSVG")

    # We render the initial step and show the observed cells as colored boxes
    env_renderer.render_env(show=True, frames=True, show_observations=True, show_predictions=False)

    action_dict = {}
    for step in range(100):
        for a in range(env.get_num_agents()):
            action = np.random.randint(0, 5)
            action_dict[a] = action
        obs, all_rewards, done, _ = env.step(action_dict)
        print("Rewards: ", all_rewards, " [done=", done, "]")
        env_renderer.render_env(show=True, frames=True, show_observations=True, show_predictions=False)
        if sleep_for_animation:
            time.sleep(0.5)

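# `str2bool` is used by the option parsing above (and in the other main() variants
# below) but is not defined in these snippets. A minimal sketch, assuming it just
# maps common truthy strings to a bool:
def str2bool(v):
    # Treat "yes"/"true"/"t"/"1" (case-insensitive) as True, anything else as False.
    return str(v).lower() in ("yes", "true", "t", "1")
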
def test_complex_rail_generator():
    n_agents = 10
    n_start = 2
    x_dim = 10
    y_dim = 10
    min_dist = 4

    # Check that agent number is changed to fit generated level
    env = RailEnv(width=x_dim, height=y_dim,
                  rail_generator=complex_rail_generator(nr_start_goal=n_start, nr_extra=0, min_dist=min_dist),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=n_agents)
    env.reset()
    assert env.get_num_agents() == 2
    assert env.rail.grid.shape == (y_dim, x_dim)

    # Check that no agents are generated when level cannot be generated
    min_dist = 2 * x_dim
    env = RailEnv(width=x_dim, height=y_dim,
                  rail_generator=complex_rail_generator(nr_start_goal=n_start, nr_extra=0, min_dist=min_dist),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=n_agents)
    env.reset()
    assert env.get_num_agents() == 0
    assert env.rail.grid.shape == (y_dim, x_dim)

    # Check that everything stays the same when correct parameters are given
    min_dist = 2
    n_start = 5
    n_agents = 5
    env = RailEnv(width=x_dim, height=y_dim,
                  rail_generator=complex_rail_generator(nr_start_goal=n_start, nr_extra=0, min_dist=min_dist),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=n_agents)
    env.reset()
    assert env.get_num_agents() == n_agents
    assert env.rail.grid.shape == (y_dim, x_dim)

def env_creator():
    """Creates an env and returns it."""
    return RailEnv(width=20, height=30,
                   rail_generator=complex_rail_generator(nr_start_goal=100, nr_extra=2, min_dist=8,
                                                         max_dist=99999, seed=False),
                   schedule_generator=complex_schedule_generator(seed=False),
                   obs_builder_object=GlobalObsForRailEnv(),
                   number_of_agents=3,
                   random_seed=True)

def test_multi_speed_init():
    env = RailEnv(width=50, height=50,
                  rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=8,
                                                        max_dist=99999, seed=1),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=5)
    # Initialize the agent with the parameters corresponding to the environment and observation_builder
    agent = RandomAgent(218, 4)

    # Empty dictionary for all agent actions
    action_dict = dict()

    # Reset environment and get initial observations for all agents
    env.reset(False, False, True)

    # Here you can also further enhance the provided observation by means of normalization
    # See training navigation example in the baseline repository

    # Set all the different speeds
    old_pos = []
    for i_agent in range(env.get_num_agents()):
        env.agents[i_agent].speed_data['speed'] = 1. / (i_agent + 1)
        old_pos.append(env.agents[i_agent].position)

    # Run episode
    for step in range(100):
        # Choose an action for each agent in the environment
        for a in range(env.get_num_agents()):
            action = agent.act(0)
            action_dict.update({a: action})
            # Check that agent did not move in between its speed updates
            assert old_pos[a] == env.agents[a].position

        # Environment step which returns the observations for all agents, their corresponding
        # reward and whether they are done
        _, _, _, _ = env.step(action_dict)

        # Update old position whenever an agent was allowed to move
        for i_agent in range(env.get_num_agents()):
            if (step + 1) % (i_agent + 1) == 0:
                print(step, i_agent, env.agents[i_agent].position)
                old_pos[i_agent] = env.agents[i_agent].position

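# `RandomAgent` is referenced in test_multi_speed_init (and via `act(..., eps=...)`
# in run_test below) but is not defined in these snippets. A plausible minimal
# version matching those call sites; the original class may carry more state.
import numpy as np

class RandomAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size

    def act(self, state, eps=0.):
        # Ignore the state (and eps) and pick a uniformly random action.
        return np.random.randint(0, self.action_size)
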
def test_rail_env_speed_initializer():
    speed_ratio_map = {1: 0.3, 2: 0.4, 3: 0.1, 5: 0.2}

    # Note: the speed map must actually be passed to the schedule generator,
    # otherwise all agents default to speed 1.0 and the test is vacuous.
    env = RailEnv(width=50, height=50,
                  rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=8,
                                                        max_dist=99999, seed=1),
                  schedule_generator=complex_schedule_generator(speed_ratio_map=speed_ratio_map),
                  number_of_agents=10)
    env.reset()
    actual_speeds = list(map(lambda agent: agent.speed_data['speed'], env.agents))
    expected_speed_set = set(speed_ratio_map.keys())

    # check that the number of speeds generated is correct
    assert len(actual_speeds) == env.get_num_agents()
    # check that only the speeds defined are generated
    assert all({(actual_speed in expected_speed_set) for actual_speed in actual_speeds})

def env_gradual_update(input_env, agent=False, hardness_lvl=1):
    agent_num = input_env.number_of_agents
    env_width = input_env.width + 4
    env_height = input_env.height + 4
    map_agent_ratio = int(np.round(((env_width + env_height) / 2) / 5 - 2))
    agent_num = map_agent_ratio if map_agent_ratio > 0 else 1

    if hardness_lvl == 1:
        rail_generator = complex_rail_generator(nr_start_goal=20, nr_extra=1, min_dist=9,
                                                max_dist=99999, seed=0)
        schedule_generator = complex_schedule_generator()
    else:
        rail_generator = sparse_rail_generator(nr_start_goal=9, nr_extra=1, min_dist=9,
                                               max_dist=99999, seed=0)
        schedule_generator = sparse_schedule_generator()

    global env, env_renderer, render
    if render:
        env_renderer.close_window()
    env = RailEnv(width=env_width, height=env_height,
                  rail_generator=rail_generator,
                  schedule_generator=schedule_generator,
                  obs_builder_object=GlobalObsForRailEnv(),
                  number_of_agents=agent_num)
    env_renderer = RenderTool(env)

def create_multi_agent_environment(dimension, num_agents, timed, seed):
    # Create new environment.
    env = RailEnv(width=dimension, height=dimension,
                  rail_generator=complex_rail_generator(nr_start_goal=int(1.5 * num_agents),
                                                        nr_extra=int(1.2 * num_agents),
                                                        min_dist=int(floor(dimension / 2)),
                                                        max_dist=99999, seed=0),
                  schedule_generator=complex_schedule_generator(timed=timed),
                  malfunction_generator_and_process_data=None,
                  number_of_agents=num_agents)
    env.reset(random_seed=int(seed))
    return env

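# A quick usage sketch of the factory above. The argument values are illustrative
# only (they do not appear in the original code):
env = create_multi_agent_environment(dimension=12, num_agents=3, timed=False, seed=7)
print(env.get_num_agents(), env.rail.grid.shape)
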
def create_env(nr_start_goal=10, nr_extra=2, min_dist=8, max_dist=99999, nr_agent=10, seed=0,
               render_mode='PIL'):
    env = RailEnv(width=30, height=30,
                  rail_generator=complex_rail_generator(nr_start_goal, nr_extra, min_dist,
                                                        max_dist, seed),
                  schedule_generator=complex_schedule_generator(),
                  obs_builder_object=GlobalObsForRailEnv(),
                  number_of_agents=nr_agent)
    env_renderer = RenderTool(env, gl=render_mode)
    obs = env.reset()
    return env, env_renderer, obs

def test_schedule_from_file_complex():
    """Test that all parameters are loaded as expected."""
    # Different agent types (trains) with different speeds.
    speed_ratio_map = {
        1.: 0.25,       # Fast passenger train
        1. / 2.: 0.25,  # Fast freight train
        1. / 3.: 0.25,  # Slow commuter train
        1. / 4.: 0.25,  # Slow freight train
    }

    # Generate complex test env
    rail_generator = complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=8, max_dist=99999)
    schedule_generator = complex_schedule_generator(speed_ratio_map)
    create_and_save_env(file_name="./complex_env_test.pkl", rail_generator=rail_generator,
                        schedule_generator=schedule_generator)

    # Load the env back and check the parameters
    rail_generator = rail_from_file("./complex_env_test.pkl")
    schedule_generator = schedule_from_file("./complex_env_test.pkl")
    complex_env_from_file = RailEnv(width=1, height=1,
                                    rail_generator=rail_generator,
                                    schedule_generator=schedule_generator)
    complex_env_from_file.reset(True, True)

    # Assert loaded agent number is correct
    assert complex_env_from_file.get_num_agents() == 10
    # Assert max steps is correct
    assert complex_env_from_file._max_episode_steps == 1350

def test_save_load_mpk():
    env = RailEnv(width=10, height=10,
                  rail_generator=complex_rail_generator(nr_start_goal=2, nr_extra=5, min_dist=6, seed=1),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=2)
    env.reset()

    os.makedirs("tmp", exist_ok=True)
    RailEnvPersister.save(env, "tmp/test_save.mpk")

    # env.load("test_save.dat")
    env2, env_dict = RailEnvPersister.load_new("tmp/test_save.mpk")

    assert env.width == env2.width
    assert env.height == env2.height
    assert len(env2.agents) == len(env.agents)
    for agent1, agent2 in zip(env.agents, env2.agents):
        assert agent1.position == agent2.position
        assert agent1.direction == agent2.direction
        assert agent1.target == agent2.target

def run_benchmark():
    """Run benchmark on a small number of agents in complex rail environment."""
    random.seed(1)
    np.random.seed(1)

    # Example generate a random rail
    env = RailEnv(width=15, height=15,
                  rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=20, min_dist=12),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=5)
    env.reset()

    n_trials = 20
    action_dict = dict()
    action_prob = [0] * 4

    for trials in range(1, n_trials + 1):
        # Reset environment
        obs, info = env.reset()

        # Run episode
        for step in range(100):
            # Action
            for a in range(env.get_num_agents()):
                action = np.random.randint(0, 4)
                action_prob[action] += 1
                action_dict.update({a: action})

            # Environment step
            next_obs, all_rewards, done, _ = env.step(action_dict)
            if done['__all__']:
                break

        if trials % 100 == 0:
            action_prob = [1] * 4

def create_env(seed=None):
    """
    Helper function that creates an env everywhere.
    This way it only needs to be defined here.
    """
    from flatland.envs.rail_env import RailEnv
    from flatland.envs.observations import TreeObsForRailEnv
    from flatland.envs.rail_generators import complex_rail_generator
    from flatland.envs.schedule_generators import complex_schedule_generator

    # TODO make more configurable
    env = RailEnv(width=20, height=20,
                  obs_builder_object=TreeObsForRailEnv(2),
                  rail_generator=complex_rail_generator(nr_start_goal=100, nr_extra=2, min_dist=8,
                                                        max_dist=99999, seed=seed),
                  schedule_generator=complex_schedule_generator(seed=seed),
                  number_of_agents=3,
                  random_seed=seed)
    return env

def env_random_update(input_env, decay, agent=False, hardness_lvl=1):
    agent_num = np.random.randint(1, 5)
    env_width = (agent_num + 2) * 5
    env_height = (agent_num + 2) * 5

    if hardness_lvl == 1:
        rail_generator = complex_rail_generator(nr_start_goal=20, nr_extra=1, min_dist=9,
                                                max_dist=99999, seed=0)
        schedule_generator = complex_schedule_generator()
    else:
        rail_generator = sparse_rail_generator(nr_start_goal=9, nr_extra=1, min_dist=9,
                                               max_dist=99999, seed=0)
        schedule_generator = sparse_schedule_generator()

    global env, env_renderer, render
    if render:
        env_renderer.close_window()
    env = RailEnv(width=env_width, height=env_height,
                  rail_generator=rail_generator,
                  schedule_generator=schedule_generator,
                  obs_builder_object=GlobalObsForRailEnv(),
                  number_of_agents=agent_num)
    env_renderer = RenderTool(env)

def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o == "--sleep-for-animation":  # fixed: `o in ("--...")` was a substring test, not a tuple test
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    env = RailEnv(width=7, height=7,
                  rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=5,
                                                        max_dist=99999, seed=1),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=1,
                  obs_builder_object=SingleAgentNavigationObs())

    obs, info = env.reset()
    env_renderer = RenderTool(env)
    env_renderer.render_env(show=True, frames=True, show_observations=True)

    for step in range(100):
        action = np.argmax(obs[0]) + 1
        obs, all_rewards, done, _ = env.step({0: action})
        print("Rewards: ", all_rewards, " [done=", done, "]")
        env_renderer.render_env(show=True, frames=True, show_observations=True)
        if sleep_for_animation:
            time.sleep(0.1)
        if done["__all__"]:
            break
    env_renderer.close_window()

    height, width, depth = _images[0].shape
    print(len(_images), height, width, depth)
    out = cv2.VideoWriter(f'video_{epoch}.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 30, (width, height))
    for image in _images:
        out.write(cv2.cvtColor(image, cv2.COLOR_RGBA2RGB))
    out.release()


np.random.seed(1)

# Use the complex_rail_generator to generate feasible network configurations with corresponding tasks
# Training on simple small tasks is the best way to get familiar with the environment
obs_builder = GlobalObsForRailEnv()
env = RailEnv(width=20, height=20,
              rail_generator=complex_rail_generator(nr_start_goal=100, nr_extra=2, min_dist=8,
                                                    max_dist=99999),
              schedule_generator=complex_schedule_generator(),
              obs_builder_object=obs_builder,
              number_of_agents=3)
env.reset()
env_renderer = RenderTool(env)

# Import your own Agent or use RLlib to train agents on Flatland
# As an example we use a random agent here
agent_kwargs = {"state_size": 0, "action_size": 5}
controller = RandomController(5)
n_trials = 5

# Empty dictionary for all agent actions
action_dict = dict()
print("Starting Training...")

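# `RandomController` is instantiated above but not defined in these snippets.
# A plausible minimal version, assuming it only needs the action-space size and
# an `act()` method that ignores its input; the original may differ.
import numpy as np

class RandomController:
    def __init__(self, action_size):
        self.action_size = action_size

    def act(self, observation):
        # Sample a uniformly random action regardless of the observation.
        return np.random.randint(0, self.action_size)
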
def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o == "--sleep-for-animation":  # fixed: `o in ("--...")` was a substring test, not a tuple test
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    test_envs_root = "./railway"
    test_env_file_path = "testing_stuff.pkl"
    test_env_file_path = os.path.join(test_envs_root, test_env_file_path)

    x_dim = 7
    y_dim = 7
    n_agents = 4

    stochastic_data = {'prop_malfunction': 0.05,  # Percentage of defective agents
                       'malfunction_rate': 100,   # Rate of malfunction occurrence
                       'min_duration': 20,        # Minimal duration of malfunction
                       'max_duration': 50}        # Max duration of malfunction

    # Different agent types (trains) with different speeds.
    speed_ratio_map = {1.: 0.25,       # Fast passenger train
                       1. / 2.: 0.25,  # Fast freight train
                       1. / 3.: 0.25,  # Slow commuter train
                       1. / 4.: 0.25}  # Slow freight train

    # env = RailEnv(width=1, height=1, rail_generator=rail_from_file(test_env_file_path),
    #               schedule_generator=schedule_from_file(test_env_file_path),
    #               # malfunction_generator_and_process_data=malfunction_from_file(test_env_file_path),
    #               obs_builder_object=MultipleAgentNavigationObs(max_depth=2, predictor=ShortestPathPredictorForRailEnv(30)))
    # # env.number_of_agents = n_agents
    # n_agents = env.number_of_agents

    env = RailEnv(width=x_dim, height=y_dim,
                  rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=6,
                                                        max_dist=99999, seed=1),
                  # sparse_rail_generator(max_num_cities=3,  # Number of cities in map (where train stations are)
                  #                       seed=1,            # Random seed
                  #                       grid_mode=False,
                  #                       max_rails_between_cities=2,
                  #                       max_rails_in_city=3),
                  schedule_generator=complex_schedule_generator(speed_ratio_map),
                  number_of_agents=n_agents,
                  malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
                  # env = RailEnv(width=7, height=7,
                  #               rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1,
                  #                                                     min_dist=5, max_dist=99999, seed=1),
                  #               schedule_generator=complex_schedule_generator(),
                  #               number_of_agents=n_agents,
                  obs_builder_object=MultipleAgentNavigationObs(max_depth=2,
                                                                predictor=ShortestPathPredictorForRailEnv(30)))

    max_steps = int(4 * 2 * (20 + env.height + env.width))
    random_seed = 1  # assumption: `random_seed` is used below but was never defined in the original
    obs, info = env.reset(regenerate_rail=True, regenerate_schedule=True, random_seed=random_seed)

    env_renderer = RenderTool(env, gl="PILSVG")
    env_renderer.render_env(show=True, frames=True, show_observations=True)

    # Reset score and done
    score = 0
    env_done = 0
    step = 0
    for step in range(max_steps):
        action_dict = {}
        for i in range(n_agents):
            if not obs:
                action_dict.update({i: 2})
            elif obs[i] is not None:
                action = np.argmax(obs[i][1:4]) + 1
                action_dict.update({i: action})

        obs, all_rewards, done, _ = env.step(action_dict)
        print("Rewards: ", all_rewards, " [done=", done, "]")
        for a in range(env.get_num_agents()):
            score += all_rewards[a] / env.get_num_agents()

        env_renderer.render_env(show=True, frames=True, show_observations=True)
        if sleep_for_animation:
            time.sleep(0.5)
        if done["__all__"]:
            break

        # Collect information about training
        tasks_finished = 0
        for current_agent in env.agents:
            if current_agent.status == RailAgentStatus.DONE_REMOVED:
                tasks_finished += 1
        done_window = tasks_finished / max(1, env.get_num_agents())
        scores_window = score / max_steps
        print('\rTraining {} Agents on ({},{}).\t Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'
              .format(env.get_num_agents(), x_dim, y_dim, step,
                      np.mean(scores_window), 100 * np.mean(done_window)), end=" ")

    tasks_finished = 0
    for current_agent in env.agents:
        if current_agent.status == RailAgentStatus.DONE_REMOVED:
            tasks_finished += 1
    done_window = tasks_finished / max(1, env.get_num_agents())
    scores_window = score / max_steps
    print('\rTraining {} Agents on ({},{}).\t Total Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'
          .format(env.get_num_agents(), x_dim, y_dim, step,
                  np.mean(scores_window), 100 * np.mean(done_window)), end=" ")

    env_renderer.close_window()

def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o == "--sleep-for-animation":  # fixed: `o in ("--...")` was a substring test, not a tuple test
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    test_envs_root = "./railway"
    test_env_file_path = "testing_stuff.pkl"
    test_env_file_path = os.path.join(test_envs_root, test_env_file_path)

    x_dim = 7
    y_dim = 7
    n_agents = 3

    stochastic_data = {
        'prop_malfunction': 0.05,  # Percentage of defective agents
        'malfunction_rate': 100,   # Rate of malfunction occurrence
        'min_duration': 2,         # Minimal duration of malfunction
        'max_duration': 5          # Max duration of malfunction
    }

    # Different agent types (trains) with different speeds.
    speed_ratio_map = {
        1.: 0.25,       # Fast passenger train
        1. / 2.: 0.25,  # Fast freight train
        1. / 3.: 0.25,  # Slow commuter train
        1. / 4.: 0.25,  # Slow freight train
    }

    # env = RailEnv(width=1, height=1, rail_generator=rail_from_file(test_env_file_path),
    #               schedule_generator=schedule_from_file(test_env_file_path),
    #               # malfunction_generator_and_process_data=malfunction_from_file(test_env_file_path),
    #               obs_builder_object=MultipleAgentNavigationObs(max_depth=2, predictor=ShortestPathPredictorForRailEnv(30)))
    # # env.number_of_agents = n_agents
    # n_agents = env.number_of_agents

    env = RailEnv(width=x_dim, height=y_dim,
                  rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=6,
                                                        max_dist=99999, seed=1),
                  # sparse_rail_generator(max_num_cities=3,  # Number of cities in map (where train stations are)
                  #                       seed=1,            # Random seed
                  #                       grid_mode=False,
                  #                       max_rails_between_cities=2,
                  #                       max_rails_in_city=3),
                  schedule_generator=complex_schedule_generator(speed_ratio_map),
                  number_of_agents=n_agents,
                  malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
                  # env = RailEnv(width=7, height=7,
                  #               rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1,
                  #                                                     min_dist=5, max_dist=99999, seed=1),
                  #               schedule_generator=complex_schedule_generator(),
                  #               number_of_agents=n_agents,
                  obs_builder_object=MultipleAgentNavigationObs(max_depth=2,
                                                                predictor=ShortestPathPredictorForRailEnv(30)))

    max_steps = int(4 * 2 * (20 + env.height + env.width))
    random_seed = 1  # assumption: `random_seed` is used below but was never defined in the original
    obs, info = env.reset(regenerate_rail=True, regenerate_schedule=True, random_seed=random_seed)
    env_renderer = RenderTool(env, gl="PILSVG")
    env_renderer.render_env(show=True, frames=True, show_observations=True)

    # Reset score and done
    score = 0
    env_done = 0
    step = 0
    for step in range(max_steps):
        for i in range(n_agents):
            if obs[i] is not None:
                observations, prediction_data, prediction_pos = obs[i]
                break

        action_dict = {}
        next_shortest_actions = 2 * np.ones(n_agents)
        next_next_shortest_actions = 2 * np.ones(n_agents)
        agent_conflicts = np.zeros((n_agents, n_agents))
        agent_conflicts_count = np.zeros((n_agents, n_agents))
        minDist = -1 * np.ones(n_agents)
        incDiff1 = -1 * np.ones(n_agents)
        incDiff2 = -1 * np.ones(n_agents)
        malfunc = np.zeros(n_agents)
        speed = np.ones(n_agents)
        pos_frac = np.ones(n_agents)
        agent_num_conflicts = []
        vals = []
        counts = []
        counter = np.zeros(n_agents)

        # Scan the 30 predicted time steps for cells occupied by more than one agent.
        for i in range(30):
            pos = prediction_pos[i]
            val, count = np.unique(pos, return_counts=True)
            if val[0] == -1:
                val = val[1:]
                count = count[1:]
            vals.append(val)
            counts.append(count)
            for j, curVal in enumerate(val):
                # curVal = vals[i]
                curCount = count[j]
                if curCount > 1:
                    idxs = np.argwhere(pos == curVal)
                    lsIdx = [int(x) for x in idxs]
                    combs = list(combinations(lsIdx, 2))
                    for k, comb in enumerate(combs):
                        counter[comb[0]] += 1
                        counter[comb[1]] += 1
                        agent_conflicts_count[comb[0], comb[1]] = (counter[comb[0]] + counter[comb[1]]) / 2
                        if agent_conflicts[comb[0], comb[1]] == 0:
                            agent_conflicts[comb[0], comb[1]] = i
                        else:
                            agent_conflicts[comb[0], comb[1]] = min(i, agent_conflicts[comb[0], comb[1]])

        for i in range(n_agents):
            agent_num_conflicts.append(sum(agent_conflicts[i, :]))
            if not obs or obs is None or obs[i] is None:
                action_dict.update({i: 2})
            elif obs[i][0] is not None:
                shortest_action = np.argmax(obs[i][0][1:4]) + 1
                next_shortest_action = np.argmax(obs[i][0][5:7]) + 1
                next_next_shortest_action = np.argmax(obs[i][0][8:10]) + 1
                next_shortest_actions[i] = next_shortest_action
                next_next_shortest_actions[i] = next_next_shortest_action
                malfunc[i] = obs[i][0][-3]
                speed[i] = obs[i][0][-2]
                pos_frac[i] = obs[i][0][-1]
                minDist[i] = obs[i][0][0]
                incDiff1[i] = obs[i][0][-5]
                incDiff2[i] = obs[i][0][-4]
                action_dict.update({i: shortest_action})
            else:
                action_dict.update({i: 2})

        # fixed: the original initialised this to a 0-d array (np.array(-1)), which
        # crashes on mal_agents[0] when no conflicts are found; start empty instead.
        mal_agents = (np.array([], dtype=int),)
        for i in range(n_agents):
            if agent_num_conflicts[i] > 0:
                mal_agents = np.where(malfunc > 0)

        for i, mal_agent in enumerate(mal_agents[0]):
            if mal_agent is None:
                break
            conflict_agents = np.where(agent_conflicts[:, int(mal_agent)] > 0)
            for j, cur_conflict_agent in enumerate(conflict_agents[0]):
                cur_conflict_agent = int(cur_conflict_agent)
                steps_conflict = agent_conflicts[cur_conflict_agent, mal_agent]
                if steps_conflict <= 3:
                    if incDiff1[cur_conflict_agent] == -1:
                        if int(minDist[cur_conflict_agent]) >= 5:
                            action_dict.update({cur_conflict_agent: 4})
                    elif agent_conflicts_count[cur_conflict_agent, mal_agent] > 1:
                        action_dict.update({cur_conflict_agent: 4})
                    elif minDist[cur_conflict_agent] > incDiff1[cur_conflict_agent]:
                        action_dict.update({cur_conflict_agent: 4})
                    else:
                        action_dict.update({cur_conflict_agent: next_shortest_actions[cur_conflict_agent]})

        obs, all_rewards, done, _ = env.step(action_dict)
        print("Rewards: ", all_rewards, " [done=", done, "]")
        for a in range(env.get_num_agents()):
            score += all_rewards[a] / env.get_num_agents()

        env_renderer.render_env(show=True, frames=True, show_observations=True)
        if sleep_for_animation:
            time.sleep(0.5)
        if done["__all__"]:
            break

        # Collect information about training
        tasks_finished = 0
        for current_agent in env.agents:
            if current_agent.status == RailAgentStatus.DONE_REMOVED:
                tasks_finished += 1
        done_window = tasks_finished / max(1, env.get_num_agents())
        scores_window = score / max_steps
        print('\rTraining {} Agents on ({},{}).\t Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'
              .format(env.get_num_agents(), x_dim, y_dim, step,
                      np.mean(scores_window), 100 * np.mean(done_window)), end=" ")

    tasks_finished = 0
    for current_agent in env.agents:
        if current_agent.status == RailAgentStatus.DONE_REMOVED:
            tasks_finished += 1
    done_window = tasks_finished / max(1, env.get_num_agents())
    scores_window = score / max_steps
    print('\rTraining {} Agents on ({},{}).\t Total Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'
          .format(env.get_num_agents(), x_dim, y_dim, step,
                  np.mean(scores_window), 100 * np.mean(done_window)), end=" ")

    env_renderer.close_window()

speed_ratio_map = {
    1.: 0.25,       # Fast passenger train
    1. / 2.: 0.25,  # Fast freight train
    1. / 3.: 0.25,  # Slow commuter train
    1. / 4.: 0.25,  # Slow freight train
}

env = RailEnv(width=width, height=height,
              rail_generator=complex_rail_generator(nr_start_goal=n_start_goal, nr_extra=3,
                                                    min_dist=8, max_dist=99999, seed=seed),
              schedule_generator=complex_schedule_generator(speed_ratio_map=speed_ratio_map),
              number_of_agents=number_agents,
              obs_builder_object=TreeObsForRailEnv(max_depth=5))
env.reset()

env_renderer = RenderTool(env, agent_render_variant=3)
env_renderer.reset()
env_renderer.render_env(show=True, show_predictions=False, show_observations=False)

env.step(dict(zip(range(number_agents), [2] * number_agents)))

# %%
# table = LookUpTable(env.rail.grid, make_table=True)
# dico = table.table
# ag = Agent(env.agents[0])

def evalfun(debug=False, refresh=0.1):  # refresh default = 0.1
    # A list of (mapsize, agent count) tuples, change or extend this to test different sizes.
    # problemsizes = [(5, 1), (7, 2), (10, 3), (13, 4), (40, 20)]
    _seeds = np.random.randint(1, 99, 15)
    # _seed = 2984379

    avg = {}
    for x in range(1, 4):
        avg[x] = []
        problemsizes = [(5, x), (6, x), (8, x), (10, x), (15, x)]
        for problemsize in problemsizes:
            avg_time = 0
            successes = 0
            for seed in _seeds:
                dimension = problemsize[0]
                NUMBER_OF_AGENTS = problemsize[1]

                # Create new environment.
                env = RailEnv(width=dimension, height=dimension,
                              rail_generator=complex_rail_generator(nr_start_goal=int(1.5 * NUMBER_OF_AGENTS),
                                                                    nr_extra=int(1.2 * NUMBER_OF_AGENTS),
                                                                    min_dist=int(floor(dimension / 2)),
                                                                    max_dist=99999, seed=0),
                              schedule_generator=complex_schedule_generator(),
                              malfunction_generator_and_process_data=None,
                              number_of_agents=NUMBER_OF_AGENTS)
                env_renderer = RenderTool(env, screen_width=1920, screen_height=1080)

                # Initialize positions.
                env.reset(random_seed=int(seed))
                if len(env.agents) != NUMBER_OF_AGENTS:
                    continue

                # Time the search.
                start = time.time()
                schedule = planpath.search(env)
                duration = time.time() - start

                # Validate that environment state is unchanged.
                assert env.num_resets == 1 and env._elapsed_steps == 0

                # Run the schedule
                success = False
                if schedule is not None:
                    for action in schedule:
                        _, _, _done, _ = env.step(action)
                        success = _done['__all__']
                        # print(env.agents)
                    if success:
                        avg_time += duration
                        print("Success:", problemsize, seed, duration)
                        successes += 1
                    else:
                        print("Bad schedule - failed.")
                else:
                    print("Couldn't find solution for seed: ", duration, seed)
                env_renderer.render_env(show=True, frames=False, show_observations=False)

            if successes > 0:  # guard: the original divided unconditionally and crashed when nothing succeeded
                avg_time = avg_time / successes
            avg[x].append((problemsize[0], avg_time))

            # print("%10s\t%8s\t%9s" % ("Dimensions", "Success", "Runtime"))
            # if debug:
            #     env_renderer.render_env(show=True, frames=False, show_observations=False)
            #     time.sleep(refresh)
            # Print the performance of the algorithm
            # print("%10s\t%8s\t%9.6f" % (str(problemsize), str(success), duration))
    print(avg)

def evalfun(debug=False, refresh=0.1):  # refresh default = 0.1
    # A list of (mapsize, agent count) tuples, change or extend this to test different sizes.
    # problemsizes = [(5, 1), (7, 2), (10, 3), (13, 4), (40, 20)]
    # problemsizes = [(5, 1), (5, 2), (6, 3), (7, 3), (14, 4), (8, 5)]
    problemsizes = [(6, 3)]
    _seed = np.random.randint(1, 9999999)
    # _seed = 2
    print("Seed:", _seed)

    print("%10s\t%8s\t%9s" % ("Dimensions", "Success", "Runtime"))
    for problemsize in problemsizes:
        dimension = problemsize[0]
        NUMBER_OF_AGENTS = problemsize[1]

        # Create new environment.
        env = RailEnv(width=dimension, height=dimension,
                      rail_generator=complex_rail_generator(nr_start_goal=int(1.5 * NUMBER_OF_AGENTS),
                                                            nr_extra=int(1.2 * NUMBER_OF_AGENTS),
                                                            min_dist=int(floor(dimension / 2)),
                                                            max_dist=99999, seed=0),
                      schedule_generator=complex_schedule_generator(),
                      malfunction_generator_and_process_data=None,
                      number_of_agents=NUMBER_OF_AGENTS)
        env_renderer = RenderTool(env, screen_width=1920, screen_height=1080)

        # Initialize positions.
        env.reset(random_seed=_seed)
        env_renderer.render_env(show=True, frames=False, show_observations=False)

        # Time the search.
        start = time.time()
        schedule = planpath.search(env)
        # schedule = planpath.better_search(env)
        duration = time.time() - start
        if debug:
            env_renderer.render_env(show=True, frames=False, show_observations=False)
            time.sleep(refresh)

        # Validate that environment state is unchanged.
        assert env.num_resets == 1 and env._elapsed_steps == 0

        # Run the schedule
        success = False
        if schedule is not None:  # guard: the original iterated over `schedule` even when the search failed
            for action in schedule:
                _, _, _done, _ = env.step(action)
                success = _done['__all__']
                # print(env.agents)
                if debug:
                    # for agent in env.agents:
                    #     if agent.position:
                    #         agent_y, agent_x = agent.position
                    #         print(env.get_valid_directions_on_grid(agent_y, agent_x))
                    print(action)
                    env_renderer.render_env(show=True, frames=False, show_observations=False)
                    time.sleep(refresh)

        # Print the performance of the algorithm
        print("%10s\t%8s\t%9.6f" % (str(problemsize), str(success), duration))

def main(argv):
    try:
        opts, args = getopt.getopt(argv, "n:", ["n_episodes="])
    except getopt.GetoptError:
        print('training_navigation.py -n <n_episodes>')
        sys.exit(2)
    for opt, arg in opts:
        if opt in ('-n', '--n_episodes'):
            n_episodes = int(arg)

    # Initialize the random seeds
    random.seed(1)
    np.random.seed(1)

    # Initialize a random map with a random number of agents
    x_dim = np.random.randint(8, 20)
    y_dim = np.random.randint(8, 20)
    n_agents = np.random.randint(3, 8)
    n_goals = n_agents + np.random.randint(0, 3)
    min_dist = int(0.75 * min(x_dim, y_dim))
    tree_depth = 2
    print("main2")
    demo = False

    # Get an observation builder and predictor
    observation_helper = TreeObsForRailEnv(max_depth=tree_depth,
                                           predictor=ShortestPathPredictorForRailEnv())

    env = RailEnv(width=x_dim, height=y_dim,
                  rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5,
                                                        min_dist=min_dist, max_dist=99999, seed=0),
                  schedule_generator=complex_schedule_generator(),
                  obs_builder_object=observation_helper,
                  number_of_agents=n_agents)
    env.reset(True, True)
    handle = env.get_agent_handles()
    features_per_node = env.obs_builder.observation_dim
    nr_nodes = 0
    for i in range(tree_depth + 1):
        nr_nodes += np.power(4, i)
    state_size = 2 * features_per_node * nr_nodes  # We will use two time steps per observation --> 2x state_size
    action_size = 5

    # We set the number of episodes we would like to train on
    if 'n_episodes' not in locals():
        n_episodes = 60000

    # Set max number of steps per episode as well as other training relevant parameters
    max_steps = int(3 * (env.height + env.width))
    eps = 1.
    eps_end = 0.005
    eps_decay = 0.9995
    action_dict = dict()
    final_action_dict = dict()
    scores_window = deque(maxlen=100)
    done_window = deque(maxlen=100)
    time_obs = deque(maxlen=2)
    scores = []
    dones_list = []
    action_prob = [0] * action_size
    agent_obs = [None] * env.get_num_agents()
    agent_next_obs = [None] * env.get_num_agents()

    # Initialize the agent
    agent = Agent(state_size, action_size)

    # Here you can pre-load an agent
    if False:
        with path(torch_training.Nets, "avoid_checkpoint500.pth") as file_in:
            agent.qnetwork_local.load_state_dict(torch.load(file_in))

    # Do training over n_episodes
    for episodes in range(1, n_episodes + 1):
        """
        Training Curriculum: In order to get good generalization we change the number of agents
        and the size of the levels every 50 episodes.
        """
        if episodes % 50 == 0:
            x_dim = np.random.randint(8, 20)
            y_dim = np.random.randint(8, 20)
            n_agents = np.random.randint(3, 8)
            n_goals = n_agents + np.random.randint(0, 3)
            min_dist = int(0.75 * min(x_dim, y_dim))
            env = RailEnv(width=x_dim, height=y_dim,
                          rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5,
                                                                min_dist=min_dist, max_dist=99999, seed=0),
                          schedule_generator=complex_schedule_generator(),
                          obs_builder_object=TreeObsForRailEnv(max_depth=3,
                                                               predictor=ShortestPathPredictorForRailEnv()),
                          number_of_agents=n_agents)

            # Adjust the parameters according to the new env.
            max_steps = int(3 * (env.height + env.width))
            agent_obs = [None] * env.get_num_agents()
            agent_next_obs = [None] * env.get_num_agents()

        # Reset environment
        obs, info = env.reset(True, True)

        # Setup placeholders for the final observation of each agent. This is necessary
        # because agents terminate at different times during an episode.
        final_obs = agent_obs.copy()
        final_obs_next = agent_next_obs.copy()

        # Build agent specific observations
        for a in range(env.get_num_agents()):
            data, distance, agent_data = split_tree_into_feature_groups(obs[a], tree_depth)
            data = norm_obs_clip(data)
            distance = norm_obs_clip(distance)
            agent_data = np.clip(agent_data, -1, 1)
            obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))

        # Accumulate two time steps of observation (Here just twice the first state)
        for i in range(2):
            time_obs.append(obs)

        # Build the agent specific double time step observation
        for a in range(env.get_num_agents()):
            agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))

        score = 0
        env_done = 0

        # Run episode
        for step in range(max_steps):
            # Action
            for a in range(env.get_num_agents()):
                if demo:
                    eps = 0
                # action = agent.act(np.array(obs[a]), eps=eps)
                action = agent.act(agent_obs[a], eps=eps)
                action_prob[action] += 1
                action_dict.update({a: action})

            # Environment step
            next_obs, all_rewards, done, _ = env.step(action_dict)
            for a in range(env.get_num_agents()):
                data, distance, agent_data = split_tree_into_feature_groups(next_obs[a], tree_depth)
                data = norm_obs_clip(data)
                distance = norm_obs_clip(distance)
                agent_data = np.clip(agent_data, -1, 1)
                next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
            time_obs.append(next_obs)

            # Update replay buffer and train agent
            for a in range(env.get_num_agents()):
                agent_next_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
                if done[a]:
                    final_obs[a] = agent_obs[a].copy()
                    final_obs_next[a] = agent_next_obs[a].copy()
                    final_action_dict.update({a: action_dict[a]})
                if not demo and not done[a]:
                    agent.step(agent_obs[a], action_dict[a], all_rewards[a], agent_next_obs[a], done[a])
                score += all_rewards[a] / env.get_num_agents()

            agent_obs = agent_next_obs.copy()
            if done['__all__']:
                env_done = 1
                for a in range(env.get_num_agents()):
                    agent.step(final_obs[a], final_action_dict[a], all_rewards[a], final_obs_next[a], done[a])
                break

        # Epsilon decay
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon

        done_window.append(env_done)
        scores_window.append(score / max_steps)  # save most recent score
        scores.append(np.mean(scores_window))
        dones_list.append(np.mean(done_window))

        print('\rTraining {} Agents on ({},{}).\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'
              'Epsilon: {:.2f} \t Action Probabilities: \t {}'.format(
                  env.get_num_agents(), x_dim, y_dim, episodes,
                  np.mean(scores_window), 100 * np.mean(done_window), eps,
                  action_prob / np.sum(action_prob)), end=" ")

        if episodes % 100 == 0:
            print('\rTraining {} Agents.\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'
                  'Epsilon: {:.2f} \t Action Probabilities: \t {}'.format(
                      env.get_num_agents(), episodes,
                      np.mean(scores_window), 100 * np.mean(done_window), eps,
                      action_prob / np.sum(action_prob)))
            torch.save(agent.qnetwork_local.state_dict(), './Nets/avoid_checkpoint' + str(episodes) + '.pth')
            action_prob = [1] * action_size

    plt.plot(scores)
    plt.show()

            current['valid'] = False
            current['distance_to_goal'] = np.inf
    return weights


seed = random.randint(0, 2 ** 32)
print(f"Seed: {seed}")

env = RailEnv(width=20, height=20,
              rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=10, min_dist=5,
                                                    max_dist=99999, seed=seed),
              schedule_generator=complex_schedule_generator(),
              number_of_agents=1,
              obs_builder_object=CustomWeightObserver())

env_renderer = RenderTool(env, gl="PILSVG")
agent = AbelAgent(218, 5)
n_trials = 50

for trials in range(1, n_trials + 1):
    # Reset Environment
    obs = env.reset()
    env_renderer.reset()
    env_renderer.render_env(show=True, frames=True, show_observations=True)
    score = 0

def run_test(parameters, agent, test_nr=0, tree_depth=3):
    # Parameter initialization
    lp = LineProfiler()
    features_per_node = 9
    start_time_scoring = time.time()
    action_dict = dict()
    nr_trials_per_test = 5
    print('Running Test {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(
        test_nr, parameters[0], parameters[1], parameters[2]))

    # Reset all measurements
    time_obs = deque(maxlen=2)
    test_scores = []
    test_dones = []

    # Reset environment
    random.seed(parameters[3])
    np.random.seed(parameters[3])
    nr_paths = max(2, parameters[2] + int(0.5 * parameters[2]))
    min_dist = int(min([parameters[0], parameters[1]]) * 0.75)
    env = RailEnv(width=parameters[0], height=parameters[1],
                  rail_generator=complex_rail_generator(nr_start_goal=nr_paths, nr_extra=5,
                                                        min_dist=min_dist, max_dist=99999,
                                                        seed=parameters[3]),
                  schedule_generator=complex_schedule_generator(),
                  obs_builder_object=GlobalObsForRailEnv(),
                  number_of_agents=parameters[2])
    max_steps = int(3 * (env.height + env.width))
    lp_step = lp(env.step)
    lp_reset = lp(env.reset)

    agent_obs = [None] * env.get_num_agents()
    printProgressBar(0, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
    for trial in range(nr_trials_per_test):
        # Reset the env
        lp_reset(True, True)
        obs, info = env.reset(True, True)
        for a in range(env.get_num_agents()):
            data, distance, agent_data = split_tree_into_feature_groups(obs[a], tree_depth)
            data = norm_obs_clip(data)
            distance = norm_obs_clip(distance)
            agent_data = np.clip(agent_data, -1, 1)
            obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
        for i in range(2):
            time_obs.append(obs)
        for a in range(env.get_num_agents()):
            agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))

        # Run episode
        trial_score = 0
        for step in range(max_steps):
            for a in range(env.get_num_agents()):
                action = agent.act(agent_obs[a], eps=0)
                action_dict.update({a: action})

            # Environment step
            next_obs, all_rewards, done, _ = lp_step(action_dict)
            for a in range(env.get_num_agents()):
                data, distance, agent_data = split_tree_into_feature_groups(next_obs[a], tree_depth)
                data = norm_obs_clip(data)
                distance = norm_obs_clip(distance)
                agent_data = np.clip(agent_data, -1, 1)
                next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
            time_obs.append(next_obs)
            for a in range(env.get_num_agents()):
                agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
                trial_score += all_rewards[a] / env.get_num_agents()
            if done['__all__']:
                break
        test_scores.append(trial_score / max_steps)
        test_dones.append(done['__all__'])
        printProgressBar(trial + 1, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
    end_time_scoring = time.time()
    tot_test_time = end_time_scoring - start_time_scoring
    lp.print_stats()
    return test_scores, test_dones, tot_test_time