import getopt
import sys
import time

import numpy as np

from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator
from flatland.utils.rendertools import RenderTool

# ObservePredictions (the custom observation builder used below) and the
# str2bool helper are assumed to be defined earlier in this module.


def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation="])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o == "--sleep-for-animation":
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    # Instantiate the predictor
    custom_predictor = ShortestPathPredictorForRailEnv(10)

    # Pass the predictor to the observation builder
    custom_obs_builder = ObservePredictions(custom_predictor)

    # Initialize the environment
    env = RailEnv(width=10, height=10,
                  rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=1, min_dist=8,
                                                        max_dist=99999, seed=1),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=3,
                  obs_builder_object=custom_obs_builder)

    obs, info = env.reset()
    env_renderer = RenderTool(env, gl="PILSVG")

    # Render the initial step and show the observed cells as colored boxes
    env_renderer.render_env(show=True, frames=True, show_observations=True, show_predictions=False)

    action_dict = {}
    for step in range(100):
        # Random actions for every agent, just to drive the animation
        for a in range(env.get_num_agents()):
            action = np.random.randint(0, 5)
            action_dict[a] = action
        obs, all_rewards, done, _ = env.step(action_dict)
        print("Rewards: ", all_rewards, " [done=", done, "]")
        env_renderer.render_env(show=True, frames=True, show_observations=True, show_predictions=False)
        if sleep_for_animation:
            time.sleep(0.5)
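# Every example in this file parses --sleep-for-animation through a str2bool
# helper whose definition is not shown here. A minimal sketch consistent with
# how it is called (the original implementation may differ):
def str2bool(v):
    # Treat the usual truthy spellings as True; everything else is False.
    return str(v).lower() in ("yes", "true", "t", "1")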
import getopt
import sys
import time

import numpy as np

from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator
from flatland.utils.rendertools import RenderTool

# SingleAgentNavigationObs and str2bool are assumed to be defined earlier
# in this module.


def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation="])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o == "--sleep-for-animation":
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    env = RailEnv(width=7, height=7,
                  rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=5,
                                                        max_dist=99999, seed=1),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=1,
                  obs_builder_object=SingleAgentNavigationObs())

    obs, info = env.reset()
    env_renderer = RenderTool(env)
    env_renderer.render_env(show=True, frames=True, show_observations=True)
    for step in range(100):
        # The observation one-hot encodes the viable branch; +1 maps the
        # argmax onto the left/forward/right action codes.
        action = np.argmax(obs[0]) + 1
        obs, all_rewards, done, _ = env.step({0: action})
        print("Rewards: ", all_rewards, " [done=", done, "]")
        env_renderer.render_env(show=True, frames=True, show_observations=True)
        if sleep_for_animation:
            time.sleep(0.1)
        if done["__all__"]:
            break
    env_renderer.close_window()
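# The examples are intended to be run as scripts; an entry point along these
# lines is assumed (it is not part of the excerpt above):
#
#   if __name__ == '__main__':
#       main(sys.argv[1:])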
import getopt
import os
import sys
import time

import numpy as np

from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.malfunction_generators import malfunction_from_params
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator
from flatland.utils.rendertools import RenderTool

# MultipleAgentNavigationObs, str2bool and random_seed are assumed to be
# defined earlier in this module.


def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation="])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o == "--sleep-for-animation":
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    test_envs_root = "./railway"
    test_env_file_path = "testing_stuff.pkl"
    test_env_file_path = os.path.join(test_envs_root, test_env_file_path)

    x_dim = 7
    y_dim = 7
    n_agents = 4

    stochastic_data = {'prop_malfunction': 0.05,  # Percentage of defective agents
                       'malfunction_rate': 100,   # Rate of malfunction occurrence
                       'min_duration': 20,        # Minimal duration of malfunction
                       'max_duration': 50         # Max duration of malfunction
                       }

    # Different agent types (trains) with different speeds.
    speed_ration_map = {1.: 0.25,       # Fast passenger train
                        1. / 2.: 0.25,  # Fast freight train
                        1. / 3.: 0.25,  # Slow commuter train
                        1. / 4.: 0.25}  # Slow freight train

    # Alternative: load a saved environment instead of generating one (the
    # *_from_file generators come from the flatland rail/schedule/malfunction
    # generator modules when enabled).
    # env = RailEnv(width=1, height=1, rail_generator=rail_from_file(test_env_file_path),
    #               schedule_generator=schedule_from_file(test_env_file_path),
    #               malfunction_generator_and_process_data=malfunction_from_file(test_env_file_path),
    #               obs_builder_object=MultipleAgentNavigationObs(max_depth=2,
    #                                                             predictor=ShortestPathPredictorForRailEnv(30)))
    # n_agents = env.number_of_agents

    env = RailEnv(width=x_dim, height=y_dim,
                  rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=6,
                                                        max_dist=99999, seed=1),
                  # (a sparse_rail_generator(...) setup is a drop-in alternative; see the next example)
                  schedule_generator=complex_schedule_generator(speed_ration_map),
                  number_of_agents=n_agents,
                  malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
                  obs_builder_object=MultipleAgentNavigationObs(max_depth=2,
                                                                predictor=ShortestPathPredictorForRailEnv(30)))

    max_steps = int(4 * 2 * (20 + env.height + env.width))
    obs, info = env.reset(regenerate_rail=True, regenerate_schedule=True, random_seed=random_seed)

    env_renderer = RenderTool(env, gl="PILSVG")
    env_renderer.render_env(show=True, frames=True, show_observations=True)

    # Reset score and done
    score = 0
    env_done = 0
    step = 0
    for step in range(max_steps):
        action_dict = {}
        for i in range(n_agents):
            if not obs:
                action_dict.update({i: 2})  # default: move forward
            elif obs[i] is not None:
                action = np.argmax(obs[i][1:4]) + 1
                action_dict.update({i: action})

        obs, all_rewards, done, _ = env.step(action_dict)
        print("Rewards: ", all_rewards, " [done=", done, "]")
        for a in range(env.get_num_agents()):
            score += all_rewards[a] / env.get_num_agents()

        env_renderer.render_env(show=True, frames=True, show_observations=True)
        if sleep_for_animation:
            time.sleep(0.5)
        if done["__all__"]:
            break

        # Collect information about training progress
        tasks_finished = 0
        for current_agent in env.agents:
            if current_agent.status == RailAgentStatus.DONE_REMOVED:
                tasks_finished += 1
        done_window = tasks_finished / max(1, env.get_num_agents())
        scores_window = score / max_steps
        print(
            '\rTraining {} Agents on ({},{}).\t Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'.format(
                env.get_num_agents(), x_dim, y_dim, step,
                np.mean(scores_window), 100 * np.mean(done_window)), end=" ")

    # Final summary after the episode ends
    tasks_finished = 0
    for current_agent in env.agents:
        if current_agent.status == RailAgentStatus.DONE_REMOVED:
            tasks_finished += 1
    done_window = tasks_finished / max(1, env.get_num_agents())
    scores_window = score / max_steps
    print(
        '\rTraining {} Agents on ({},{}).\t Total Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'.format(
            env.get_num_agents(), x_dim, y_dim, step,
            np.mean(scores_window), 100 * np.mean(done_window)), end=" ")

    env_renderer.close_window()
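# For the 7x7 environment above the step budget works out to
#   max_steps = int(4 * 2 * (20 + 7 + 7)) = int(8 * 34) = 272 steps.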
import getopt
import os
import sys
import time
from itertools import combinations

import numpy as np

from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.malfunction_generators import malfunction_from_params
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
from flatland.utils.rendertools import RenderTool

# MultipleAgentNavigationObs, str2bool, test_env_no, level_no and random_seed
# are assumed to be defined earlier in this module.


def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation="])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o == "--sleep-for-animation":
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    test_envs_root = f"./test-envs/Test_{test_env_no}"
    test_env_file_path = f"Level_{level_no}.pkl"
    test_env_file_path = os.path.join(test_envs_root, test_env_file_path)

    x_dim = 35
    y_dim = 35
    n_agents = 10

    stochastic_data = {'prop_malfunction': 0.05,  # Percentage of defective agents
                       'malfunction_rate': 100,   # Rate of malfunction occurrence
                       'min_duration': 2,         # Minimal duration of malfunction
                       'max_duration': 5          # Max duration of malfunction
                       }

    # Different agent types (trains) with different speeds.
    speed_ration_map = {1.: 0.25,       # Fast passenger train
                        1. / 2.: 0.25,  # Fast freight train
                        1. / 3.: 0.25,  # Slow commuter train
                        1. / 4.: 0.25}  # Slow freight train

    # (complex_rail_generator / complex_schedule_generator, as in the previous
    # example, are drop-in alternatives to the sparse generators used here.)
    env = RailEnv(width=x_dim, height=y_dim,
                  rail_generator=sparse_rail_generator(max_num_cities=3,  # Number of cities (train stations)
                                                       seed=1,            # Random seed
                                                       grid_mode=False,
                                                       max_rails_between_cities=2,
                                                       max_rails_in_city=3),
                  schedule_generator=sparse_schedule_generator(speed_ration_map),
                  number_of_agents=n_agents,
                  malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
                  obs_builder_object=MultipleAgentNavigationObs(max_depth=2,
                                                                predictor=ShortestPathPredictorForRailEnv(30)))

    # Alternative: load a saved test environment from file.
    # print(f"Testing Environment: {test_env_file_path} with seed: {random_seed}")
    # env = RailEnv(width=1, height=1, rail_generator=rail_from_file(test_env_file_path),
    #               schedule_generator=schedule_from_file(test_env_file_path),
    #               malfunction_generator_and_process_data=malfunction_from_file(test_env_file_path),
    #               obs_builder_object=MultipleAgentNavigationObs(max_depth=2,
    #                                                             predictor=ShortestPathPredictorForRailEnv(30)))

    obs, info = env.reset(regenerate_rail=True, regenerate_schedule=True, activate_agents=False,
                          random_seed=random_seed)
    n_agents = env.get_num_agents()
    x_dim, y_dim = env.width, env.height
    max_steps = int(4 * 2 * (20 + env.height + env.width))

    env_renderer = RenderTool(env, gl="PILSVG")
    env_renderer.render_env(show=True, frames=True, show_observations=True)

    # Reset score and done
    score = 0
    env_done = 0
    step = 0
    for step in range(max_steps):
        # Take the prediction data from the first agent that has an observation.
        for i in range(n_agents):
            if obs[i] is not None:
                observations, prediction_data, prediction_pos = obs[i]
                break

        action_dict = {}
        next_shortest_actions = 2 * np.ones(n_agents)
        next_next_shortest_actions = 2 * np.ones(n_agents)
        agent_conflicts = np.zeros((n_agents, n_agents))
        agent_conflicts_count = np.zeros((n_agents, n_agents))
        minDist = -1 * np.ones(n_agents)
        incDiff1 = -1 * np.ones(n_agents)
        incDiff2 = -1 * np.ones(n_agents)
        malfunc = np.zeros(n_agents)
        speed = np.ones(n_agents)
        pos_frac = np.ones(n_agents)
        agent_num_conflicts = []
        vals = []    # per-step unique positions (kept for debugging)
        counts = []  # per-step occupancy counts (kept for debugging)
        counter = np.zeros(n_agents)

        # Scan the 30 predicted time steps for cells that two or more agents
        # are expected to occupy at the same time.
        for i in range(30):
            pos = prediction_pos[i]
            val, count = np.unique(pos, return_counts=True)
            if val[0] == -1:  # -1 marks "no prediction"; drop it
                val = val[1:]
                count = count[1:]
            vals.append(val)
            counts.append(count)
            for j, curVal in enumerate(val):
                curCount = count[j]
                if curCount > 1:
                    idxs = np.argwhere(pos == curVal)
                    lsIdx = [int(x) for x in idxs]
                    combs = list(combinations(lsIdx, 2))
                    for k, comb in enumerate(combs):
                        counter[comb[0]] += 1
                        counter[comb[1]] += 1
                        agent_conflicts_count[comb[0], comb[1]] = (counter[comb[0]] + counter[comb[1]]) / 2
                        # Remember the earliest predicted step of the conflict
                        if agent_conflicts[comb[0], comb[1]] == 0:
                            agent_conflicts[comb[0], comb[1]] = i
                        else:
                            agent_conflicts[comb[0], comb[1]] = min(i, agent_conflicts[comb[0], comb[1]])

        for i in range(n_agents):
            agent_num_conflicts.append(sum(agent_conflicts[i, :]))
            if not obs or obs[i] is None:
                action_dict.update({i: 2})
            elif obs[i][0] is not None:
                shortest_action = np.argmax(obs[i][0][1:4]) + 1
                next_shortest_action = np.argmax(obs[i][0][5:7]) + 1
                next_next_shortest_action = np.argmax(obs[i][0][8:10]) + 1
                next_shortest_actions[i] = next_shortest_action
                next_next_shortest_actions[i] = next_next_shortest_action
                malfunc[i] = obs[i][0][-3]
                speed[i] = obs[i][0][-2]
                pos_frac[i] = obs[i][0][-1]
                minDist[i] = obs[i][0][0]
                incDiff1[i] = obs[i][0][-5]
                incDiff2[i] = obs[i][0][-4]
                action_dict.update({i: shortest_action})
            else:
                action_dict.update({i: 2})

        # If any conflicts were predicted, collect the currently malfunctioning
        # agents; other agents must route around them. (The original code used
        # a scalar sentinel here, which crashes when indexed; an empty index
        # tuple keeps the loop below a no-op instead.)
        mal_agents = (np.array([], dtype=int),)
        for i in range(n_agents):
            if agent_num_conflicts[i] > 0:
                mal_agents = np.where(malfunc > 0)

        for i, mal_agent in enumerate(mal_agents[0]):
            if mal_agent is None:
                break
            conflict_agents = np.where(agent_conflicts[:, int(mal_agent)] > 0)
            for j, cur_conflict_agent in enumerate(conflict_agents[0]):
                cur_conflict_agent = int(cur_conflict_agent)
                steps_conflict = agent_conflicts[cur_conflict_agent, mal_agent]
                if steps_conflict <= 3:  # conflict is imminent
                    if incDiff1[cur_conflict_agent] == -1:
                        if int(minDist[cur_conflict_agent]) >= 5:
                            action_dict.update({cur_conflict_agent: 4})  # stop
                    elif agent_conflicts_count[cur_conflict_agent, mal_agent] > 1:
                        action_dict.update({cur_conflict_agent: 4})  # stop
                    elif minDist[cur_conflict_agent] > incDiff1[cur_conflict_agent]:
                        action_dict.update({cur_conflict_agent: 4})  # stop
                    else:
                        action_dict.update(
                            {cur_conflict_agent: int(next_shortest_actions[cur_conflict_agent])})

        obs, all_rewards, done, _ = env.step(action_dict)
        print("Rewards: ", all_rewards, " [done=", done, "]")
        for a in range(env.get_num_agents()):
            score += all_rewards[a] / env.get_num_agents()

        env_renderer.render_env(show=True, frames=True, show_observations=True)
        if sleep_for_animation:
            time.sleep(0.5)
        if done["__all__"]:
            break

        # Collect information about training progress
        tasks_finished = 0
        for current_agent in env.agents:
            if current_agent.status == RailAgentStatus.DONE_REMOVED:
                tasks_finished += 1
        done_window = tasks_finished / max(1, env.get_num_agents())
        scores_window = score / max_steps
        print(
            '\rTraining {} Agents on ({},{}).\t Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'.format(
                env.get_num_agents(), x_dim, y_dim, step,
                np.mean(scores_window), 100 * np.mean(done_window)), end=" ")

    # Final summary after the episode ends
    tasks_finished = 0
    for current_agent in env.agents:
        if current_agent.status == RailAgentStatus.DONE_REMOVED:
            tasks_finished += 1
    done_window = tasks_finished / max(1, env.get_num_agents())
    scores_window = score / max_steps
    print(
        '\rTraining {} Agents on ({},{}).\t Total Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'.format(
            env.get_num_agents(), x_dim, y_dim, step,
            np.mean(scores_window), 100 * np.mean(done_window)), end=" ")

    env_renderer.close_window()
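# A minimal, self-contained sketch of the conflict-detection idea used in the
# loop above: given one time slice of predicted positions (one entry per agent,
# -1 meaning "no prediction"), every position that occurs more than once yields
# a conflicting agent pair. The function name is illustrative only.
import numpy as np
from itertools import combinations


def conflicting_pairs(pos):
    pairs = []
    val, count = np.unique(pos, return_counts=True)
    for v, c in zip(val, count):
        if v == -1 or c < 2:
            continue  # unoccupied marker, or no clash on this cell
        idxs = [int(i) for i in np.flatnonzero(pos == v)]
        pairs.extend(combinations(idxs, 2))
    return pairs


# Agents 0 and 2 are both predicted onto cell 17 at this time step:
print(conflicting_pairs(np.array([17, 5, 17, -1])))  # -> [(0, 2)]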
import getopt
import os
import sys
import time

import numpy as np

from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.malfunction_generators import malfunction_from_file
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import rail_from_file
from flatland.envs.schedule_generators import schedule_from_file
from flatland.utils.rendertools import RenderTool

# MultipleAgentNavigationObs, str2bool, test_env_no, level_no and random_seed
# are assumed to be defined earlier in this module.


def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation="])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o == "--sleep-for-animation":
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    test_envs_root = f"./test-envs/Test_{test_env_no}"
    test_env_file_path = f"Level_{level_no}.pkl"
    test_env_file_path = os.path.join(test_envs_root, test_env_file_path)
    print(f"Testing Environment: {test_env_file_path} with seed: {random_seed}")

    # Width/height are placeholders; the real dimensions come from the file.
    env = RailEnv(width=1, height=1,
                  rail_generator=rail_from_file(test_env_file_path),
                  schedule_generator=schedule_from_file(test_env_file_path),
                  malfunction_generator_and_process_data=malfunction_from_file(test_env_file_path),
                  obs_builder_object=MultipleAgentNavigationObs(max_depth=2,
                                                                predictor=ShortestPathPredictorForRailEnv(30)))

    obs, info = env.reset(regenerate_rail=True, regenerate_schedule=True, activate_agents=False,
                          random_seed=random_seed)
    # Compute the step budget only after reset, once the real map dimensions
    # have been loaded from the file (before reset they are still 1x1).
    max_steps = int(4 * 2 * (20 + env.height + env.width))

    env_renderer = RenderTool(env, gl="PILSVG")
    env_renderer.render_env(show=True, frames=True, show_observations=True)

    n_agents = env.get_num_agents()
    x_dim, y_dim = env.width, env.height

    # Reset score and done
    score = 0
    env_done = 0
    step = 0
    for step in range(max_steps):
        action_dict = {}
        for i in range(n_agents):
            if not obs:
                action_dict.update({i: 2})  # default: move forward
            elif obs[i] is not None:
                action = np.argmax(obs[i][1:4]) + 1
                action_dict.update({i: action})

        obs, all_rewards, done, _ = env.step(action_dict)
        print("Rewards: ", all_rewards, " [done=", done, "]")
        for a in range(env.get_num_agents()):
            score += all_rewards[a] / env.get_num_agents()

        env_renderer.render_env(show=True, frames=True, show_observations=True)
        if sleep_for_animation:
            time.sleep(0.5)
        if done["__all__"]:
            break

        # Collect information about training progress
        tasks_finished = 0
        for current_agent in env.agents:
            if current_agent.status == RailAgentStatus.DONE_REMOVED:
                tasks_finished += 1
        done_window = tasks_finished / max(1, env.get_num_agents())
        scores_window = score / max_steps
        print(
            '\rTraining {} Agents on ({},{}).\t Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'.format(
                n_agents, x_dim, y_dim, step, np.mean(scores_window), 100 * np.mean(done_window)), end=" ")

    # Final summary after the episode ends
    tasks_finished = 0
    for current_agent in env.agents:
        if current_agent.status == RailAgentStatus.DONE_REMOVED:
            tasks_finished += 1
    done_window = tasks_finished / max(1, env.get_num_agents())
    scores_window = score / max_steps
    print(
        '\rTraining {} Agents on ({},{}).\t Total Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'.format(
            n_agents, x_dim, y_dim, step, np.mean(scores_window), 100 * np.mean(done_window)), end=" ")

    env_renderer.close_window()
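# The greedy mapping np.argmax(obs[i][1:4]) + 1 used in several of these
# examples converts the scores for the left/forward/right branches into the
# matching RailEnv action code (1 = move left, 2 = move forward,
# 3 = move right). A toy illustration with made-up observation values:
import numpy as np

obs_vec = np.array([0.0, 0.1, 0.8, 0.1])  # illustrative values only
print(np.argmax(obs_vec[1:4]) + 1)  # -> 2, i.e. move forward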
import gc
import getopt
import json
import os
import sys
import time
from collections import deque

import numpy as np
import pandas as pd

from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.malfunction_generators import malfunction_from_file
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import rail_from_file
from flatland.envs.schedule_generators import schedule_from_file

from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder
from ray.rllib.offline.json_writer import JsonWriter

# str2bool, the imitate flag, and normalize_observation (the tree-observation
# flattening utility from the flatland baselines) are assumed to be
# defined/imported elsewhere in this module.


def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation="])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o == "--sleep-for-animation":
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    batch_builder = SampleBatchBuilder()  # or MultiAgentSampleBatchBuilder
    writer = JsonWriter("./out/")

    # Setting these two parameters to True can slow down training
    visuals = False
    sleep_for_animation = False
    if visuals:
        from flatland.utils.rendertools import RenderTool

    max_depth = 30
    tree_depth = 2
    trial_start = 100
    n_trials = 999
    start = 0

    columns = ['Agents', 'X_DIM', 'Y_DIM', 'TRIAL_NO', 'REWARD',
               'NORMALIZED_REWARD', 'DONE_RATIO', 'STEPS', 'ACTION_PROB']
    df_all_results = pd.DataFrame(columns=columns)

    for trials in range(trial_start, n_trials + 1):
        env_file = f"envs-100-999/envs/Level_{trials}.pkl"
        # env_file = f"../env_configs/round_1-small/Test_0/Level_{trials}.mpk"
        # file = f"../env_configs/actions-small/Test_0/Level_{trials}.mpk"
        file = f"envs-100-999/actions/envs/Level_{trials}.json"

        if not os.path.isfile(env_file) or not os.path.isfile(file):
            print("Missing file!", env_file, file)
            continue

        step = 0
        obs_builder_object = TreeObsForRailEnv(max_depth=tree_depth,
                                               predictor=ShortestPathPredictorForRailEnv(max_depth))
        env = RailEnv(width=1, height=1,
                      rail_generator=rail_from_file(env_file),
                      schedule_generator=schedule_from_file(env_file),
                      malfunction_generator_and_process_data=malfunction_from_file(env_file),
                      obs_builder_object=obs_builder_object)
        obs, info = env.reset(regenerate_rail=True, regenerate_schedule=True, activate_agents=False,
                              random_seed=1001)

        with open(file, "r") as files:
            expert_actions = json.load(files)

        n_agents = env.get_num_agents()
        x_dim, y_dim = env.width, env.height

        agent_obs = [None] * n_agents
        agent_obs_buffer = [None] * n_agents
        done = dict()
        done["__all__"] = False

        if imitate:
            agent_action_buffer = list(expert_actions[step].values())
        else:
            agent_action_buffer = np.random.choice(5, n_agents, replace=True)
        update_values = [False] * n_agents

        max_steps = int(4 * 2 * (20 + env.height + env.width))
        action_size = 5

        # Variables to keep track of progress
        action_dict = dict()
        scores_window = deque(maxlen=100)
        reward_window = deque(maxlen=100)
        done_window = deque(maxlen=100)
        action_prob = [0] * action_size
        # agent = Agent(state_size, action_size)

        if visuals:
            env_renderer = RenderTool(env, gl="PILSVG")
            env_renderer.render_env(show=True, frames=True, show_observations=True)

        for a in range(n_agents):
            if obs[a]:
                agent_obs[a] = normalize_observation(obs[a], tree_depth, observation_radius=10)
                agent_obs_buffer[a] = agent_obs[a].copy()

        # Reset score and done
        score = 0
        agent_action_buffer = np.zeros(n_agents)
        prev_reward = np.zeros(n_agents)

        for step in range(max_steps):
            for a in range(n_agents):
                if info['action_required'][a]:
                    if imitate:
                        if step < len(expert_actions):
                            action = expert_actions[step][str(a)]
                        else:
                            action = 0
                    else:
                        action = 0
                    action_prob[action] += 1
                    update_values[a] = True
                else:
                    update_values[a] = False
                    action = 0
                action_dict.update({a: action})

            next_obs, all_rewards, done, info = env.step(action_dict)

            for a in range(n_agents):
                if next_obs[a] is not None:
                    agent_obs[a] = normalize_observation(next_obs[a], tree_depth, observation_radius=10)
                # Only update the values when we are done or when an action
                # was taken and thus relevant information is present
                if update_values[a] or done[a]:
                    start += 1
                    batch_builder.add_values(
                        t=step,
                        eps_id=trials,
                        agent_index=0,
                        obs=agent_obs_buffer[a],
                        actions=action_dict[a],
                        action_prob=1.0,  # put the true action probability here
                        rewards=all_rewards[a],
                        prev_actions=agent_action_buffer[a],
                        prev_rewards=prev_reward[a],
                        dones=done[a],
                        infos=info['action_required'][a],
                        new_obs=agent_obs[a])
                    agent_obs_buffer[a] = agent_obs[a].copy()
                    agent_action_buffer[a] = action_dict[a]
                prev_reward[a] = all_rewards[a]
                score += all_rewards[a]  # / env.get_num_agents()

            if visuals:
                env_renderer.render_env(show=True, frames=True, show_observations=True)
                if sleep_for_animation:
                    time.sleep(0.5)

            if done["__all__"] or step > max_steps:
                writer.write(batch_builder.build_and_reset())
                break

            # Collect information about training progress
            if step % 100 == 0:
                tasks_finished = 0
                for current_agent in env.agents:
                    if current_agent.status == RailAgentStatus.DONE_REMOVED:
                        tasks_finished += 1
                print(
                    '\rTrial No {} Training {} Agents on ({},{}).\t Steps {}\t Reward: {:.3f}\t Normalized Reward: {:.3f}\tDones: {:.2f}%\t'
                    .format(trials, env.get_num_agents(), x_dim, y_dim, step, score,
                            score / (max_steps + n_agents),
                            100 * np.mean(tasks_finished / max(1, env.get_num_agents()))), end=" ")

        tasks_finished = 0
        for current_agent in env.agents:
            if current_agent.status == RailAgentStatus.DONE_REMOVED:
                tasks_finished += 1
        done_window.append(tasks_finished / max(1, env.get_num_agents()))
        reward_window.append(score)
        scores_window.append(score / (max_steps + n_agents))

        data = [[n_agents, x_dim, y_dim, trials,
                 np.mean(reward_window), np.mean(scores_window),
                 100 * np.mean(done_window), step,
                 np.array(action_prob) / np.sum(action_prob)]]
        df_cur = pd.DataFrame(data, columns=columns)
        df_all_results = pd.concat([df_all_results, df_cur])

        if imitate:
            df_all_results.to_csv('TreeImitationLearning_DQN_TrainingResults.csv', index=False)

        print(
            '\rTrial No {} Training {} Agents on ({},{}).\t Total Steps {}\t Reward: {:.3f}\t Normalized Reward: {:.3f}\tDones: {:.2f}%\t'
            .format(trials, env.get_num_agents(), x_dim, y_dim, step,
                    np.mean(reward_window), np.mean(scores_window), 100 * np.mean(done_window)))

        if visuals:
            env_renderer.close_window()
        gc.collect()
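# The batches written by JsonWriter above can later be read back for offline /
# imitation training. A minimal read-back sketch with RLlib's JsonReader,
# assuming the same "./out/" directory used by the writer:
from ray.rllib.offline.json_reader import JsonReader

reader = JsonReader("./out/")
sample = reader.next()  # one SampleBatch of recorded experiences
print(sample.count)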