Example 1
def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o in ("--sleep-for-animation"):
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    # Instantiate the predictor
    custom_predictor = ShortestPathPredictorForRailEnv(10)

    # Pass the Predictor to the observation builder
    custom_obs_builder = ObservePredictions(custom_predictor)

    # Instantiate the environment
    env = RailEnv(width=10,
                  height=10,
                  rail_generator=complex_rail_generator(nr_start_goal=5,
                                                        nr_extra=1,
                                                        min_dist=8,
                                                        max_dist=99999,
                                                        seed=1),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=3,
                  obs_builder_object=custom_obs_builder)

    obs, info = env.reset()
    env_renderer = RenderTool(env, gl="PILSVG")

    # We render the initial step and show the observed cells as colored boxes
    env_renderer.render_env(show=True,
                            frames=True,
                            show_observations=True,
                            show_predictions=False)

    action_dict = {}
    for step in range(100):
        for a in range(env.get_num_agents()):
            action = np.random.randint(0, 5)
            action_dict[a] = action
        obs, all_rewards, done, _ = env.step(action_dict)
        print("Rewards: ", all_rewards, "  [done=", done, "]")
        env_renderer.render_env(show=True,
                                frames=True,
                                show_observations=True,
                                show_predictions=False)
        if sleep_for_animation:
            time.sleep(0.5)
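
All of these examples share the same getopt scaffolding and call names that are defined elsewhere in their source files (for Example 1, the custom `ObservePredictions` observation builder). Below is a minimal sketch of the surrounding boilerplate for Example 1, assuming Flatland 2.x module paths; the example's `main` would sit between the helper and the entry-point guard:

import getopt
import sys
import time

import numpy as np

from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator
from flatland.utils.rendertools import RenderTool


def str2bool(v):
    # Interpret the --sleep-for-animation argument as a boolean flag
    return v.lower() in ("yes", "true", "t", "1")


if __name__ == '__main__':
    main(sys.argv[1:])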
Example 2
def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o in ("--sleep-for-animation"):
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    env = RailEnv(width=7,
                  height=7,
                  rail_generator=complex_rail_generator(nr_start_goal=10,
                                                        nr_extra=1,
                                                        min_dist=5,
                                                        max_dist=99999,
                                                        seed=1),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=1,
                  obs_builder_object=SingleAgentNavigationObs())

    obs, info = env.reset()
    env_renderer = RenderTool(env)
    env_renderer.render_env(show=True, frames=True, show_observations=True)
    for step in range(100):
        action = np.argmax(obs[0]) + 1
        obs, all_rewards, done, _ = env.step({0: action})
        print("Rewards: ", all_rewards, "  [done=", done, "]")
        env_renderer.render_env(show=True, frames=True, show_observations=True)
        if sleep_for_animation:
            time.sleep(0.1)
        if done["__all__"]:
            break
    env_renderer.close_window()
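
Example 2's `np.argmax(obs[0]) + 1` relies on the observation being a one-hot vector over (turn left, go forward, turn right) and on Flatland's action encoding, where 0 is DO_NOTHING, 1-3 are the movement actions and 4 is STOP_MOVING. A small illustration with a hypothetical observation value:

import numpy as np
from flatland.envs.rail_env import RailEnvActions

# Hypothetical one-hot navigation observation meaning "go forward"
obs_vector = np.array([0.0, 1.0, 0.0])

# Shifting the argmax by one maps index 0/1/2 onto
# MOVE_LEFT (1) / MOVE_FORWARD (2) / MOVE_RIGHT (3)
action = RailEnvActions(int(np.argmax(obs_vector)) + 1)
print(action.name)  # MOVE_FORWARD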
Example 3
def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o in ("--sleep-for-animation"):
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    test_envs_root = "./railway"
    test_env_file_path = "testing_stuff.pkl"

    test_env_file_path = os.path.join(
        test_envs_root,
        test_env_file_path
    )

    x_dim = 7
    y_dim = 7
    n_agents = 4

    stochastic_data = {'prop_malfunction': 0.05,  # Percentage of defective agents
                       'malfunction_rate': 100,  # Rate of malfunction occurrence
                       'min_duration': 20,  # Minimal duration of malfunction
                       'max_duration': 50  # Max duration of malfunction
                       }

    # Different agent types (trains) with different speeds.
    speed_ration_map = {1.: 0.25,  # Fast passenger train
                        1. / 2.: 0.25,  # Fast freight train
                        1. / 3.: 0.25,  # Slow commuter train
                        1. / 4.: 0.25}  # Slow freight train

    # Alternative: load a stored environment from file instead of generating one.
    # env = RailEnv(width=1, height=1,
    #               rail_generator=rail_from_file(test_env_file_path),
    #               schedule_generator=schedule_from_file(test_env_file_path),
    #               malfunction_generator_and_process_data=malfunction_from_file(test_env_file_path),
    #               obs_builder_object=MultipleAgentNavigationObs(max_depth=2,
    #                                                             predictor=ShortestPathPredictorForRailEnv(30)))
    # n_agents = env.number_of_agents

    env = RailEnv(width=x_dim,
                  height=y_dim,
                  rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1,
                                                        min_dist=6, max_dist=99999, seed=1),
                  # Alternative rail generator:
                  # sparse_rail_generator(max_num_cities=3,  # number of cities (train stations)
                  #                       seed=1, grid_mode=False,
                  #                       max_rails_between_cities=2, max_rails_in_city=3),
                  schedule_generator=complex_schedule_generator(speed_ration_map),
                  number_of_agents=n_agents,
                  malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
                  obs_builder_object=MultipleAgentNavigationObs(max_depth=2,
                                                                predictor=ShortestPathPredictorForRailEnv(30)))

    max_steps = int(4 * 2 * (20 + env.height + env.width))
    obs, info = env.reset(regenerate_rail=True,
            regenerate_schedule=True,
            random_seed=random_seed)
    env_renderer = RenderTool(env, gl="PILSVG")
    env_renderer.render_env(show=True, frames=True, show_observations=True)

    # Reset score and done
    score = 0
    env_done = 0
    step = 0
    for step in range(max_steps):
        action_dict = {}
        for i in range(n_agents):
            if not obs:
                action_dict.update({i: 2})
            elif obs[i] is not None:
                action = np.argmax(obs[i][1:4]) + 1
                action_dict.update({i: action})

        obs, all_rewards, done, _ = env.step(action_dict)
        print("Rewards: ", all_rewards, "  [done=", done, "]")

        for a in range(env.get_num_agents()):
            score += all_rewards[a] / env.get_num_agents()

        env_renderer.render_env(show=True, frames=True, show_observations=True)
        if sleep_for_animation:
            time.sleep(0.5)
        if done["__all__"]:
            break

        # Collect information about training
        tasks_finished = 0
        for current_agent in env.agents:
            if current_agent.status == RailAgentStatus.DONE_REMOVED:
                tasks_finished += 1
        done_window = tasks_finished / max(1, env.get_num_agents())
        scores_window = score / max_steps
        print(
            '\rTraining {} Agents on ({},{}).\t Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'.format(
                env.get_num_agents(), x_dim, y_dim,
                step,
                np.mean(scores_window),
                100 * np.mean(done_window)), end=" ")

    tasks_finished = 0
    for current_agent in env.agents:
        if current_agent.status == RailAgentStatus.DONE_REMOVED:
            tasks_finished += 1
    done_window = tasks_finished / max(1, env.get_num_agents())
    scores_window = score / max_steps
    print(
        '\rTraining {} Agents on ({},{}).\t Total Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'.format(
            env.get_num_agents(), x_dim, y_dim,
            step,
            np.mean(scores_window),
            100 * np.mean(done_window)), end=" ")

    env_renderer.close_window()
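
The end-of-episode bookkeeping (counting agents whose status is DONE_REMOVED and normalizing the score) is repeated almost verbatim in Examples 3-5. Under Flatland 2.x it could be factored into a small helper; this is a sketch, not part of the original code:

from flatland.envs.agent_utils import RailAgentStatus


def completion_stats(env, score, max_steps):
    # Fraction of agents that reached their target and were removed from the grid
    tasks_finished = sum(agent.status == RailAgentStatus.DONE_REMOVED
                         for agent in env.agents)
    done_ratio = tasks_finished / max(1, env.get_num_agents())
    # Score averaged over the episode's step budget
    return done_ratio, score / max_steps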
Example 4
def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o in ("--sleep-for-animation"):
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    test_envs_root = f"./test-envs/Test_{test_env_no}"
    test_env_file_path = f"Level_{level_no}.pkl"

    test_env_file_path = os.path.join(
        test_envs_root,
        test_env_file_path
    )

    x_dim = 35
    y_dim = 35
    n_agents = 10

    stochastic_data = {'prop_malfunction': 0.05,  # Percentage of defective agents
                       'malfunction_rate': 100,  # Rate of malfunction occurrence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    # Different agent types (trains) with different speeds.
    speed_ration_map = {1.: 0.25,  # Fast passenger train
                        1. / 2.: 0.25,  # Fast freight train
                        1. / 3.: 0.25,  # Slow commuter train
                        1. / 4.: 0.25}  # Slow freight train

    env = RailEnv(width=x_dim,
                  height=y_dim,
                  #rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=6, max_dist=99999,seed=1),
                  rail_generator=sparse_rail_generator(max_num_cities=3,
                                                       # Number of cities in map (where train stations are)
                                                       seed=1,  # Random seed
                                                       grid_mode=False,
                                                       max_rails_between_cities=2,
                                                       max_rails_in_city=3),
                  #schedule_generator=complex_schedule_generator(speed_ration_map),
                  schedule_generator=sparse_schedule_generator(speed_ration_map),
                  number_of_agents=n_agents,
                  malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
                  obs_builder_object=MultipleAgentNavigationObs(max_depth=2, predictor=ShortestPathPredictorForRailEnv(30)))


    # print(f"Testing Environment: {test_env_file_path} with seed: {random_seed}")
    # env = RailEnv(width=1, height=1, rail_generator=rail_from_file(test_env_file_path),
    #                    schedule_generator=schedule_from_file(test_env_file_path),
    #                    malfunction_generator_and_process_data=malfunction_from_file(test_env_file_path),
    #                    obs_builder_object=MultipleAgentNavigationObs(max_depth=2, predictor=ShortestPathPredictorForRailEnv(30)))

    obs, info = env.reset(regenerate_rail=True,
            regenerate_schedule=True,
            activate_agents=False,
            random_seed=random_seed)

    n_agents = env.get_num_agents()
    x_dim, y_dim = env.width,env.height

    max_steps = int(4 * 2 * (20 + env.height + env.width))

    env_renderer = RenderTool(env, gl="PILSVG")
    env_renderer.render_env(show=True, frames=True, show_observations=True)

    # Reset score and done
    score = 0
    env_done = 0
    step = 0
    for step in range(max_steps):

        # Take the prediction data from the first agent that has an observation;
        # prediction_pos[t] holds the predicted cell of every agent at future
        # time step t (-1 where no prediction is available).
        for i in range(n_agents):
            if obs[i] is not None:
                observations, prediction_data, prediction_pos = obs[i]
                break

        action_dict = {}
        next_shortest_actions = 2*np.ones(n_agents)
        next_next_shortest_actions = 2*np.ones(n_agents)
        agent_conflicts = np.zeros((n_agents,n_agents))
        agent_conflicts_count = np.zeros((n_agents, n_agents))
        minDist = -1 *np.ones(n_agents)
        incDiff1 = -1 * np.ones(n_agents)
        incDiff2 = -1 * np.ones(n_agents)
        malfunc = np.zeros(n_agents)
        speed = np.ones(n_agents)
        pos_frac = np.ones(n_agents)
        agent_num_conflicts = []

        vals = []
        counts = []
        counter = np.zeros(n_agents)
        # For each predicted time step, find cells that more than one agent is
        # expected to occupy and record, per agent pair, how often and how soon
        # such a conflict occurs.
        for i in range(30):  # 30 = horizon of the ShortestPathPredictorForRailEnv above
            pos = prediction_pos[i]
            val, count = np.unique(pos, return_counts=True)
            if val[0] == -1:  # drop the "no prediction" entries
                val = val[1:]
                count = count[1:]
            vals.append(val)
            counts.append(count)

            for j, curVal in enumerate(val):
                curCount = count[j]
                if curCount > 1:
                    idxs = np.argwhere(pos == curVal)
                    lsIdx = [int(x) for x in idxs]
                    combs = list(combinations(lsIdx, 2))
                    for comb in combs:
                        counter[comb[0]] += 1
                        counter[comb[1]] += 1
                        agent_conflicts_count[comb[0], comb[1]] = (counter[comb[0]] + counter[comb[1]]) / 2
                        if agent_conflicts[comb[0], comb[1]] == 0:
                            agent_conflicts[comb[0], comb[1]] = i
                        else:
                            agent_conflicts[comb[0], comb[1]] = min(i, agent_conflicts[comb[0], comb[1]])

        for i in range(n_agents):
            agent_num_conflicts.append(sum(agent_conflicts[i,:]))
            if not obs or obs[i] is None:
                action_dict.update({i: 2})
            elif obs[i][0] is not None:
                shortest_action = np.argmax(obs[i][0][1:4]) + 1
                next_shortest_action = np.argmax(obs[i][0][5:7]) + 1
                next_next_shortest_action = np.argmax(obs[i][0][8:10]) + 1
                next_shortest_actions[i] = next_shortest_action
                next_next_shortest_actions[i] = next_next_shortest_action
                malfunc[i] = obs[i][0][-3]
                speed[i] = obs[i][0][-2]
                pos_frac[i] = obs[i][0][-1]
                minDist[i] = obs[i][0][0]
                incDiff1[i] = obs[i][0][-5]
                incDiff2[i] = obs[i][0][-4]
                action_dict.update({i: shortest_action})
            else:
                action_dict.update({i: 2})
        mal_agents = np.array(-1)
        # For every agent with a predicted conflict, look at the currently
        # malfunctioning trains and make the agents on a collision course with
        # them stop (action 4 = STOP_MOVING) or fall back to the next-best
        # route, depending on how soon the conflict would occur.
        for i in range(n_agents):
            if agent_num_conflicts[i] > 0:
                mal_agents = np.where(malfunc > 0)
                for mal_agent in mal_agents[0]:
                    if mal_agent is None:
                        break
                    conflict_agents = np.where(agent_conflicts[:, int(mal_agent)] > 0)

                    for cur_conflict_agent in conflict_agents[0]:
                        cur_conflict_agent = int(cur_conflict_agent)
                        steps_conflict = agent_conflicts[cur_conflict_agent, mal_agent]
                        if steps_conflict <= 3:
                            if incDiff1[cur_conflict_agent] == -1:
                                if int(minDist[cur_conflict_agent]) >= 5:
                                    action_dict.update({cur_conflict_agent: 4})
                                elif agent_conflicts_count[cur_conflict_agent, mal_agent] > 1:
                                    action_dict.update({cur_conflict_agent: 4})
                            elif minDist[cur_conflict_agent] > incDiff1[cur_conflict_agent]:
                                action_dict.update({cur_conflict_agent: 4})
                            else:
                                action_dict.update({cur_conflict_agent: next_shortest_actions[cur_conflict_agent]})

        obs, all_rewards, done, _ = env.step(action_dict)

        print("Rewards: ", all_rewards, "  [done=", done, "]")

        for a in range(env.get_num_agents()):
            score += all_rewards[a] / env.get_num_agents()

        env_renderer.render_env(show=True, frames=True, show_observations=True)
        if sleep_for_animation:
            time.sleep(0.5)
        if done["__all__"]:
            break

        # Collect information about training
        tasks_finished = 0
        for current_agent in env.agents:
            if current_agent.status == RailAgentStatus.DONE_REMOVED:
                tasks_finished += 1
        done_window = tasks_finished / max(1, env.get_num_agents())
        scores_window = score / max_steps
        print(
            '\rTraining {} Agents on ({},{}).\t Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'.format(
                env.get_num_agents(), x_dim, y_dim,
                step,
                np.mean(scores_window),
                100 * np.mean(done_window)), end=" ")

    tasks_finished = 0
    for current_agent in env.agents:
        if current_agent.status == RailAgentStatus.DONE_REMOVED:
            tasks_finished += 1
    done_window = tasks_finished / max(1, env.get_num_agents())
    scores_window = score / max_steps
    print(
        '\rTraining {} Agents on ({},{}).\t Total Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'.format(
            env.get_num_agents(), x_dim, y_dim,
            step,
            np.mean(scores_window),
            100 * np.mean(done_window)), end=" ")

    env_renderer.close_window()
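
The conflict detection in Example 4 scans the predicted positions of all agents at each future time step and flags every pair of agents expected to occupy the same cell. The same idea on a toy prediction vector (the values are made up):

import numpy as np
from itertools import combinations

# Hypothetical predicted cells for 3 agents at one future time step;
# -1 means the predictor has no position for that agent.
pos = np.array([17, 17, -1])

val, count = np.unique(pos, return_counts=True)
if val[0] == -1:          # drop the "no prediction" bucket
    val, count = val[1:], count[1:]

conflicts = []
for v, c in zip(val, count):
    if c > 1:             # at least two agents predicted on the same cell
        idxs = [int(i) for i in np.argwhere(pos == v)]
        conflicts.extend(combinations(idxs, 2))

print(conflicts)          # [(0, 1)]: agents 0 and 1 are on a collision course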
Example 5
def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o in ("--sleep-for-animation"):
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    test_envs_root = f"./test-envs/Test_{test_env_no}"
    test_env_file_path = f"Level_{level_no}.pkl"

    test_env_file_path = os.path.join(
        test_envs_root,
        test_env_file_path
    )
    print(f"Testing Environment: {test_env_file_path} with seed: {random_seed}")

    env = RailEnv(width=1, height=1, rail_generator=rail_from_file(test_env_file_path),
                       schedule_generator=schedule_from_file(test_env_file_path),
                       malfunction_generator_and_process_data=malfunction_from_file(test_env_file_path),
                       obs_builder_object=MultipleAgentNavigationObs(max_depth=2, predictor=ShortestPathPredictorForRailEnv(30)))

    max_steps = int(4 * 2 * (20 + env.height + env.width))

    obs, info = env.reset(regenerate_rail=True,
            regenerate_schedule=True,
            activate_agents=False,
            random_seed=random_seed)
    env_renderer = RenderTool(env, gl="PILSVG")
    env_renderer.render_env(show=True, frames=True, show_observations=True)
    n_agents = env.get_num_agents()
    x_dim, y_dim = env.width,env.height
    # Reset score and done
    score = 0
    env_done = 0
    step = 0
    for step in range(max_steps):
        action_dict = {}
        for i in range(n_agents):
            if not obs:
                action_dict.update({i: 2})
            elif obs[i] is not None:
                action = np.argmax(obs[i][1:4]) + 1
                action_dict.update({i: action})

        obs, all_rewards, done, _ = env.step(action_dict)
        print("Rewards: ", all_rewards, "  [done=", done, "]")

        for a in range(env.get_num_agents()):
            score += all_rewards[a] / env.get_num_agents()

        env_renderer.render_env(show=True, frames=True, show_observations=True)
        if sleep_for_animation:
            time.sleep(0.5)
        if done["__all__"]:
            break

        # Collect information about training
        tasks_finished = 0
        for current_agent in env.agents:
            if current_agent.status == RailAgentStatus.DONE_REMOVED:
                tasks_finished += 1
        done_window = tasks_finished / max(1, env.get_num_agents())
        scores_window = score / max_steps
        print(
            '\rTraining {} Agents on ({},{}).\t Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'.format(
                n_agents, x_dim, y_dim,
                step,
                np.mean(scores_window),
                100 * np.mean(done_window)), end=" ")

    tasks_finished = 0
    for current_agent in env.agents:
        if current_agent.status == RailAgentStatus.DONE_REMOVED:
            tasks_finished += 1
    done_window = tasks_finished / max(1, env.get_num_agents())
    scores_window = score / max_steps
    print(
        '\rTraining {} Agents on ({},{}).\t Total Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'.format(
            n_agents, x_dim, y_dim,
            step,
            np.mean(scores_window),
            100 * np.mean(done_window)), end=" ")

    env_renderer.close_window()
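
Examples 3-5 also use `random_seed`, and Examples 4 and 5 additionally use `test_env_no` and `level_no`, without defining them; they are presumably module-level configuration. Hypothetical values for illustration only:

# Hypothetical module-level configuration assumed by Examples 3-5
random_seed = 1    # seed passed to env.reset(...)
test_env_no = 0    # which ./test-envs/Test_* folder to load (Examples 4 and 5)
level_no = 0       # which Level_*.pkl file inside that folder (Examples 4 and 5)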
Example 6
def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o in ("--sleep-for-animation"):
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    batch_builder = SampleBatchBuilder()  # or MultiAgentSampleBatchBuilder
    writer = JsonWriter("./out/")

    #  Setting these 2 parameters to True can slow down training
    visuals = False
    sleep_for_animation = False

    if visuals:
        from flatland.utils.rendertools import RenderTool

    max_depth = 30
    tree_depth = 2
    trial_start = 100
    n_trials = 999
    start = 0

    columns = [
        'Agents', 'X_DIM', 'Y_DIM', 'TRIAL_NO', 'REWARD', 'NORMALIZED_REWARD',
        'DONE_RATIO', 'STEPS', 'ACTION_PROB'
    ]
    df_all_results = pd.DataFrame(columns=columns)

    for trials in range(trial_start, n_trials + 1):

        env_file = f"envs-100-999/envs/Level_{trials}.pkl"
        # env_file = f"../env_configs/round_1-small/Test_0/Level_{trials}.mpk"

        # file = f"../env_configs/actions-small/Test_0/Level_{trials}.mpk"
        file = f"envs-100-999/actions/envs/Level_{trials}.json"

        if not os.path.isfile(env_file) or not os.path.isfile(file):
            print("Missing file!", env_file, file)
            continue

        step = 0

        obs_builder_object = TreeObsForRailEnv(
            max_depth=tree_depth,
            predictor=ShortestPathPredictorForRailEnv(max_depth))

        env = RailEnv(
            width=1,
            height=1,
            rail_generator=rail_from_file(env_file),
            schedule_generator=schedule_from_file(env_file),
            malfunction_generator_and_process_data=malfunction_from_file(
                env_file),
            obs_builder_object=obs_builder_object)

        obs, info = env.reset(regenerate_rail=True,
                              regenerate_schedule=True,
                              activate_agents=False,
                              random_seed=1001)

        with open(file, "r") as files:
            expert_actions = json.load(files)

        n_agents = env.get_num_agents()
        x_dim, y_dim = env.width, env.height

        agent_obs = [None] * n_agents
        agent_obs_buffer = [None] * n_agents
        done = dict()
        done["__all__"] = False

        if imitate:
            agent_action_buffer = list(expert_actions[step].values())
        else:
            # Random initial actions, drawn uniformly over the 5 actions
            agent_action_buffer = np.random.choice(5, n_agents, replace=True)
        update_values = [False] * n_agents

        max_steps = int(4 * 2 * (20 + env.height + env.width))

        action_size = 5  # 3

        # And some variables to keep track of the progress
        action_dict = dict()
        scores_window = deque(maxlen=100)
        reward_window = deque(maxlen=100)
        done_window = deque(maxlen=100)
        action_prob = [0] * action_size

        # agent = Agent(state_size, action_size)

        if visuals:
            env_renderer = RenderTool(env, gl="PILSVG")
            env_renderer.render_env(show=True,
                                    frames=True,
                                    show_observations=True)

        for a in range(n_agents):
            if obs[a]:
                agent_obs[a] = normalize_observation(obs[a],
                                                     tree_depth,
                                                     observation_radius=10)
                agent_obs_buffer[a] = agent_obs[a].copy()

        # Reset score and done
        score = 0
        agent_action_buffer = np.zeros(n_agents)
        # prev_action = np.zeros_like(envs.action_space.sample())
        prev_reward = np.zeros(n_agents)
        for step in range(max_steps):
            for a in range(n_agents):
                if info['action_required'][a]:
                    if imitate:
                        if step < len(expert_actions):
                            action = expert_actions[step][str(a)]
                        else:
                            action = 0
                    else:
                        action = 0

                    action_prob[action] += 1
                    update_values[a] = True

                else:
                    update_values[a] = False
                    action = 0

                action_dict.update({a: action})

            next_obs, all_rewards, done, info = env.step(action_dict)

            for a in range(n_agents):

                if next_obs[a] is not None:
                    agent_obs[a] = normalize_observation(next_obs[a],
                                                         tree_depth,
                                                         observation_radius=10)

                # Only update the values when we are done or when an action
                # was taken and thus relevant information is present
                if update_values[a] or done[a]:
                    start += 1

                    batch_builder.add_values(
                        t=step,
                        eps_id=trials,
                        agent_index=0,
                        obs=agent_obs_buffer[a],
                        actions=action_dict[a],
                        action_prob=1.0,  # put the true action probability
                        rewards=all_rewards[a],
                        prev_actions=agent_action_buffer[a],
                        prev_rewards=prev_reward[a],
                        dones=done[a],
                        infos=info['action_required'][a],
                        new_obs=agent_obs[a])

                agent_obs_buffer[a] = agent_obs[a].copy()
                agent_action_buffer[a] = action_dict[a]
                prev_reward[a] = all_rewards[a]

                score += all_rewards[a]  # / envs.get_num_agents()

            if visuals:
                env_renderer.render_env(show=True,
                                        frames=True,
                                        show_observations=True)
                if sleep_for_animation:
                    time.sleep(0.5)

            if done["__all__"] or step > max_steps:
                writer.write(batch_builder.build_and_reset())
                break

            # Collect information about training
            if step % 100 == 0:
                tasks_finished = 0
                for current_agent in env.agents:
                    if current_agent.status == RailAgentStatus.DONE_REMOVED:
                        tasks_finished += 1
                print(
                    '\rTrial No {} Training {} Agents on ({},{}).\t Steps {}\t Reward: {:.3f}\t Normalized Reward: {:.3f}\tDones: {:.2f}%\t'
                    .format(
                        trials, env.get_num_agents(), x_dim, y_dim, step,
                        score, score / (max_steps + n_agents), 100 * np.mean(
                            tasks_finished / max(1, env.get_num_agents()))),
                    end=" ")

        tasks_finished = 0
        for current_agent in env.agents:
            if current_agent.status == RailAgentStatus.DONE_REMOVED:
                tasks_finished += 1
        done_window.append(tasks_finished / max(1, env.get_num_agents()))
        reward_window.append(score)
        scores_window.append(score / (max_steps + n_agents))

        data = [[
            n_agents, x_dim, y_dim, trials,
            np.mean(reward_window),
            np.mean(scores_window), 100 * np.mean(done_window), step,
            action_prob / np.sum(action_prob)
        ]]

        df_cur = pd.DataFrame(data, columns=columns)
        df_all_results = pd.concat([df_all_results, df_cur])

        if imitate:
            df_all_results.to_csv(
                'TreeImitationLearning_DQN_TrainingResults.csv', index=False)

        print(
            '\rTrial No {} Training {} Agents on ({},{}).\t Total Steps {}\t Reward: {:.3f}\t Normalized Reward: {:.3f}\tDones: {:.2f}%\t'
            .format(trials, env.get_num_agents(), x_dim, y_dim, step,
                    np.mean(reward_window), np.mean(scores_window),
                    100 * np.mean(done_window)))

        if visuals:
            env_renderer.close_window()

        gc.collect()
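
Example 6 combines Flatland with RLlib's offline-data utilities. In addition to the getopt/str2bool boilerplate sketched after Example 1, it appears to assume roughly the following imports and flags; module paths vary across Ray and Flatland versions, and `normalize_observation` is assumed to come from the Flatland baselines' observation utilities:

import gc
import json
import os
from collections import deque

import numpy as np
import pandas as pd

# RLlib offline-data helpers (paths as in Ray's "saving experiences" example)
from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder
from ray.rllib.offline.json_writer import JsonWriter

from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.malfunction_generators import malfunction_from_file
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import rail_from_file
from flatland.envs.schedule_generators import schedule_from_file

imitate = True  # hypothetical flag: replay recorded expert actions instead of acting randomly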