Example no. 1
def test_save_load():
    env = RailEnv(width=10, height=10,
                  rail_generator=complex_rail_generator(nr_start_goal=2, nr_extra=5, min_dist=6, seed=1),
                  schedule_generator=complex_schedule_generator(), number_of_agents=2)
    env.reset()
    agent_1_pos = env.agents[0].position
    agent_1_dir = env.agents[0].direction
    agent_1_tar = env.agents[0].target
    agent_2_pos = env.agents[1].position
    agent_2_dir = env.agents[1].direction
    agent_2_tar = env.agents[1].target
    
    os.makedirs("tmp", exist_ok=True)

    RailEnvPersister.save(env, "tmp/test_save.pkl")
    env.save("tmp/test_save_2.pkl")

    #env.load("test_save.dat")
    env, env_dict = RailEnvPersister.load_new("tmp/test_save.pkl")
    assert (env.width == 10)
    assert (env.height == 10)
    assert (len(env.agents) == 2)
    assert (agent_1_pos == env.agents[0].position)
    assert (agent_1_dir == env.agents[0].direction)
    assert (agent_1_tar == env.agents[0].target)
    assert (agent_2_pos == env.agents[1].position)
    assert (agent_2_dir == env.agents[1].direction)
    assert (agent_2_tar == env.agents[1].target)
Example no. 2
def test_save_load():
    env = RailEnv(width=10,
                  height=10,
                  rail_generator=complex_rail_generator(nr_start_goal=2,
                                                        nr_extra=5,
                                                        min_dist=6,
                                                        seed=1),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=2)
    env.reset()
    agent_1_pos = env.agents[0].position
    agent_1_dir = env.agents[0].direction
    agent_1_tar = env.agents[0].target
    agent_2_pos = env.agents[1].position
    agent_2_dir = env.agents[1].direction
    agent_2_tar = env.agents[1].target
    env.save("test_save.dat")
    env.load("test_save.dat")
    assert (env.width == 10)
    assert (env.height == 10)
    assert (len(env.agents) == 2)
    assert (agent_1_pos == env.agents[0].position)
    assert (agent_1_dir == env.agents[0].direction)
    assert (agent_1_tar == env.agents[0].target)
    assert (agent_2_pos == env.agents[1].position)
    assert (agent_2_dir == env.agents[1].direction)
    assert (agent_2_tar == env.agents[1].target)
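These save/load tests assume the imports below (a sketch based on recent flatland-rl releases; module paths have moved between versions):

import os

from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator
from flatland.envs.persistence import RailEnvPersister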
Example no. 3
def create_testfiles(parameters, test_nr=0, nr_trials_per_test=100):
    # Parameter initialization
    print('Creating {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(
        test_nr, parameters[0], parameters[1], parameters[2]))
    # Reset environment
    random.seed(parameters[3])
    np.random.seed(parameters[3])
    nr_paths = max(4, parameters[2] + int(0.5 * parameters[2]))
    min_dist = int(min([parameters[0], parameters[1]]) * 0.75)
    env = RailEnv(width=parameters[0],
                  height=parameters[1],
                  rail_generator=complex_rail_generator(nr_start_goal=nr_paths,
                                                        nr_extra=5,
                                                        min_dist=min_dist,
                                                        max_dist=99999,
                                                        seed=parameters[3]),
                  schedule_generator=complex_schedule_generator(),
                  obs_builder_object=TreeObsForRailEnv(max_depth=2),
                  number_of_agents=parameters[2])
    printProgressBar(0,
                     nr_trials_per_test,
                     prefix='Progress:',
                     suffix='Complete',
                     length=20)
    for trial in range(nr_trials_per_test):
        # Reset the env
        env.reset(True, True)
        env.save("./Tests/{}/Level_{}.pkl".format(test_nr, trial))
        printProgressBar(trial + 1,
                         nr_trials_per_test,
                         prefix='Progress:',
                         suffix='Complete',
                         length=20)

    return
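printProgressBar is not defined in this snippet; a common console progress-bar recipe along the following lines would satisfy the calls above (a sketch, not the original helper):

def printProgressBar(iteration, total, prefix='', suffix='', length=20, fill='#'):
    # Render a simple single-line console progress bar.
    percent = 100.0 * iteration / float(total)
    filled = int(length * iteration // total)
    bar = fill * filled + '-' * (length - filled)
    print('\r{} |{}| {:.1f}% {}'.format(prefix, bar, percent, suffix), end='\r')
    if iteration == total:
        print()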
Example no. 4
def demo(args=None):
    """Demo script to check installation"""
    env = RailEnv(width=15,
                  height=15,
                  rail_generator=complex_rail_generator(nr_start_goal=10,
                                                        nr_extra=1,
                                                        min_dist=8,
                                                        max_dist=99999),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=5)

    env._max_episode_steps = int(15 * (env.width + env.height))
    env_renderer = RenderTool(env)

    while True:
        obs, info = env.reset()
        _done = False
        # Run a single episode here
        step = 0
        while not _done:
            # Compute Action
            _action = {}
            for _idx, _ in enumerate(env.agents):
                _action[_idx] = np.random.randint(0, 5)
            obs, all_rewards, done, _ = env.step(_action)
            _done = done['__all__']
            step += 1
            env_renderer.render_env(show=True,
                                    frames=False,
                                    show_observations=False,
                                    show_predictions=False)
            time.sleep(0.3)
    return 0
Example no. 5
def gen_env(number_agents, width, height, n_start_goal, seed):

    speed_ration_map = {
        1.: 0.25,  # Fast passenger train
        1. / 2.: 0.25,  # Fast freight train
        1. / 3.: 0.25,  # Slow commuter train
        1. / 4.: 0.25
    }  # Slow freight train

    env = RailEnv(width=width,
                  height=height,
                  rail_generator=complex_rail_generator(
                      nr_start_goal=n_start_goal,
                      nr_extra=3,
                      min_dist=6,
                      max_dist=99999,
                      seed=seed),
                  schedule_generator=complex_schedule_generator(
                      speed_ratio_map=speed_ration_map),
                  number_of_agents=number_agents,
                  obs_builder_object=TreeObsForRailEnv(max_depth=5))

    env.reset()
    env.step(dict(zip(range(number_agents), [2] * number_agents)))

    return env
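An illustrative call of gen_env (values are assumptions, not taken from the original script):

env = gen_env(number_agents=4, width=20, height=20, n_start_goal=10, seed=1)
print(env.get_num_agents(), env.width, env.height)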
Example no. 6
def test_normalize_features():

    random.seed(1)
    np.random.seed(1)
    max_depth = 4

    for i in range(10):
        tree_observer = TreeObsForRailEnv(max_depth=max_depth)
        next_rand_number = random.randint(0, 100)

        env = RailEnv(width=10,
                      height=10,
                      rail_generator=complex_rail_generator(
                          nr_start_goal=10,
                          nr_extra=1,
                          min_dist=8,
                          max_dist=99999,
                          seed=next_rand_number),
                      schedule_generator=complex_schedule_generator(),
                      number_of_agents=1,
                      obs_builder_object=tree_observer)

        obs, all_rewards, done, _ = env.step({0: 0})

        obs_new = tree_observer.get()
        # data, distance, agent_data = split_tree(tree=np.array(obs_old), num_features_per_node=11)
        data_normalized = normalize_observation(obs_new,
                                                max_depth,
                                                observation_radius=10)

        filename = 'testdata/test_array_{}.csv'.format(i)
        data_loaded = np.loadtxt(filename, delimiter=',')

        assert np.allclose(data_loaded, data_normalized)
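normalize_observation and the testdata CSV files belong to the surrounding training code rather than to flatland itself. The test presumably relies on imports along these lines (the utils module path is an assumption based on the Flatland baseline repositories):

import random

import numpy as np

from flatland.envs.rail_env import RailEnv
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator
from utils.observation_utils import normalize_observation  # assumed location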
Example no. 7
def create_env(number_agents, width, height, n_start_goal, seed):
    env = RailEnv(width=width,
                  height=height,
                  rail_generator=complex_rail_generator(nr_start_goal=n_start_goal,
                                                        nr_extra=1,
                                                        min_dist=6,
                                                        max_dist=99999,
                                                        seed=seed),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=number_agents)
    
    return env   
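A possible way to use create_env (illustrative values; whether reset returns only the observations or an (obs, info) pair depends on the flatland version):

env = create_env(number_agents=3, width=20, height=20, n_start_goal=10, seed=1)
obs, info = env.reset()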
Example no. 8
def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o == "--sleep-for-animation":
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    # Initiate the Predictor
    custom_predictor = ShortestPathPredictorForRailEnv(10)

    # Pass the Predictor to the observation builder
    custom_obs_builder = ObservePredictions(custom_predictor)

    # Initiate Environment
    env = RailEnv(width=10,
                  height=10,
                  rail_generator=complex_rail_generator(nr_start_goal=5,
                                                        nr_extra=1,
                                                        min_dist=8,
                                                        max_dist=99999,
                                                        seed=1),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=3,
                  obs_builder_object=custom_obs_builder)

    obs, info = env.reset()
    env_renderer = RenderTool(env, gl="PILSVG")

    # We render the initial step and show the observed cells as colored boxes
    env_renderer.render_env(show=True,
                            frames=True,
                            show_observations=True,
                            show_predictions=False)

    action_dict = {}
    for step in range(100):
        for a in range(env.get_num_agents()):
            action = np.random.randint(0, 5)
            action_dict[a] = action
        obs, all_rewards, done, _ = env.step(action_dict)
        print("Rewards: ", all_rewards, "  [done=", done, "]")
        env_renderer.render_env(show=True,
                                frames=True,
                                show_observations=True,
                                show_predictions=False)
        if sleep_for_animation:
            time.sleep(0.5)
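main relies on a str2bool helper and an entry point that are not shown; the Flatland example scripts define them roughly as follows (a sketch):

import sys


def str2bool(v):
    # Treat common truthy strings as True; anything else is False.
    return v.lower() in ("yes", "true", "t", "1")


if __name__ == '__main__':
    main(sys.argv[1:])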
Example no. 9
def test_complex_rail_generator():
    n_agents = 10
    n_start = 2
    x_dim = 10
    y_dim = 10
    min_dist = 4

    # Check that agent number is changed to fit generated level
    env = RailEnv(width=x_dim, height=y_dim,
                  rail_generator=complex_rail_generator(nr_start_goal=n_start, nr_extra=0, min_dist=min_dist),
                  schedule_generator=complex_schedule_generator(), number_of_agents=n_agents)
    env.reset()
    assert env.get_num_agents() == 2
    assert env.rail.grid.shape == (y_dim, x_dim)

    min_dist = 2 * x_dim

    # Check that no agents are generated when level cannot be generated
    env = RailEnv(width=x_dim, height=y_dim,
                  rail_generator=complex_rail_generator(nr_start_goal=n_start, nr_extra=0, min_dist=min_dist),
                  schedule_generator=complex_schedule_generator(), number_of_agents=n_agents)
    env.reset()
    assert env.get_num_agents() == 0
    assert env.rail.grid.shape == (y_dim, x_dim)

    # Check that everything stays the same when correct parameters are given
    min_dist = 2
    n_start = 5
    n_agents = 5

    env = RailEnv(width=x_dim, height=y_dim,
                  rail_generator=complex_rail_generator(nr_start_goal=n_start, nr_extra=0, min_dist=min_dist),
                  schedule_generator=complex_schedule_generator(), number_of_agents=n_agents)
    env.reset()
    assert env.get_num_agents() == n_agents
    assert env.rail.grid.shape == (y_dim, x_dim)
Example no. 10
def env_creator():
    """
    Creates an env and returns it
    """
    return RailEnv(width=20,
                   height=30,
                   rail_generator=complex_rail_generator(nr_start_goal=100,
                                                         nr_extra=2,
                                                         min_dist=8,
                                                         max_dist=99999,
                                                         seed=False),
                   schedule_generator=complex_schedule_generator(seed=False),
                   obs_builder_object=GlobalObsForRailEnv(),
                   number_of_agents=3,
                   random_seed=True)
Example no. 11
def test_multi_speed_init():
    env = RailEnv(width=50,
                  height=50,
                  rail_generator=complex_rail_generator(nr_start_goal=10,
                                                        nr_extra=1,
                                                        min_dist=8,
                                                        max_dist=99999,
                                                        seed=1),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=5)
    # Initialize the agent with the parameters corresponding to the environment and observation_builder
    agent = RandomAgent(218, 4)

    # Empty dictionary for all agent action
    action_dict = dict()

    # Set all the different speeds
    # Reset environment and get initial observations for all agents
    env.reset(False, False, True)

    # Here you can also further enhance the provided observation by means of normalization
    # See training navigation example in the baseline repository
    old_pos = []
    for i_agent in range(env.get_num_agents()):
        env.agents[i_agent].speed_data['speed'] = 1. / (i_agent + 1)
        old_pos.append(env.agents[i_agent].position)

    # Run episode
    for step in range(100):

        # Choose an action for each agent in the environment
        for a in range(env.get_num_agents()):
            action = agent.act(0)
            action_dict.update({a: action})

            # Check that agent did not move in between its speed updates
            assert old_pos[a] == env.agents[a].position

        # Environment step which returns the observations for all agents, their corresponding
        # reward and whether they are done
        _, _, _, _ = env.step(action_dict)

        # Update old position whenever an agent was allowed to move
        for i_agent in range(env.get_num_agents()):
            if (step + 1) % (i_agent + 1) == 0:
                print(step, i_agent, env.agents[i_agent].position)
                old_pos[i_agent] = env.agents[i_agent].position
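test_multi_speed_init uses a RandomAgent that is not defined in this snippet; the Flatland tutorials use a minimal agent along these lines (a sketch with an assumed interface):

class RandomAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size

    def act(self, state, eps=0.):
        # Ignore the state and pick a random action.
        return np.random.choice(np.arange(self.action_size))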
Example no. 12
def test_rail_env_speed_intializer():
    speed_ratio_map = {1: 0.3, 2: 0.4, 3: 0.1, 5: 0.2}

    env = RailEnv(width=50, height=50,
                  rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=8, max_dist=99999,
                                                        seed=1), schedule_generator=complex_schedule_generator(),
                  number_of_agents=10)
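    # Note: speed_ratio_map is defined above but never passed to
    # complex_schedule_generator, so every agent keeps the default speed of 1.0
    # (which still satisfies the membership check below).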
    env.reset()
    actual_speeds = list(map(lambda agent: agent.speed_data['speed'], env.agents))

    expected_speed_set = set(speed_ratio_map.keys())

    # check that the number of speeds generated is correct
    assert len(actual_speeds) == env.get_num_agents()

    # check that only the speeds defined are generated
    assert all({(actual_speed in expected_speed_set) for actual_speed in actual_speeds})
Example no. 13
def env_gradual_update(input_env, agent=False, hardness_lvl=1):

    agent_num = input_env.number_of_agents
    env_width = input_env.width + 4
    env_height = input_env.height + 4

    map_agent_ratio = int(np.round(((env_width + env_height) / 2) / 5 - 2))

    if map_agent_ratio > 0:
        agent_num = int(np.round(((env_width + env_height) / 2) / 5 - 2))
    else:
        agent_num = 1

    if hardness_lvl == 1:

        rail_generator = complex_rail_generator(nr_start_goal=20,
                                                nr_extra=1,
                                                min_dist=9,
                                                max_dist=99999,
                                                seed=0)

        schedule_generator = complex_schedule_generator()
    else:

        rail_generator = sparse_rail_generator(nr_start_goal=9,
                                               nr_extra=1,
                                               min_dist=9,
                                               max_dist=99999,
                                               seed=0)

        schedule_generator = sparse_schedule_generator()

    global env, env_renderer, render

    if render:
        env_renderer.close_window()

    env = RailEnv(width=env_width,
                  height=env_height,
                  rail_generator=rail_generator,
                  schedule_generator=schedule_generator,
                  obs_builder_object=GlobalObsForRailEnv(),
                  number_of_agents=agent_num)

    env_renderer = RenderTool(env)
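env_gradual_update (and env_random_update in Example no. 20) mutate module-level state; a minimal sketch of the globals they assume (the real script presumably creates an initial env and renderer elsewhere):

render = False
env = None
env_renderer = None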
Example no. 14
def create_multi_agent_environment(dimension, num_agents, timed, seed):
    # Create new environment.

    env = RailEnv(width=dimension,
                  height=dimension,
                  rail_generator=complex_rail_generator(
                      nr_start_goal=int(1.5 * num_agents),
                      nr_extra=int(1.2 * num_agents),
                      min_dist=int(floor(dimension / 2)),
                      max_dist=99999,
                      seed=0),
                  schedule_generator=complex_schedule_generator(timed=timed),
                  malfunction_generator_and_process_data=None,
                  number_of_agents=num_agents)

    env.reset(random_seed=int(seed))

    return env
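An illustrative call (note that timed is forwarded to a project-specific complex_schedule_generator; the stock flatland generator does not take this argument):

env = create_multi_agent_environment(dimension=10, num_agents=3, timed=False, seed=1)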
Example no. 15
def create_env(nr_start_goal=10,
               nr_extra=2,
               min_dist=8,
               max_dist=99999,
               nr_agent=10,
               seed=0,
               render_mode='PIL'):
    env = RailEnv(width=30,
                  height=30,
                  rail_generator=complex_rail_generator(
                      nr_start_goal, nr_extra, min_dist, max_dist, seed),
                  schedule_generator=complex_schedule_generator(),
                  obs_builder_object=GlobalObsForRailEnv(),
                  number_of_agents=nr_agent)
    env_renderer = RenderTool(env, gl=render_mode)
    obs = env.reset()

    return env, env_renderer, obs
Example no. 16
def test_schedule_from_file_complex():
    """
    Test to see that all parameters are loaded as expected
    Returns
    -------

    """
    # Different agent types (trains) with different speeds.
    speed_ration_map = {
        1.: 0.25,  # Fast passenger train
        1. / 2.: 0.25,  # Fast freight train
        1. / 3.: 0.25,  # Slow commuter train
        1. / 4.: 0.25
    }  # Slow freight train

    # Generate complex test env
    rail_generator = complex_rail_generator(nr_start_goal=10,
                                            nr_extra=1,
                                            min_dist=8,
                                            max_dist=99999)
    schedule_generator = complex_schedule_generator(speed_ration_map)

    create_and_save_env(file_name="./complex_env_test.pkl",
                        rail_generator=rail_generator,
                        schedule_generator=schedule_generator)

    # Load the different envs and check the parameters

    # Complex generator
    rail_generator = rail_from_file("./complex_env_test.pkl")
    schedule_generator = schedule_from_file("./complex_env_test.pkl")
    complex_env_from_file = RailEnv(width=1,
                                    height=1,
                                    rail_generator=rail_generator,
                                    schedule_generator=schedule_generator)
    complex_env_from_file.reset(True, True)

    # Assert loaded agent number is correct
    assert complex_env_from_file.get_num_agents() == 10

    # Assert max steps is correct
    assert complex_env_from_file._max_episode_steps == 1350
Example no. 17
def test_save_load_mpk():
    env = RailEnv(width=10, height=10,
                  rail_generator=complex_rail_generator(nr_start_goal=2, nr_extra=5, min_dist=6, seed=1),
                  schedule_generator=complex_schedule_generator(), number_of_agents=2)
    env.reset()

    os.makedirs("tmp", exist_ok=True)

    RailEnvPersister.save(env, "tmp/test_save.mpk")

    #env.load("test_save.dat")
    env2, env_dict = RailEnvPersister.load_new("tmp/test_save.mpk")
    assert (env.width == env2.width)
    assert (env.height == env2.height)
    assert (len(env2.agents) == len(env.agents))
    
    for agent1, agent2 in zip(env.agents, env2.agents):
        assert(agent1.position == agent2.position)
        assert(agent1.direction == agent2.direction)
        assert(agent1.target == agent2.target)
Example no. 18
def run_benchmark():
    """Run benchmark on a small number of agents in complex rail environment."""
    random.seed(1)
    np.random.seed(1)

    # Example generate a random rail
    env = RailEnv(width=15,
                  height=15,
                  rail_generator=complex_rail_generator(nr_start_goal=5,
                                                        nr_extra=20,
                                                        min_dist=12),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=5)
    env.reset()

    n_trials = 20
    action_dict = dict()
    action_prob = [0] * 4

    for trials in range(1, n_trials + 1):

        # Reset environment
        obs, info = env.reset()

        # Run episode
        for step in range(100):
            # Action
            for a in range(env.get_num_agents()):
                action = np.random.randint(0, 4)
                action_prob[action] += 1
                action_dict.update({a: action})

            # Environment step
            next_obs, all_rewards, done, _ = env.step(action_dict)

            if done['__all__']:
                break
        if trials % 100 == 0:
            action_prob = [1] * 4
Example no. 19
def create_env(seed=None):
    """
    Helper function that creates an env everywhere
    This way it only needs to be defined here
    """
    from flatland.envs.rail_env import RailEnv
    from flatland.envs.observations import TreeObsForRailEnv
    from flatland.envs.rail_generators import complex_rail_generator
    from flatland.envs.schedule_generators import complex_schedule_generator
    # TODO make more configurable
    env = RailEnv(width=20,
                  height=20,
                  obs_builder_object=TreeObsForRailEnv(2),
                  rail_generator=complex_rail_generator(nr_start_goal=100,
                                                        nr_extra=2,
                                                        min_dist=8,
                                                        max_dist=99999,
                                                        seed=seed),
                  schedule_generator=complex_schedule_generator(seed=seed),
                  number_of_agents=3,
                  random_seed=seed)
    return env
Example no. 20
def env_random_update(input_env, decay, agent=False, hardness_lvl=1):

    agent_num = np.random.randint(1, 5)
    env_width = (agent_num + 2) * 5
    env_height = (agent_num + 2) * 5

    if hardness_lvl == 1:

        rail_generator = complex_rail_generator(nr_start_goal=20,
                                                nr_extra=1,
                                                min_dist=9,
                                                max_dist=99999,
                                                seed=0)

        schedule_generator = complex_schedule_generator()
    else:

        rail_generator = sparse_rail_generator(nr_start_goal=9,
                                               nr_extra=1,
                                               min_dist=9,
                                               max_dist=99999,
                                               seed=0)

        schedule_generator = sparse_schedule_generator()

    global env, env_renderer, render

    if render:
        env_renderer.close_window()

    env = RailEnv(width=env_width,
                  height=env_height,
                  rail_generator=rail_generator,
                  schedule_generator=schedule_generator,
                  obs_builder_object=GlobalObsForRailEnv(),
                  number_of_agents=agent_num)

    env_renderer = RenderTool(env)
Example no. 21
def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o == "--sleep-for-animation":
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    env = RailEnv(width=7,
                  height=7,
                  rail_generator=complex_rail_generator(nr_start_goal=10,
                                                        nr_extra=1,
                                                        min_dist=5,
                                                        max_dist=99999,
                                                        seed=1),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=1,
                  obs_builder_object=SingleAgentNavigationObs())

    obs, info = env.reset()
    env_renderer = RenderTool(env)
    env_renderer.render_env(show=True, frames=True, show_observations=True)
    for step in range(100):
        action = np.argmax(obs[0]) + 1
        obs, all_rewards, done, _ = env.step({0: action})
        print("Rewards: ", all_rewards, "  [done=", done, "]")
        env_renderer.render_env(show=True, frames=True, show_observations=True)
        if sleep_for_animation:
            time.sleep(0.1)
        if done["__all__"]:
            break
    env_renderer.close_window()
Example no. 22
# The enclosing function definition is missing from this fragment; the name and
# parameters below are assumptions added so the body is self-contained.
def write_video(_images, epoch):
    height, width, depth = _images[0].shape
    print(len(_images), height, width, depth)
    out = cv2.VideoWriter(f'video_{epoch}.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 30, (width, height))
    for image in _images:
        out.write(cv2.cvtColor(image, cv2.COLOR_RGBA2RGB))
    out.release()


np.random.seed(1)

# Use the complex_rail_generator to generate feasible network configurations with corresponding tasks
# Training on simple small tasks is the best way to get familiar with the environment

obs_builder = GlobalObsForRailEnv()
env = RailEnv(width=20, height=20,
              rail_generator=complex_rail_generator(nr_start_goal=100, nr_extra=2, min_dist=8, max_dist=99999),
              schedule_generator=complex_schedule_generator(), obs_builder_object=obs_builder,
              number_of_agents=3)
env.reset()

env_renderer = RenderTool(env)

# Import your own Agent or use RLlib to train agents on Flatland
# As an example we use a random agent here
agent_kwargs = {"state_size": 0, "action_size": 5}
controller = RandomController(5)

n_trials = 5

# Empty dictionary for all agent action
action_dict = dict()
print("Starting Training...")
Example no. 23
def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o == "--sleep-for-animation":
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    test_envs_root = "./railway"
    test_env_file_path = "testing_stuff.pkl"

    test_env_file_path = os.path.join(
        test_envs_root,
        test_env_file_path
    )

    x_dim = 7
    y_dim = 7
    n_agents = 4

    stochastic_data = {'prop_malfunction': 0.05,  # Percentage of defective agents
                       'malfunction_rate': 100,  # Rate of malfunction occurrence
                       'min_duration': 20,  # Minimal duration of malfunction
                       'max_duration': 50  # Max duration of malfunction
                       }

    # Different agent types (trains) with different speeds.
    speed_ration_map = {1.: 0.25,  # Fast passenger train
                        1. / 2.: 0.25,  # Fast freight train
                        1. / 3.: 0.25,  # Slow commuter train
                        1. / 4.: 0.25}  # Slow freight train

    # env = RailEnv(width=1, height=1, rail_generator=rail_from_file(test_env_file_path),
    #                    schedule_generator=schedule_from_file(test_env_file_path),
    #                    #malfunction_generator_and_process_data=malfunction_from_file(test_env_file_path),
    #                    obs_builder_object=MultipleAgentNavigationObs(max_depth=2, predictor=ShortestPathPredictorForRailEnv(30)))
    #
    # #env.number_of_agents = n_agents
    # n_agents = env.number_of_agents
    env = RailEnv(width=x_dim,
                  height=y_dim,
                  rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=6, max_dist=99999,seed=1),
                  # sparse_rail_generator(max_num_cities=3,
                  #                                      # Number of cities in map (where train stations are)
                  #                                      seed=1,  # Random seed
                  #                                      grid_mode=False,
                  #                                      max_rails_between_cities=2,
                  #                                      max_rails_in_city=3),
                  schedule_generator=complex_schedule_generator(speed_ration_map),
                  number_of_agents=n_agents,
                  malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
    #
    # env = RailEnv(width=7, height=7,
    #               rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=5, max_dist=99999,
    #                                                     seed=1), schedule_generator=complex_schedule_generator(),
    #               number_of_agents=n_agents,
                  obs_builder_object=MultipleAgentNavigationObs(max_depth=2, predictor=ShortestPathPredictorForRailEnv(30)))

    max_steps = int(4 * 2 * (20 + env.height + env.width))
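    # Note: random_seed is never defined in this snippet; it is assumed to be
    # provided by the surrounding script.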
    obs, info = env.reset(regenerate_rail=True,
            regenerate_schedule=True,
            random_seed=random_seed)
    env_renderer = RenderTool(env, gl="PILSVG")
    env_renderer.render_env(show=True, frames=True, show_observations=True)

    # Reset score and done
    score = 0
    env_done = 0
    step = 0
    for step in range(max_steps):
        action_dict = {}
        for i in range(n_agents):
            if not obs:
                action_dict.update({i: 2})
            elif obs[i] is not None:
                action = np.argmax(obs[i][1:4]) + 1
                action_dict.update({i: action})

        obs, all_rewards, done, _ = env.step(action_dict)
        print("Rewards: ", all_rewards, "  [done=", done, "]")

        for a in range(env.get_num_agents()):
            score += all_rewards[a] / env.get_num_agents()

        env_renderer.render_env(show=True, frames=True, show_observations=True)
        if sleep_for_animation:
            time.sleep(0.5)
        if done["__all__"]:
            break

        # Collection information about training
        tasks_finished = 0
        for current_agent in env.agents:
            if current_agent.status == RailAgentStatus.DONE_REMOVED:
                tasks_finished += 1
        done_window = tasks_finished / max(1, env.get_num_agents())
        scores_window = score / max_steps
        print(
            '\rTraining {} Agents on ({},{}).\t Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'.format(
                env.get_num_agents(), x_dim, y_dim,
                step,
                np.mean(scores_window),
                100 * np.mean(done_window)), end=" ")

    tasks_finished = 0
    for current_agent in env.agents:
        if current_agent.status == RailAgentStatus.DONE_REMOVED:
            tasks_finished += 1
    done_window = tasks_finished / max(1, env.get_num_agents())
    scores_window = score / max_steps
    print(
        '\rTraining {} Agents on ({},{}).\t Total Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'.format(
            env.get_num_agents(), x_dim, y_dim,
            step,
            np.mean(scores_window),
            100 * np.mean(done_window)), end=" ")

    env_renderer.close_window()
Example no. 24
def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o == "--sleep-for-animation":
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    test_envs_root = "./railway"
    test_env_file_path = "testing_stuff.pkl"

    test_env_file_path = os.path.join(test_envs_root, test_env_file_path)

    x_dim = 7
    y_dim = 7
    n_agents = 3

    stochastic_data = {
        'prop_malfunction': 0.05,  # Percentage of defective agents
        'malfunction_rate': 100,  # Rate of malfunction occurrence
        'min_duration': 2,  # Minimal duration of malfunction
        'max_duration': 5  # Max duration of malfunction
    }

    # Different agent types (trains) with different speeds.
    speed_ration_map = {
        1.: 0.25,  # Fast passenger train
        1. / 2.: 0.25,  # Fast freight train
        1. / 3.: 0.25,  # Slow commuter train
        1. / 4.: 0.25
    }  # Slow freight train

    # env = RailEnv(width=1, height=1, rail_generator=rail_from_file(test_env_file_path),
    #                    schedule_generator=schedule_from_file(test_env_file_path),
    #                    #malfunction_generator_and_process_data=malfunction_from_file(test_env_file_path),
    #                    obs_builder_object=MultipleAgentNavigationObs(max_depth=2, predictor=ShortestPathPredictorForRailEnv(30)))
    #
    # #env.number_of_agents = n_agents
    # n_agents = env.number_of_agents
    env = RailEnv(
        width=x_dim,
        height=y_dim,
        rail_generator=complex_rail_generator(nr_start_goal=10,
                                              nr_extra=1,
                                              min_dist=6,
                                              max_dist=99999,
                                              seed=1),
        # sparse_rail_generator(max_num_cities=3,
        #                                      # Number of cities in map (where train stations are)
        #                                      seed=1,  # Random seed
        #                                      grid_mode=False,
        #                                      max_rails_between_cities=2,
        #                                      max_rails_in_city=3),
        schedule_generator=complex_schedule_generator(speed_ration_map),
        number_of_agents=n_agents,
        malfunction_generator_and_process_data=malfunction_from_params(
            stochastic_data),
        #
        # env = RailEnv(width=7, height=7,
        #               rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=5, max_dist=99999,
        #                                                     seed=1), schedule_generator=complex_schedule_generator(),
        #               number_of_agents=n_agents,
        obs_builder_object=MultipleAgentNavigationObs(
            max_depth=2, predictor=ShortestPathPredictorForRailEnv(30)))

    max_steps = int(4 * 2 * (20 + env.height + env.width))
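    # Note: random_seed is never defined in this snippet; it is assumed to be
    # provided by the surrounding script.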
    obs, info = env.reset(regenerate_rail=True,
                          regenerate_schedule=True,
                          random_seed=random_seed)

    env_renderer = RenderTool(env, gl="PILSVG")
    env_renderer.render_env(show=True, frames=True, show_observations=True)

    # Reset score and done
    score = 0
    env_done = 0
    step = 0
    for step in range(max_steps):

        for i in range(n_agents):
            if obs[i] is not None:
                observations, prediction_data, prediction_pos = obs[i]
                break

        action_dict = {}
        next_shortest_actions = 2 * np.ones(n_agents)
        next_next_shortest_actions = 2 * np.ones(n_agents)
        agent_conflicts = np.zeros((n_agents, n_agents))
        agent_conflicts_count = np.zeros((n_agents, n_agents))
        minDist = -1 * np.ones(n_agents)
        incDiff1 = -1 * np.ones(n_agents)
        incDiff2 = -1 * np.ones(n_agents)
        malfunc = np.zeros(n_agents)
        speed = np.ones(n_agents)
        pos_frac = np.ones(n_agents)
        agent_num_conflicts = []

        vals = []
        counts = []
        counter = np.zeros(n_agents)
        for i in range(30):
            pos = prediction_pos[i]
            val, count = np.unique(pos, return_counts=True)
            if (val[0] == -1):
                val = val[1:]
                count = count[1:]
            vals.append(val)
            counts.append(count)

            for j, curVal in enumerate(val):
                #curVal = vals[i]
                curCount = count[j]
                if curCount > 1:
                    idxs = np.argwhere(pos == curVal)
                    lsIdx = [int(x) for x in idxs]
                    combs = list(combinations(lsIdx, 2))
                    for k, comb in enumerate(combs):
                        counter[comb[0]] += 1
                        counter[comb[1]] += 1
                        agent_conflicts_count[comb[0],
                                              comb[1]] = (counter[comb[0]] +
                                                          counter[comb[1]]) / 2
                        if agent_conflicts[comb[0], comb[1]] == 0:
                            agent_conflicts[comb[0], comb[1]] = i
                        else:
                            agent_conflicts[comb[0], comb[1]] = min(
                                i, agent_conflicts[comb[0], comb[1]])

        for i in range(n_agents):
            agent_num_conflicts.append(sum(agent_conflicts[i, :]))
            if not obs or obs is None or obs[i] is None:
                action_dict.update({i: 2})
            elif obs[i][0] is not None:
                shortest_action = np.argmax(obs[i][0][1:4]) + 1
                next_shortest_action = np.argmax(obs[i][0][5:7]) + 1
                next_next_shortest_action = np.argmax(obs[i][0][8:10]) + 1
                next_shortest_actions[i] = next_shortest_action
                next_next_shortest_actions[i] = next_next_shortest_action
                malfunc[i] = obs[i][0][-3]
                speed[i] = obs[i][0][-2]
                pos_frac[i] = obs[i][0][-1]
                minDist[i] = obs[i][0][0]
                incDiff1[i] = obs[i][0][-5]
                incDiff2[i] = obs[i][0][-4]
                action_dict.update({i: shortest_action})
            else:
                action_dict.update({i: 2})
        mal_agents = (np.array(-1))
        for i in range(n_agents):
            if agent_num_conflicts[i] > 0:
                mal_agents = np.where(malfunc > 0)
                for i, mal_agent in enumerate(mal_agents[0]):
                    if mal_agent is None:
                        break
                    conflict_agents = np.where(
                        agent_conflicts[:, int(mal_agent)] > 0)

                    for j, cur_conflict_agent in enumerate(conflict_agents[0]):
                        cur_conflict_agent = int(cur_conflict_agent)
                        steps_conflict = agent_conflicts[cur_conflict_agent,
                                                         mal_agent]
                        if steps_conflict <= 3:
                            if incDiff1[cur_conflict_agent] == -1:
                                if int(minDist[cur_conflict_agent]) >= 5:
                                    action_dict.update({cur_conflict_agent: 4})
                                elif agent_conflicts_count[cur_conflict_agent,
                                                           mal_agent] > 1:
                                    action_dict.update({cur_conflict_agent: 4})
                            elif minDist[cur_conflict_agent] > incDiff1[
                                    cur_conflict_agent]:
                                action_dict.update({cur_conflict_agent: 4})
                            else:
                                action_dict.update({
                                    cur_conflict_agent:
                                    next_shortest_actions[cur_conflict_agent]
                                })

        obs, all_rewards, done, _ = env.step(action_dict)

        print("Rewards: ", all_rewards, "  [done=", done, "]")

        for a in range(env.get_num_agents()):
            score += all_rewards[a] / env.get_num_agents()

        env_renderer.render_env(show=True, frames=True, show_observations=True)
        if sleep_for_animation:
            time.sleep(0.5)
        if done["__all__"]:
            break

        # Collection information about training
        tasks_finished = 0
        for current_agent in env.agents:
            if current_agent.status == RailAgentStatus.DONE_REMOVED:
                tasks_finished += 1
        done_window = tasks_finished / max(1, env.get_num_agents())
        scores_window = score / max_steps
        print(
            '\rTraining {} Agents on ({},{}).\t Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'
            .format(env.get_num_agents(), x_dim, y_dim, step,
                    np.mean(scores_window), 100 * np.mean(done_window)),
            end=" ")

    tasks_finished = 0
    for current_agent in env.agents:
        if current_agent.status == RailAgentStatus.DONE_REMOVED:
            tasks_finished += 1
    done_window = tasks_finished / max(1, env.get_num_agents())
    scores_window = score / max_steps
    print(
        '\rTraining {} Agents on ({},{}).\t Total Steps {}\t Average Score: {:.3f}\tDones: {:.2f}%\t'
        .format(env.get_num_agents(), x_dim, y_dim, step,
                np.mean(scores_window), 100 * np.mean(done_window)),
        end=" ")

    env_renderer.close_window()
Example no. 25
speed_ration_map = {
    1.: 0.25,  # Fast passenger train
    1. / 2.: 0.25,  # Fast freight train
    1. / 3.: 0.25,  # Slow commuter train
    1. / 4.: 0.25
}  # Slow freight train

env = RailEnv(width=width,
              height=height,
              rail_generator=complex_rail_generator(nr_start_goal=n_start_goal,
                                                    nr_extra=3,
                                                    min_dist=8,
                                                    max_dist=99999,
                                                    seed=seed),
              schedule_generator=complex_schedule_generator(
                  speed_ratio_map=speed_ration_map),
              number_of_agents=number_agents,
              obs_builder_object=TreeObsForRailEnv(max_depth=5))
env.reset()

env_renderer = RenderTool(env, agent_render_variant=3)
env_renderer.reset()
env_renderer.render_env(show=True,
                        show_predictions=False,
                        show_observations=False)
env.step(dict(zip(range(number_agents), [2] * number_agents)))
#%%
#table = LookUpTable(env.rail.grid, make_table=True)
#dico = table.table
#ag = Agent(env.agents[0])
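The module-level names used above (width, height, n_start_goal, number_agents, seed) are not set in this fragment; illustrative values would be:

width, height = 30, 30
n_start_goal = 10
number_agents = 4
seed = 1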
Example no. 26
def evalfun(debug=False, refresh=0.1):  # refresh default = 0.1
    # A list of (mapsize, agent count) tuples, change or extend this to test different sizes.
    #problemsizes = [(5, 1), (7, 2), (10,3), (13,4), (40, 20)]

    _seeds = np.random.randint(1, 99, 15)
    #_seed = 2984379

    avg = {}
    for x in range(1, 4):
        avg[x] = []
        problemsizes = [(5, x), (6, x), (8, x), (10, x), (15, x)]
        for problemsize in problemsizes:
            avg_time = 0
            successes = 0
            for seed in _seeds:
                dimension = problemsize[0]
                NUMBER_OF_AGENTS = problemsize[1]

                # Create new environment.
                env = RailEnv(width=dimension,
                              height=dimension,
                              rail_generator=complex_rail_generator(
                                  nr_start_goal=int(1.5 * NUMBER_OF_AGENTS),
                                  nr_extra=int(1.2 * NUMBER_OF_AGENTS),
                                  min_dist=int(floor(dimension / 2)),
                                  max_dist=99999,
                                  seed=0),
                              schedule_generator=complex_schedule_generator(),
                              malfunction_generator_and_process_data=None,
                              number_of_agents=NUMBER_OF_AGENTS)

                env_renderer = RenderTool(env,
                                          screen_width=1920,
                                          screen_height=1080)

                env.reset(random_seed=int(seed))

                if len(env.agents) != NUMBER_OF_AGENTS:
                    continue

                start = time.time()
                schedule = planpath.search(env)
                duration = time.time() - start

                assert env.num_resets == 1 and env._elapsed_steps == 0

                # Run the schedule
                success = False
                if schedule is not None:
                    for action in schedule:
                        _, _, _done, _ = env.step(action)
                        success = _done['__all__']
                        #print(env.agents)

                    if success:
                        avg_time += duration
                        print("Success:", problemsize, seed, duration)
                        successes += 1

                    else:
                        print("Bad schedule - failed.")
                else:
                    print("Couldn't find solution for seed: ", duration, seed)
                    env_renderer.render_env(show=True,
                                            frames=False,
                                            show_observations=False)

            # Guard against division by zero when no seed produced a successful plan.
            avg_time = avg_time / successes if successes else 0.0
            avg[x].append((problemsize[0], avg_time))
    #print("%10s\t%8s\t%9s" % ("Dimensions", "Success", "Runtime"))

    #

    # Initialize positions.

    # Time the search.

    #if debug:
    #env_renderer.render_env(show=True, frames=False, show_observations=False)
    #time.sleep(refresh)

    # Validate that environment state is unchanged.

    # Print the performance of the algorithm
    #print("%10s\t%8s\t%9.6f" % (str(problemsize), str(success), duration))
        print(avg)
Example no. 27
def evalfun(debug=False, refresh=0.1):  # refresh default = 0.1
    # A list of (mapsize, agent count) tuples, change or extend this to test different sizes.
    #problemsizes = [(5, 1), (7, 2), (10,3), (13,4), (40, 20)]
    #problemsizes = [(5, 1), (5,2), (6,3), (7,3), (14,4), (8,5)]
    problemsizes = [(6, 3)]

    _seed = np.random.randint(1, 9999999)

    #_seed = 2

    print("Seed:", _seed)
    print("%10s\t%8s\t%9s" % ("Dimensions", "Success", "Runtime"))
    for problemsize in problemsizes:

        dimension = problemsize[0]
        NUMBER_OF_AGENTS = problemsize[1]

        # Create new environment.
        env = RailEnv(width=dimension,
                      height=dimension,
                      rail_generator=complex_rail_generator(
                          nr_start_goal=int(1.5 * NUMBER_OF_AGENTS),
                          nr_extra=int(1.2 * NUMBER_OF_AGENTS),
                          min_dist=int(floor(dimension / 2)),
                          max_dist=99999,
                          seed=0),
                      schedule_generator=complex_schedule_generator(),
                      malfunction_generator_and_process_data=None,
                      number_of_agents=NUMBER_OF_AGENTS)

        env_renderer = RenderTool(env, screen_width=1920, screen_height=1080)

        # Initialize positions.
        env.reset(random_seed=_seed)
        env_renderer.render_env(show=True,
                                frames=False,
                                show_observations=False)
        # Time the search.
        start = time.time()
        schedule = planpath.search(env)
        #schedule = planpath.better_search(env)
        duration = time.time() - start

        if debug:
            env_renderer.render_env(show=True,
                                    frames=False,
                                    show_observations=False)
            time.sleep(refresh)

        # Validate that environment state is unchanged.
        assert env.num_resets == 1 and env._elapsed_steps == 0

        # Run the schedule
        success = False
        for action in schedule:
            _, _, _done, _ = env.step(action)
            success = _done['__all__']
            #print(env.agents)
            if debug:
                #for agent in env.agents:
                #if agent.position:
                #agent_y, agent_x = agent.position
                #print(env.get_valid_directions_on_grid(agent_y,agent_x))
                print(action)
                env_renderer.render_env(show=True,
                                        frames=False,
                                        show_observations=False)
                time.sleep(refresh)

        # Print the performance of the algorithm
        print("%10s\t%8s\t%9.6f" % (str(problemsize), str(success), duration))
Example no. 28
def main(argv):
    try:
        opts, args = getopt.getopt(argv, "n:", ["n_episodes="])
    except getopt.GetoptError:
        print('training_navigation.py -n <n_episodes>')
        sys.exit(2)
    for opt, arg in opts:
        if opt in ('-n', '--n_episodes'):
            n_episodes = int(arg)

    ## Initialize the random number generators
    random.seed(1)
    np.random.seed(1)

    # Initialize a random map with a random number of agents
    x_dim = np.random.randint(8, 20)
    y_dim = np.random.randint(8, 20)
    n_agents = np.random.randint(3, 8)
    n_goals = n_agents + np.random.randint(0, 3)
    min_dist = int(0.75 * min(x_dim, y_dim))
    tree_depth = 2
    print("main2")
    demo = False

    # Get an observation builder and predictor
    observation_helper = TreeObsForRailEnv(max_depth=tree_depth, predictor=ShortestPathPredictorForRailEnv())

    env = RailEnv(width=x_dim,
                  height=y_dim,
                  rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
                                                        max_dist=99999,
                                                        seed=0),
                  schedule_generator=complex_schedule_generator(),
                  obs_builder_object=observation_helper,
                  number_of_agents=n_agents)
    env.reset(True, True)

    handle = env.get_agent_handles()
    features_per_node = env.obs_builder.observation_dim
    nr_nodes = 0
    for i in range(tree_depth + 1):
        nr_nodes += np.power(4, i)
    state_size = 2 * features_per_node * nr_nodes  # We will use two time steps per observation --> 2x state_size
    action_size = 5

    # We set the number of episodes we would like to train on
    if 'n_episodes' not in locals():
        n_episodes = 60000

    # Set max number of steps per episode as well as other training relevant parameter
    max_steps = int(3 * (env.height + env.width))
    eps = 1.
    eps_end = 0.005
    eps_decay = 0.9995
    action_dict = dict()
    final_action_dict = dict()
    scores_window = deque(maxlen=100)
    done_window = deque(maxlen=100)
    time_obs = deque(maxlen=2)
    scores = []
    dones_list = []
    action_prob = [0] * action_size
    agent_obs = [None] * env.get_num_agents()
    agent_next_obs = [None] * env.get_num_agents()
    # Initialize the agent
    agent = Agent(state_size, action_size)

    # Here you can pre-load an agent
    if False:
        with path(torch_training.Nets, "avoid_checkpoint500.pth") as file_in:
            agent.qnetwork_local.load_state_dict(torch.load(file_in))

    # Do training over n_episodes
    for episodes in range(1, n_episodes + 1):
        """
        Training Curriculum: In order to get good generalization we change the number of agents
        and the size of the levels every 50 episodes.
        """
        if episodes % 50 == 0:
            x_dim = np.random.randint(8, 20)
            y_dim = np.random.randint(8, 20)
            n_agents = np.random.randint(3, 8)
            n_goals = n_agents + np.random.randint(0, 3)
            min_dist = int(0.75 * min(x_dim, y_dim))
            env = RailEnv(width=x_dim,
                          height=y_dim,
                          rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
                                                                max_dist=99999,
                                                                seed=0),
                          schedule_generator=complex_schedule_generator(),
                          obs_builder_object=TreeObsForRailEnv(max_depth=3,
                                                               predictor=ShortestPathPredictorForRailEnv()),
                          number_of_agents=n_agents)

            # Adjust the parameters according to the new env.
            max_steps = int(3 * (env.height + env.width))
            agent_obs = [None] * env.get_num_agents()
            agent_next_obs = [None] * env.get_num_agents()

        # Reset environment
        obs, info = env.reset(True, True)

        # Setup placeholder for finals observation of a single agent. This is necessary because agents terminate at
        # different times during an episode
        final_obs = agent_obs.copy()
        final_obs_next = agent_next_obs.copy()

        # Build agent specific observations
        for a in range(env.get_num_agents()):
            data, distance, agent_data = split_tree_into_feature_groups(obs[a], tree_depth)
            data = norm_obs_clip(data)
            distance = norm_obs_clip(distance)
            agent_data = np.clip(agent_data, -1, 1)
            obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))

        # Accumulate two time steps of observation (Here just twice the first state)
        for i in range(2):
            time_obs.append(obs)

        # Build the agent specific double time step observation
        for a in range(env.get_num_agents()):
            agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))

        score = 0
        env_done = 0
        # Run episode
        for step in range(max_steps):

            # Action
            for a in range(env.get_num_agents()):
                if demo:
                    eps = 0
                # action = agent.act(np.array(obs[a]), eps=eps)
                action = agent.act(agent_obs[a], eps=eps)
                action_prob[action] += 1
                action_dict.update({a: action})
            # Environment step

            next_obs, all_rewards, done, _ = env.step(action_dict)
            for a in range(env.get_num_agents()):
                data, distance, agent_data = split_tree_into_feature_groups(next_obs[a], tree_depth)
                data = norm_obs_clip(data)
                distance = norm_obs_clip(distance)
                agent_data = np.clip(agent_data, -1, 1)
                next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
            time_obs.append(next_obs)

            # Update replay buffer and train agent
            for a in range(env.get_num_agents()):
                agent_next_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
                if done[a]:
                    final_obs[a] = agent_obs[a].copy()
                    final_obs_next[a] = agent_next_obs[a].copy()
                    final_action_dict.update({a: action_dict[a]})
                if not demo and not done[a]:
                    agent.step(agent_obs[a], action_dict[a], all_rewards[a], agent_next_obs[a], done[a])
                score += all_rewards[a] / env.get_num_agents()

            agent_obs = agent_next_obs.copy()
            if done['__all__']:
                env_done = 1
                for a in range(env.get_num_agents()):
                    agent.step(final_obs[a], final_action_dict[a], all_rewards[a], final_obs_next[a], done[a])
                break
        # Epsilon decay
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon

        done_window.append(env_done)
        scores_window.append(score / max_steps)  # save most recent score
        scores.append(np.mean(scores_window))
        dones_list.append((np.mean(done_window)))

        print(
            '\rTraining {} Agents on ({},{}).\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
                env.get_num_agents(), x_dim, y_dim,
                episodes,
                np.mean(scores_window),
                100 * np.mean(done_window),
                eps, action_prob / np.sum(action_prob)), end=" ")

        if episodes % 100 == 0:
            print(
                '\rTraining {} Agents.\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
                    env.get_num_agents(),
                    episodes,
                    np.mean(scores_window),
                    100 * np.mean(done_window),
                    eps,
                    action_prob / np.sum(action_prob)))
            torch.save(agent.qnetwork_local.state_dict(),
                       './Nets/avoid_checkpoint' + str(episodes) + '.pth')
            action_prob = [1] * action_size
    plt.plot(scores)
    plt.show()
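This training script additionally relies on helpers from the Flatland baseline code; a sketch of the imports it assumes (the torch_training / utils module paths are assumptions):

import getopt
import random
import sys
from collections import deque

import matplotlib.pyplot as plt
import numpy as np
import torch
from importlib_resources import path

from flatland.envs.rail_env import RailEnv
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator

# Assumed to come from the baselines repository:
import torch_training.Nets
from torch_training.dueling_double_dqn import Agent
from utils.observation_utils import norm_obs_clip, split_tree_into_feature_groups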
Example no. 29
                    current['valid'] = False
                    current['distance_to_goal'] = np.inf

        return weights


seed = random.randint(0, 2**32)
print(f"Seed: {seed}")
env = RailEnv(width=20,
              height=20,
              rail_generator=complex_rail_generator(nr_start_goal=10,
                                                    nr_extra=10,
                                                    min_dist=5,
                                                    max_dist=99999,
                                                    seed=seed),
              schedule_generator=complex_schedule_generator(),
              number_of_agents=1,
              obs_builder_object=CustomWeightObserver())

env_renderer = RenderTool(env, gl="PILSVG")

agent = AbelAgent(218, 5)
n_trials = 50

for trials in range(1, n_trials + 1):
    # Reset Environment
    obs = env.reset()
    env_renderer.reset()
    env_renderer.render_env(show=True, frames=True, show_observations=True)
    score = 0
Example no. 30
def run_test(parameters, agent, test_nr=0, tree_depth=3):
    # Parameter initialization
    lp = LineProfiler()
    features_per_node = 9
    start_time_scoring = time.time()
    action_dict = dict()
    nr_trials_per_test = 5
    print('Running Test {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(
        test_nr, parameters[0], parameters[1], parameters[2]))

    # Reset all measurements
    time_obs = deque(maxlen=2)
    test_scores = []
    test_dones = []

    # Reset environment
    random.seed(parameters[3])
    np.random.seed(parameters[3])
    nr_paths = max(2, parameters[2] + int(0.5 * parameters[2]))
    min_dist = int(min([parameters[0], parameters[1]]) * 0.75)
    env = RailEnv(width=parameters[0],
                  height=parameters[1],
                  rail_generator=complex_rail_generator(nr_start_goal=nr_paths,
                                                        nr_extra=5,
                                                        min_dist=min_dist,
                                                        max_dist=99999,
                                                        seed=parameters[3]),
                  schedule_generator=complex_schedule_generator(),
                  obs_builder_object=GlobalObsForRailEnv(),
                  number_of_agents=parameters[2])
    max_steps = int(3 * (env.height + env.width))
    lp_step = lp(env.step)
    lp_reset = lp(env.reset)

    agent_obs = [None] * env.get_num_agents()
    printProgressBar(0,
                     nr_trials_per_test,
                     prefix='Progress:',
                     suffix='Complete',
                     length=20)
    for trial in range(nr_trials_per_test):
        # Reset the env

        lp_reset(True, True)
        obs, info = env.reset(True, True)
        for a in range(env.get_num_agents()):
            data, distance, agent_data = split_tree_into_feature_groups(
                obs[a], tree_depth)
            data = norm_obs_clip(data)
            distance = norm_obs_clip(distance)
            agent_data = np.clip(agent_data, -1, 1)
            obs[a] = np.concatenate((np.concatenate(
                (data, distance)), agent_data))

        for i in range(2):
            time_obs.append(obs)

        for a in range(env.get_num_agents()):
            agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))

        # Run episode
        trial_score = 0
        for step in range(max_steps):

            for a in range(env.get_num_agents()):
                action = agent.act(agent_obs[a], eps=0)
                action_dict.update({a: action})

            # Environment step
            next_obs, all_rewards, done, _ = lp_step(action_dict)

            for a in range(env.get_num_agents()):
                data, distance, agent_data = split_tree_into_feature_groups(
                    next_obs[a], tree_depth)
                data = norm_obs_clip(data)
                distance = norm_obs_clip(distance)
                agent_data = np.clip(agent_data, -1, 1)
                next_obs[a] = np.concatenate((np.concatenate(
                    (data, distance)), agent_data))
            time_obs.append(next_obs)
            for a in range(env.get_num_agents()):
                agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
                trial_score += all_rewards[a] / env.get_num_agents()

            if done['__all__']:
                break
        test_scores.append(trial_score / max_steps)
        test_dones.append(done['__all__'])
        printProgressBar(trial + 1,
                         nr_trials_per_test,
                         prefix='Progress:',
                         suffix='Complete',
                         length=20)
    end_time_scoring = time.time()
    tot_test_time = end_time_scoring - start_time_scoring
    lp.print_stats()
    return test_scores, test_dones, tot_test_time
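run_test assumes the imports below (a sketch; LineProfiler comes from the third-party line_profiler package, and the observation helpers from the baselines as above). Note also that the snippet builds GlobalObsForRailEnv observations but then feeds them to the tree-observation helpers, which will not work as written.

import random
import time
from collections import deque

import numpy as np
from line_profiler import LineProfiler

from flatland.envs.rail_env import RailEnv
from flatland.envs.observations import GlobalObsForRailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator
from utils.observation_utils import norm_obs_clip, split_tree_into_feature_groups  # assumed location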