예제 #1
0
def tests_random_interference_from_outside():
    """Check that an episode is unaffected by the global random generators.

    The same episode (one agent, always ``RailEnvActions(2)``, 200 steps) is
    played twice on identically constructed environments. The first run
    records the reward and position of agent 0 after every step. The second
    run seeds the global ``random`` and ``np.random`` generators and draws
    from them while stepping; it must reproduce the recorded rewards and
    positions exactly.
    """

    def build_env(rail):
        # Both runs have to construct and reset the environment the same way,
        # so the sequence lives in one place.
        env = RailEnv(width=25,
                      height=30,
                      rail_generator=rail_from_grid_transition_map(rail),
                      schedule_generator=random_schedule_generator(seed=2),
                      number_of_agents=1,
                      random_seed=1)
        env.reset()
        env.agents[0].speed_data['speed'] = 0.33
        env.reset(False, False, False, random_seed=10)
        return env

    # First run: record the reference trajectory.
    rail, _ = make_simple_rail2()
    env = build_env(rail)
    env_data = []
    for _ in range(200):
        action_dict: Dict[int, RailEnvActions] = {
            agent.handle: RailEnvActions(2) for agent in env.agents
        }
        _, reward, _, _ = env.step(action_dict)
        env_data.append((reward[0], env.agents[0].position))

    # Second run: same episode, but the global generators are seeded before
    # the environment is built and are used while it is being stepped.
    rail, _ = make_simple_rail2()
    random.seed(47)
    np.random.seed(1234)
    env = build_env(rail)

    dummy_list = [1, 2, 6, 7, 8, 9, 4, 5, 4]
    for expected_reward, expected_position in env_data:
        action_dict: Dict[int, RailEnvActions] = {}
        for agent in env.agents:
            action_dict[agent.handle] = RailEnvActions(2)

            # Dummy draws from the global generators between env steps.
            random.shuffle(dummy_list)
            np.random.rand()

        _, reward, _, _ = env.step(action_dict)
        assert reward[0] == expected_reward
        assert env.agents[0].position == expected_position
def test_single_malfunction_generator():
    """
    Check that the single malfunction generator yields exactly one malfunction.

    An environment with 10 agents is reset 10 times. After each reset every
    agent is driven with ``RailEnvActions(2)`` for 10 steps, and the sum of
    ``nr_malfunctions`` over all agents must then be exactly 1.
    """
    rail, rail_map = make_simple_rail2()
    rail_generator = rail_from_grid_transition_map(rail)
    schedule_generator = random_schedule_generator()
    malfunction_generator = single_malfunction_generator(
        earlierst_malfunction=10, malfunction_duration=5)
    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_generator,
                  schedule_generator=schedule_generator,
                  number_of_agents=10,
                  malfunction_generator_and_process_data=malfunction_generator)

    for trial in range(10):
        env.reset()
        forward_actions = {}
        print(trial)
        for _ in range(10):
            # Every agent goes forward on every step.
            forward_actions.update(
                (agent.handle, RailEnvActions(2)) for agent in env.agents)
            env.step(forward_actions)

        # Count the malfunctions accumulated by all agents in this trial.
        malfunction_count = sum(agent.malfunction_data['nr_malfunctions']
                                for agent in env.agents)
        assert malfunction_count == 1
예제 #3
0
    def bfs(self,
            node: Node,
            node_observations: np.ndarray,
            current_level=0,
            abs_pos=0):
        """
        Write the feature vectors of the subtree rooted at ``node`` into
        ``node_observations``.

        Despite the method name, the traversal is a recursive pre-order
        (depth-first) walk: the node's own vector is written first, then each
        available action's child is visited in turn. Where a child is missing
        (or is stored as a float placeholder) and the maximum depth has not
        been reached, the rows that subtree would have used are skipped so
        that row indices stay aligned.

        :param node: current node
        :param node_observations: accumulated obs vectors of nodes; row
            ``abs_pos`` receives the vector of ``node``
        :param current_level: current level of node in the tree (how deep)
        :param abs_pos: absolute index in flat obs vector
        :return: the next row index after the rows used or skipped for this
            subtree
        """

        if self._small_tree:
            node_obs = _get_small_node_feature_vector(node)
        else:
            node_obs = _get_node_feature_vector(node)
        node_observations[abs_pos, :] = node_obs
        abs_pos += 1

        children = node.childs
        for action in self._available_actions:
            # Children are keyed by the action's character code; a direct
            # lookup replaces scanning every key.
            key = RailEnvActions.to_char(action.value)
            if key in children and not isinstance(children[key], float):
                abs_pos = self.bfs(children[key],
                                   node_observations,
                                   current_level=current_level + 1,
                                   abs_pos=abs_pos)
            elif current_level != self._builder.max_depth:
                # No real child here: skip the rows its subtree would occupy.
                abs_pos += self._count_missing_nodes(current_level + 1)

        return abs_pos
예제 #4
0
def get_action_masking(env, agent, action_size, train_params):
    """
    Build the binary action mask for one agent.

    Every action starts as allowed (1), except that ``DO_NOTHING`` is masked
    out (0) when ``train_params.allow_no_op`` is false. If
    ``train_params.action_masking`` is enabled and the agent currently has a
    position, every action for which ``_check_action_on_agent`` reports an
    invalid cell or an invalid transition is masked out as well.

    :param env: the environment
    :param agent: the agent index/handler
    :param action_size: the environment's number of available actions
    :param train_params: training parameters to customize the mask
    :return: the action mask for the passed agent
    """

    # Mask initialization
    forbid_no_op = not train_params.allow_no_op
    action_mask = [
        0 if (forbid_no_op and action == RailEnvActions.DO_NOTHING) else 1
        for action in range(action_size)
    ]

    if not train_params.action_masking:
        return action_mask

    # These lookups do not depend on the action, so do them once.
    rail_env = env.get_rail_env()
    rail_agent = rail_env.agents[agent]

    # An agent without a position (per the original note: not started yet, or
    # already arrived and removed) keeps the initial mask.
    if rail_agent.position is None:
        return action_mask

    # Mask filling
    for action in range(action_size):
        _, cell_valid, _, _, transition_valid = rail_env._check_action_on_agent(
            RailEnvActions(action), rail_agent)
        if not (cell_valid and transition_valid):
            action_mask[action] = 0

    return action_mask
예제 #5
0
def test_malfunction_process_statistically():
    """Compare per-step malfunction counters against recorded values.

    Ten agents are stepped 20 times after a reset with ``random_seed=10``.
    Before every step, each agent's ``malfunction_data['malfunction']`` value
    must match the corresponding entry of the expected table below.
    """
    # Minimum and maximum duration are both 5, so every malfunction has the
    # same length.
    malfunction_params = MalfunctionParameters(
        malfunction_rate=1 / 5,  # Rate of malfunction occurrence
        min_duration=5,  # Minimal duration of malfunction
        max_duration=5  # Max duration of malfunction
    )

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=10,
                  malfunction_generator_and_process_data=malfunction_from_params(
                      malfunction_params),
                  obs_builder_object=SingleAgentNavigationObs())

    env.reset(True, True, False, random_seed=10)

    env.agents[0].target = (0, 0)

    # One row per agent, one column per step. To regenerate the table, start
    # from ten empty lists and append each agent's
    # malfunction_data['malfunction'] inside the loop instead of asserting.
    expected_malfunction = [
        [0, 0, 0, 0, 5, 4, 3, 2, 1, 0, 5, 4, 3, 2, 1, 0, 0, 0, 5, 4],
        [0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 5, 4, 3, 2, 1, 0, 5, 4, 3, 2, 1, 0, 0, 5, 4, 3, 2],
        [0, 0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 5, 4, 3, 2, 1],
        [0, 0, 5, 4, 3, 2, 1, 0, 0, 5, 4, 3, 2, 1, 0, 5, 4, 3, 2, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0],
        [5, 4, 3, 2, 1, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 5],
        [5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 4, 3, 2],
        [5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 5, 4]
    ]

    for step in range(20):
        actions: Dict[int, RailEnvActions] = {}
        for handle in range(env.get_num_agents()):
            # Each agent gets a randomly drawn action.
            actions[handle] = RailEnvActions(np.random.randint(4))
            observed = env.agents[handle].malfunction_data['malfunction']
            assert observed == expected_malfunction[handle][step]
        env.step(actions)
예제 #6
0
def test_last_malfunction_step():
    """
    Check that the agent advances exactly when it is not malfunctioning.

    A single agent with speed 1/3 always goes forward for 20 steps. Its
    malfunction counter is forced to 0 and the next malfunction is scheduled
    in 2 steps. On every step the agent's ``position_fraction`` must change
    if the agent was healthy before the step and is still healthy after it,
    and must stay the same otherwise.
    """

    rail, _ = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=2),
                  number_of_agents=1,
                  random_seed=1)
    env.reset()
    env.agents[0].speed_data['speed'] = 1. / 3.
    env.agents[0].target = (0, 0)

    env.reset(False, False, True)
    # Force malfunction to be off at beginning and next malfunction to happen in 2 steps
    env.agents[0].malfunction_data['next_malfunction'] = 2
    env.agents[0].malfunction_data['malfunction'] = 0

    for _ in range(20):
        # Go forward all the time
        action_dict: Dict[int, RailEnvActions] = {
            agent.handle: RailEnvActions(2) for agent in env.agents
        }

        # The flag is recomputed from scratch on every step, so it never
        # depends on a value left over from the previous iteration.
        healthy_before = env.agents[0].malfunction_data['malfunction'] < 1
        pre_position = env.agents[0].speed_data['position_fraction']

        env.step(action_dict)

        # A malfunction that is active after the step blocks this step's move.
        broken_after = env.agents[0].malfunction_data['malfunction'] > 0
        post_position = env.agents[0].speed_data['position_fraction']

        agent_can_move = healthy_before and not broken_after
        if agent_can_move:
            assert pre_position != post_position
        else:
            assert post_position == pre_position
예제 #7
0
    def _check_invalid_transitions(self, action_dict):
        """
        Compute a per-agent penalty for attempted invalid moves.

        Only agents whose status is ``RailAgentStatus.ACTIVE`` are checked;
        an agent missing from ``action_dict`` is treated as choosing action 0.
        Any other agent, and any agent whose action is valid, gets ``0.0``.

        :param action_dict: dictionary containing for each agent the decided action
        :return: dictionary mapping each agent index to ``self.invalid_action_penalty``
            or ``0.0``
        """
        rail_env = self.unwrapped.rail_env
        penalties = {}
        for handle in range(rail_env.get_num_agents()):
            train = rail_env.agents[handle]
            penalty = 0.0
            if train.status == RailAgentStatus.ACTIVE:
                chosen = RailEnvActions(action_dict.get(handle, 0))
                _, cell_valid, _, _, transition_valid = rail_env._check_action_on_agent(
                    chosen, train)
                # Both the target cell and the transition must be valid.
                if not (cell_valid and transition_valid):
                    penalty = self.invalid_action_penalty
            penalties[handle] = penalty

        return penalties
예제 #8
0
 def process_action(self, action):
     """Wrap ``action`` as a ``RailEnvActions`` member in a dict keyed by agent 0."""
     agent_action = RailEnvActions(action)
     return {0: agent_action}
예제 #9
0
    def process_step(self, observation, reward, done, info):
        """Post-process the values returned by one environment step.

        The observation goes through ``process_observation`` unless ``done``
        is truthy, in which case it is returned unchanged. The reward always
        goes through ``process_reward``. The incoming ``info`` is discarded
        and an empty dict is returned in its place.
        """
        processed_observation = (observation if done else
                                 self.process_observation(observation))
        processed_reward = self.process_reward(reward)
        return processed_observation, processed_reward, done, {}


# Notebook-style exploration: probe the environment's shapes, then start
# building the network. NOTE(review): `env` and `WINDOW_LENGTH` are defined
# outside this excerpt.

# First entry of the environment's action space, used as the action count.
nb_actions = env.action_space[0]
# Bare expression: presumably a notebook cell echo; no effect in a script.
nb_actions

# Flattened shape of element [0][0][1] of a step result. This call advances
# the environment by one step as a side effect.
# NOTE(review): presumably [0] is the observations, [0] agent 0 and [1] one
# component of that agent's observation -- confirm against the obs builder.
og_input_shape = np.array(env.step({0: RailEnvActions.DO_NOTHING
                                    })[0][0][1]).flatten().shape
og_input_shape
"""Build a model, does annoying error which I was not able to fix yet"""

# Bare expression: looks up the enum member with value 4; result is unused.
RailEnvActions(4)

# Print the same flattened shape after a reset and after two different
# actions. Each call resets or steps the environment again.
print(np.array(env.reset()[0][0][1]).flatten().shape)
print(
    np.array(env.step({0:
                       RailEnvActions.STOP_MOVING})[0][0][1]).flatten().shape)
print(
    np.array(env.step({0:
                       RailEnvActions.MOVE_FORWARD})[0][0][1]).flatten().shape)

# The model input prepends a window dimension to the flattened shape.
input_shape = (WINDOW_LENGTH, ) + og_input_shape
model = Sequential()
# NOTE(review): `timesteps` is not used in the visible part of the script.
timesteps = 1  #1 timestep as tree is only 1 deep
model.add(Input(shape=input_shape, name="INSERIMENTO_DATI"))
#model.add(Embedding(input_dim=input_shape[0], output_dim=64))
model.add(