import random
from typing import Dict

import numpy as np

# Imports below assume the flatland-rl package layout these tests are written against.
from flatland.envs.malfunction_generators import (MalfunctionParameters, malfunction_from_params,
                                                  single_malfunction_generator)
from flatland.envs.rail_env import RailEnv, RailEnvActions
from flatland.envs.rail_generators import rail_from_grid_transition_map
from flatland.envs.schedule_generators import random_schedule_generator
from flatland.utils.simple_rail import make_simple_rail2


def tests_random_interference_from_outside():
    """Tests that the environment stays deterministic even when an external random generator is used."""
    # Set fixed malfunction duration for this test
    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25, height=30, rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=2), number_of_agents=1, random_seed=1)
    env.reset()
    env.agents[0].speed_data['speed'] = 0.33
    env.reset(False, False, False, random_seed=10)
    env_data = []

    for step in range(200):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent in env.agents:
            # Go forward at every step (2 == MOVE_FORWARD)
            action_dict[agent.handle] = RailEnvActions(2)

        _, reward, _, _ = env.step(action_dict)
        # Append the rewards of the first trial
        env_data.append((reward[0], env.agents[0].position))
        assert reward[0] == env_data[step][0]
        assert env.agents[0].position == env_data[step][1]

    # Run the same test as above but with an external random generator running
    # Check that the reward stays the same
    rail, rail_map = make_simple_rail2()
    random.seed(47)
    np.random.seed(1234)
    env = RailEnv(width=25, height=30, rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=2), number_of_agents=1, random_seed=1)
    env.reset()
    env.agents[0].speed_data['speed'] = 0.33
    env.reset(False, False, False, random_seed=10)

    dummy_list = [1, 2, 6, 7, 8, 9, 4, 5, 4]
    for step in range(200):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent in env.agents:
            # Go forward at every step (2 == MOVE_FORWARD)
            action_dict[agent.handle] = RailEnvActions(2)
            # Do dummy random number generations
            random.shuffle(dummy_list)
            np.random.rand()

        _, reward, _, _ = env.step(action_dict)
        assert reward[0] == env_data[step][0]
        assert env.agents[0].position == env_data[step][1]
def test_single_malfunction_generator():
    """
    Test the single malfunction generator.
    """
    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25, height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=10,
                  # Note: 'earlierst_malfunction' follows the parameter name as spelled in the flatland API
                  malfunction_generator_and_process_data=single_malfunction_generator(earlierst_malfunction=10,
                                                                                      malfunction_duration=5))
    for test in range(10):
        env.reset()
        action_dict = dict()
        tot_malfunctions = 0
        print(test)
        for i in range(10):
            for agent in env.agents:
                # Go forward all the time
                action_dict[agent.handle] = RailEnvActions(2)

            env.step(action_dict)
        for agent in env.agents:
            # Accumulate the number of malfunctions over all agents
            tot_malfunctions += agent.malfunction_data['nr_malfunctions']
        # Exactly one malfunction should have occurred per episode
        assert tot_malfunctions == 1
def bfs(self, node: Node, node_observations: np.ndarray, current_level=0, abs_pos=0):
    """
    Depth-first traversal of the observation tree (despite the method name): each visited
    node's feature vector is written into the flat observation array.

    :param abs_pos: absolute index in the flat obs vector
    :param current_level: current level of the node in the tree (how deep)
    :param node_observations: accumulated obs vectors of the nodes
    :param node: current node
    :return: the next free absolute index in the flat obs vector
    """
    node_obs = _get_small_node_feature_vector(node) if self._small_tree else _get_node_feature_vector(node)
    node_observations[abs_pos, :] = node_obs
    abs_pos += 1

    for action in self._available_actions:
        filtered = list(filter(lambda k: k == RailEnvActions.to_char(action.value), node.childs.keys()))
        if len(filtered) == 1 and not isinstance(node.childs[filtered[0]], float):
            # Recurse into the existing child reached by this action
            abs_pos = self.bfs(node.childs[filtered[0]],
                               node_observations,
                               current_level=current_level + 1,
                               abs_pos=abs_pos)
        elif current_level != self._builder.max_depth:
            # Child is missing: skip the slots reserved for the whole missing subtree
            abs_pos += self._count_missing_nodes(current_level + 1)

    return abs_pos
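# A minimal sketch (assumption, not part of the original code): how large the flat buffer passed to
# bfs() as `node_observations` would need to be if every node can branch into up to `branching`
# children (one per possible action) and the tree is expanded down to `max_depth` levels.
def full_tree_num_nodes(max_depth: int, branching: int = 4) -> int:
    """Number of nodes of a full tree with the given depth and branching factor (root at level 0)."""
    return sum(branching ** level for level in range(max_depth + 1))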
def get_action_masking(env, agent, action_size, train_params):
    """
    :param env: the environment
    :param agent: the agent index/handle
    :param action_size: the environment's number of available actions
    :param train_params: training parameters used to customize the mask
    :return: the action mask for the passed agent
    """
    # Mask initialization: forbid DO_NOTHING unless no-op actions are explicitly allowed
    action_mask = [0 if action == RailEnvActions.DO_NOTHING and not train_params.allow_no_op else 1
                   for action in range(action_size)]

    # Mask filling
    if train_params.action_masking:
        for action in range(action_size):
            # Only refine the mask if the agent is in the scene, i.e. has a position; agents that have
            # already arrived (and been removed) or have not yet started keep the initial mask.
            if env.get_rail_env().agents[agent].position is not None:
                _, cell_valid, _, _, transition_valid = env.get_rail_env()._check_action_on_agent(
                    RailEnvActions(action), env.get_rail_env().agents[agent])
                if not all([cell_valid, transition_valid]):
                    action_mask[action] = 0

    return action_mask
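# A minimal usage sketch (assumption, not part of the original code): applying the mask returned by
# get_action_masking when greedily picking an action from Q-values. Actions whose mask entry is 0
# are pushed to -inf so the argmax can never select them.
def pick_masked_action(q_values, action_mask):
    """Return the index of the highest-valued action among those allowed by the mask."""
    masked_q = np.where(np.asarray(action_mask) == 1, np.asarray(q_values, dtype=float), -np.inf)
    return int(np.argmax(masked_q))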
def test_malfunction_process_statistically():
    """Tests that malfunctions are produced by stochastic_data!"""
    # Set fixed malfunction duration for this test
    stochastic_data = MalfunctionParameters(malfunction_rate=1 / 5,  # Rate of malfunction occurrence
                                            min_duration=5,  # Minimal duration of malfunction
                                            max_duration=5  # Max duration of malfunction
                                            )

    rail, rail_map = make_simple_rail2()

    # SingleAgentNavigationObs is the custom observation builder from the flatland examples,
    # assumed to be defined or imported elsewhere in this test module.
    env = RailEnv(width=25, height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=10,
                  malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
                  obs_builder_object=SingleAgentNavigationObs())

    env.reset(True, True, False, random_seed=10)

    env.agents[0].target = (0, 0)
    # Next line only for test generation
    # agent_malfunction_list = [[] for i in range(10)]
    agent_malfunction_list = [[0, 0, 0, 0, 5, 4, 3, 2, 1, 0, 5, 4, 3, 2, 1, 0, 0, 0, 5, 4],
                              [0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                              [0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                              [0, 0, 0, 5, 4, 3, 2, 1, 0, 5, 4, 3, 2, 1, 0, 0, 5, 4, 3, 2],
                              [0, 0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 5, 4, 3, 2, 1],
                              [0, 0, 5, 4, 3, 2, 1, 0, 0, 5, 4, 3, 2, 1, 0, 5, 4, 3, 2, 1],
                              [0, 0, 0, 0, 0, 0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0],
                              [5, 4, 3, 2, 1, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 5],
                              [5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 4, 3, 2],
                              [5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 5, 4]]

    for step in range(20):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent_idx in range(env.get_num_agents()):
            # We randomly select an action
            action_dict[agent_idx] = RailEnvActions(np.random.randint(4))
            # For generating tests only:
            # agent_malfunction_list[agent_idx].append(env.agents[agent_idx].malfunction_data['malfunction'])
            assert env.agents[agent_idx].malfunction_data['malfunction'] == agent_malfunction_list[agent_idx][step]
        env.step(action_dict)
def test_last_malfunction_step():
    """
    Test that the agent moves when it is not malfunctioning.
    """
    # Set fixed malfunction duration for this test
    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25, height=30, rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=2), number_of_agents=1, random_seed=1)
    env.reset()
    env.agents[0].speed_data['speed'] = 1. / 3.
    env.agents[0].target = (0, 0)
    env.reset(False, False, True)

    # Force malfunction to be off at the beginning and the next malfunction to happen in 2 steps
    env.agents[0].malfunction_data['next_malfunction'] = 2
    env.agents[0].malfunction_data['malfunction'] = 0
    env_data = []
    for step in range(20):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent in env.agents:
            # Go forward all the time
            action_dict[agent.handle] = RailEnvActions(2)

        if env.agents[0].malfunction_data['malfunction'] < 1:
            agent_can_move = True
        # Store the position before and after the step
        pre_position = env.agents[0].speed_data['position_fraction']
        _, reward, _, _ = env.step(action_dict)
        # Check if the agent is still allowed to move in this step
        if env.agents[0].malfunction_data['malfunction'] > 0:
            agent_can_move = False
        post_position = env.agents[0].speed_data['position_fraction']
        # Assert that the agent moved while it was still allowed
        if agent_can_move:
            assert pre_position != post_position
        else:
            assert post_position == pre_position
def _check_invalid_transitions(self, action_dict):
    """
    :param action_dict: dictionary containing the decided action for each agent
    :return: the penalties based on attempted invalid transitions
    """
    rewards = {}
    for agent in range(self.unwrapped.rail_env.get_num_agents()):
        if self.unwrapped.rail_env.agents[agent].status == RailAgentStatus.ACTIVE:
            # Agents missing from action_dict are treated as DO_NOTHING (0)
            _, cell_valid, _, _, transition_valid = self.unwrapped.rail_env._check_action_on_agent(
                RailEnvActions(action_dict[agent] if agent in action_dict else 0),
                self.unwrapped.rail_env.agents[agent])
            if not all([cell_valid, transition_valid]):
                rewards[agent] = self.invalid_action_penalty
            else:
                rewards[agent] = 0.0
        else:
            rewards[agent] = 0.0
    return rewards
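# A minimal sketch (assumption, not part of the original code): how the per-agent penalties returned
# by _check_invalid_transitions could be folded into the reward dict produced by the wrapped env's step().
def combine_rewards(step_rewards: dict, penalty_rewards: dict) -> dict:
    """Add each agent's invalid-transition penalty to its step reward."""
    return {handle: step_rewards[handle] + penalty_rewards.get(handle, 0.0) for handle in step_rewards}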
def process_action(self, action):
    # Wrap the scalar action chosen by the agent into the {handle: action} dict expected by
    # RailEnv.step (single-agent setup, so the only handle is 0)
    return {0: RailEnvActions(action)}
def process_step(self, observation, reward, done, info):
    if not done:
        observation = self.process_observation(observation)
    reward = self.process_reward(reward)
    return observation, reward, done, {}


nb_actions = env.action_space[0]
nb_actions

og_input_shape = np.array(env.step({0: RailEnvActions.DO_NOTHING})[0][0][1]).flatten().shape
og_input_shape

# Build the model (this still triggers an error that has not been fixed yet)
RailEnvActions(4)
print(np.array(env.reset()[0][0][1]).flatten().shape)
print(np.array(env.step({0: RailEnvActions.STOP_MOVING})[0][0][1]).flatten().shape)
print(np.array(env.step({0: RailEnvActions.MOVE_FORWARD})[0][0][1]).flatten().shape)

input_shape = (WINDOW_LENGTH,) + og_input_shape

model = Sequential()
timesteps = 1  # 1 timestep as the tree is only 1 level deep
model.add(Input(shape=input_shape, name="INSERIMENTO_DATI"))
# model.add(Embedding(input_dim=input_shape[0], output_dim=64))
model.add(