def test_env_3(env3_robots):
    env_data = env3_robots.get_env_metadata()
    env3_robots.reset()
    state = env3_robots.get_current_state()
    action = [0, 0, 0]
    action = n_from_prod(env_data['sets'], action)
    new_state, reward, done, _ = env3_robots.step(action)
    new_state_from_obs = get_state_from_observation(new_state)
    assert not done
    assert reward == pytest.approx(-0.666666)
    assert state.robots_data == new_state_from_obs.robots_data
    assert state.time + 1 == new_state_from_obs.time
    assert new_state_from_obs.positions == [2, 2, 3]
    # the environment should store exactly the observation it returned
    assert np.array_equal(env3_robots.state, np.array(new_state))
    for i in range(3):
        action = [0, 0, 0]
        action = n_from_prod(env_data['sets'], action)
        new_state, reward, done, _ = env3_robots.step(action)
    action = [0, 0, 70]
    action = n_from_prod(env_data['sets'], action)
    new_state, reward, done, _ = env3_robots.step(action)
    assert not done
    assert reward == pytest.approx(0.001)
    assert get_state_from_observation(new_state).time == 6
    assert get_state_from_observation(new_state).robots_data == [15, 30, 0]
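The test flattens each per-pair action list into a single index with n_from_prod. A minimal sketch of such an encoder, assuming sets is the list of per-component value lists from env_data['sets'] and the encoding is mixed-radix over the Cartesian product (the real helper is not shown here, so n_from_prod_sketch is hypothetical):

from typing import List

def n_from_prod_sketch(sets: List[List[int]], action: List[int]) -> int:
    # hypothetical encoder: position of `action` in the product of `sets`
    index = 0
    for values, chosen in zip(sets, action):
        index = index * len(values) + values.index(chosen)
    return index

# [0, 0, 0] maps to index 0, matching the no-op action used above
assert n_from_prod_sketch([[0, 1], [0, 1], [0, 1]], [0, 0, 0]) == 0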
    def step(self, action: Union[int, List[int]]) -> Tuple[np.ndarray, float, bool, Dict]:
        # accept either a flat action index or a per-pair action list
        if not isinstance(action, list):
            action = int(action)
        else:
            assert self.action_space.contains(
                action), f'{action}, {type(action)} invalid'
            action = n_from_prod(self.__sets, action)
        interpreted_state = get_state_from_observation(
            self.state)  # convert observation to interpreted state
        interpreted_action = self.get_action_from_space(action)
        self.__state_action = interpreted_action
        reward = self.__reward_class.give_reward(interpreted_state,
                                                 interpreted_action,
                                                 self.__meetings,
                                                 self.__cycles_lengths,
                                                 self.__max_memory)

        if reward == settings.REWARD_FOR_INVALID_ACTION:
            # invalid action: only advance time and move the robots along
            # their cycles, without transferring any data
            new_state = apply_action_only_increase_time_move_robots(
                interpreted_state, interpreted_action, self.__max_memory,
                self.__cycles)
        else:
            new_state = self.__action_apply(interpreted_state,
                                            interpreted_action,
                                            self.__max_memory, self.__cycles)

        self.state = np.array(get_observation_from_state(new_state))
        # scale the raw reward down and report whether the iteration cap was hit
        return self.state, reward / 1000000, check_if_done(
            new_state, settings.MAXIMUM_NUM_ITER), {}
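Taken together with the tests, a rollout against this step signature could look like the sketch below; env3_robots is the pytest fixture from the tests above and select_valid_action is the helper shown further down, so treat this as illustrative usage rather than documented API:

env = env3_robots              # fixture assumed from the tests above
env.reset()
done = False
total_reward = 0.0
while not done:
    action = env.select_valid_action()       # per-pair action list
    obs, reward, done, _ = env.step(action)  # step() encodes the list itself
    total_reward += reward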
def test_get_state_from_observation(env3_robots):
    sample = env3_robots.observation_space.sample()
    sample = list(sample)
    state_from_obs = get_state_from_observation(sample)
    assert sample[:3] == state_from_obs.robots_data
    assert sample[3] == state_from_obs.time
    assert sample[4:] == state_from_obs.positions
    assert get_observation_from_state(state_from_obs) == sample
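These assertions pin down the flat observation layout for three robots: indices 0-2 hold robots_data, index 3 the time, and indices 4-6 the positions. A minimal sketch of the two converters under exactly that assumption (the real State class is not shown, so StateSketch is a hypothetical stand-in):

from dataclasses import dataclass
from typing import List

@dataclass
class StateSketch:  # hypothetical stand-in for the real State class
    robots_data: List[int]
    time: int
    positions: List[int]

def get_state_from_observation_sketch(obs: List[int]) -> StateSketch:
    # layout asserted by the test: [robots_data x3, time, positions x3]
    return StateSketch(list(obs[:3]), obs[3], list(obs[4:]))

def get_observation_from_state_sketch(state: StateSketch) -> List[int]:
    return state.robots_data + [state.time] + state.positions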
    def select_valid_action(self) -> List[int]:
        """
        Select a random action that is valid in the current state.
        :return: List[int] (one entry per robot pair)
        """
        interpreted_state = get_state_from_observation(self.state)
        # for each robot pair, list every legal transfer amount: the lower part
        # of the range draws from r1's data, the part above max_memory from r2's
        tmp_sets = []
        for i in range(self.__num_robots * (self.__num_robots - 1) // 2):
            r1, r2 = self.__action_mapping[i]
            rng = list(range(interpreted_state.robots_data[r1] + 1)) + list(
                range(
                    self.__max_memory + 1,
                    self.__max_memory + 1 + interpreted_state.robots_data[r2]))
            tmp_sets.append(rng)

        # sample one element of the product uniformly without materializing it
        tmp_lazy_action_get = LazyCartesianProduct(tmp_sets)
        tmp_action = tmp_lazy_action_get.get_nth_element(
            np.random.randint(0, tmp_lazy_action_get.max_size))
        valid_meetings = get_valid_meetings(interpreted_state.time,
                                            self.__meetings,
                                            self.__cycles_lengths)
        # print("VALID MEETINGS: " + str(valid_meetings))
        pair_list = set([(x.r1, x.r2) for x in valid_meetings])
        # print("TMP_ACTION_BEFORE" + str(tmp_action))
        for i in range(len(self.__action_mapping)):
            r1, r2 = self.__action_mapping[i]
            if (r1, r2) not in pair_list:
                # the pair cannot meet at this time step, so no transfer
                tmp_action[i] = 0
            elif r1 == 0:
                # pair (0, r2) can meet: pick a nonzero value from r2's side
                # of the range, unless r2 has no data to send
                if interpreted_state.robots_data[r2] == 0:
                    tmp_action[i] = 0
                else:
                    tmp_action[i] = np.random.randint(
                        self.__max_memory + 1, self.__max_memory + 1 +
                        interpreted_state.robots_data[r2])
        return tmp_action
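select_valid_action relies on LazyCartesianProduct to index into a product of per-pair ranges without materializing it. A minimal sketch of that idea, assuming get_nth_element performs mixed-radix decoding over the set sizes (the real class may differ in details):

from functools import reduce
from typing import List

class LazyCartesianProductSketch:
    # hypothetical stand-in: index a Cartesian product without building it
    def __init__(self, sets: List[List[int]]):
        self.sets = sets
        self.max_size = reduce(lambda acc, s: acc * len(s), sets, 1)

    def get_nth_element(self, n: int) -> List[int]:
        # decode n as a mixed-radix number, least significant component last
        element = []
        for values in reversed(self.sets):
            n, r = divmod(n, len(values))
            element.append(values[r])
        return element[::-1]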
    def get_current_state(self) -> State:
        """
        Return the current interpreted state of the environment.
        """
        return get_state_from_observation(self.state)
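check_if_done is called by step above with the new state and settings.MAXIMUM_NUM_ITER. A plausible sketch, assuming the episode ends purely on the time counter (the real termination rule may include more conditions):

def check_if_done_sketch(state, maximum_num_iter: int) -> bool:
    # hypothetical: an episode ends once the time counter reaches the cap
    return state.time >= maximum_num_iter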