Example #1
    def load_dataset(file):
        D = np.load(file)

        assert D.shape[1] == 32

        world_size = (int(D[0][0]), int(D[0][1]))

        dataset = []

        for d in D:

            # Record layout: [0:2] world size, [2:12] state features,
            # [12:20] agent direction vectors, [20] reward,
            # [21:31] next-state features, [31] terminal flag
            s = d[2:12]
            a = d[12:20]
            R = d[20]
            s_ = d[21:31]
            T = bool(d[31] == 1)

            S = PursuitState.from_features(s, world_size)
            S_ = PursuitState.from_features(s_, world_size)

            A = []
            for i in range(4):
                direction = (int(a[i * 2]), int(a[i * 2 + 1]))
                action = agent_directions().index(direction)
                A.append(action)

            A = tuple(A)

            datapoint = S, A, R, S_, T
            dataset.append(datapoint)

        return dataset
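A minimal usage sketch for the loader above, assuming the function is reachable at module level and the .npy file stores one 32-column transition per row (the file name is hypothetical):

# Sketch only: "pursuit_transitions.npy" is a hypothetical file name.
dataset = load_dataset("pursuit_transitions.npy")
state, joint_action, reward, next_state, terminal = dataset[0]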
Example #2
    def extract_features(self, state: PursuitState):
        if self._feature_extraction_mode == "default":
            return state.features()
        elif self._feature_extraction_mode == "relative agent":
            return state.features_relative_agent(agent_id=0)
        elif self._feature_extraction_mode == "relative prey":
            return state.features_relative_prey()
        else:
            raise ValueError(
                f"Invalid feature extraction mode {self._feature_extraction_mode}"
            )
Example #3
def pursuit_datapoint(timestep, world_size):
    from environment.PursuitState import PursuitState
    obs, action, reward, next_obs, terminal, info = timestep
    state = PursuitState.from_features(obs, world_size)
    next_state = PursuitState.from_features(next_obs, world_size)
    # The controlled agent's action comes first, followed by its teammates' actions
    joint_actions = [action] + [
        teammate_action
        for teammate_action in info["teammates actions"].values()
    ]
    datapoint = state, tuple(joint_actions), reward, next_state, terminal
    return datapoint
Example #4
    def transition(pursuit_state, joint_action, deterministic=False):

        action_space = agent_directions()
        world_size = pursuit_state.world_size
        num_agents = len(pursuit_state.agents_positions)
        num_preys = len(pursuit_state.prey_positions)
        occupied_positions = set(pursuit_state.prey_positions) | set(
            pursuit_state.agents_positions)

        directions = [action_space[a] for a in joint_action]
        agents_positions = [None] * num_agents
        prey_positions = [None] * num_preys
        agent_indices = [(i, True) for i in range(num_agents)
                         ] + [(i, False) for i in range(num_preys)]

        if not deterministic:
            np.random.shuffle(agent_indices)

        for i, is_agent in agent_indices:

            if is_agent:
                position = pursuit_state.agents_positions[i]
                direction = directions[i]
            else:
                position = pursuit_state.prey_positions[i]
                direction = PursuitState.move_prey_randomly()

            new_position = move(position, direction, world_size)

            # If the target cell is already occupied, stay in the original position
            if new_position in occupied_positions:
                new_position = position

            occupied_positions.remove(position)
            occupied_positions.add(new_position)

            if is_agent:
                agents_positions[i] = new_position
            else:
                prey_positions[i] = new_position

        next_pursuit_state = PursuitState(tuple(agents_positions),
                                          tuple(prey_positions), world_size)
        reward = 100 if next_pursuit_state.is_terminal else -1.0

        return next_pursuit_state, reward
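A hedged usage sketch for the transition above, assuming it is reachable as a standalone/static function and that PursuitState.random_state(num_agents, world_size) behaves as in Example #7:

# Sketch only: 4 hunters on a 5x5 grid, one direction index per hunter.
state = PursuitState.random_state(4, (5, 5))
joint_action = (0, 1, 2, 3)
next_state, reward = transition(state, joint_action)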
Example #5
    def predict_teammate_policy(self, teammate_id, state):
        # Features are computed relative to the teammate being modelled
        x = PursuitState.features_relative_agent(state, teammate_id)
        x = x.reshape(1, -1)
        # Teammate ids start at 1, so the model index is shifted by one
        model = self.models[teammate_id - 1]
        scores = model.predict(x)
        policy = softmax(scores, dim=0).numpy()
        policy /= policy.sum()
        return policy
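A hedged sketch of how the predicted policy might be consumed; "agent" and "state" are assumed to exist in the surrounding code:

# Sketch only: sample a teammate action from the predicted distribution.
policy = agent.predict_teammate_policy(teammate_id=1, state=state)
teammate_action = np.random.choice(len(policy), p=policy)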
Example #6
def build_mdp_features(state, joint_actions):
    """Features used for environment models"""

    coordinates = PursuitState.features(state)
    actions_one_hot = actions_one_hot_encoding(joint_actions)

    # St + At (one hot encoded)
    mdp_features = np.concatenate((coordinates, actions_one_hot))

    return mdp_features
Example #7
    def __init__(self,
                 teammates="greedy",
                 num_teammates=3,
                 world_size=(5, 5),
                 features="default",
                 deterministic=False,
                 initial_state=None):

        super(Pursuit, self).__init__()

        self.action_space = Discrete(4)
        self.reward_range = (-np.inf, np.inf)
        self.metadata = {}

        self._num_agents = num_teammates + 1
        self._action_descriptions = action_meanings()
        self._feature_extraction_mode = features
        self.num_actions = 4
        self.observation_space = Box(low=-np.inf,
                                     high=np.inf,
                                     shape=(self.num_features, ),
                                     dtype=np.float64)
        self.name = "Pursuit"

        self._world_size = world_size

        self._team_name = teammates
        self._teammates = self._initialize_teammates(teammates, num_teammates)
        self._pursuit_state = None
        self._first_render = True

        if deterministic and initial_state is not None:
            self._initial_state = lambda: initial_state
        elif deterministic:
            # Draw one random state up front and reuse it on every reset
            fixed_initial_state = PursuitState.random_state(
                num_teammates + 1, world_size)
            self._initial_state = lambda: fixed_initial_state
        else:
            self._initial_state = lambda: PursuitState.random_state(
                self._num_agents, self._world_size)
        self._deterministic = deterministic
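A hedged usage sketch for constructing the environment above, assuming the classic Gym-style reset/step interface implied by action_space and observation_space:

# Sketch only: reset/step signatures are assumed, not shown in the snippets.
env = Pursuit(teammates="greedy", num_teammates=3, world_size=(5, 5))
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())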
Example #8
    def prepare_individual_batches(self, batch):

        m = len(batch)
        F = self.features

        N = len(self.models)

        X = [np.zeros((m, F)) for _ in range(N)]
        Y = [np.zeros(m) for _ in range(N)]

        for i, datapoint in enumerate(batch):
            state, joint_actions, reward, next_state, terminal = datapoint
            for t in range(N):
                # Teammate ids start at 1; index 0 holds the controlled agent's action
                teammate = t + 1
                teammate_action = joint_actions[teammate]
                # Features relative to the teammate; the label is the action it took
                x = PursuitState.features_relative_agent(state, teammate)
                X[t][i] = x
                Y[t][i] = teammate_action

        return X, Y
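A hedged sketch of how the per-teammate batches might be consumed, assuming each entry of self.models exposes a fit(X, y) style interface (not shown in the snippets); "learner" and "batch" are placeholders:

# Sketch only: fit one supervised model per teammate.
X, Y = learner.prepare_individual_batches(batch)
for t, model in enumerate(learner.models):
    model.fit(X[t], Y[t])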
Example #9
def remove_collisions(state, next_state):
    """Randomly rolls back colliding agents in a state st+1 back to their original positions in state st"""

    vacancy_constant = state.world_size[0] + 99

    old_positions = state.features()
    positions = next_state.features()
    num_agents = 4

    indices = list(range(num_agents + 1))
    random.shuffle(indices)

    collision = False

    for target in indices:

        x1 = positions[target * 2 + 0]
        y1 = positions[target * 2 + 1]

        for other in range(num_agents + 1):

            if target == other:
                continue

            x2 = positions[other * 2 + 0]
            y2 = positions[other * 2 + 1]

            collision = x2 == x1 and y2 == y1

            if collision:
                break

        positions = roll_back(target, positions, old_positions,
                              vacancy_constant) if collision else positions

    corrected_next_state = PursuitState.from_features(positions,
                                                      state.world_size)

    return corrected_next_state
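A hedged sketch of where remove_collisions might fit: correcting a next state predicted by a learned environment model, where "predicted_features" is a hypothetical model output in the same layout as PursuitState.features():

# Sketch only: roll back any collisions introduced by the model's prediction.
predicted_next_state = PursuitState.from_features(predicted_features, state.world_size)
corrected_next_state = remove_collisions(state, predicted_next_state)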
Example #10
    def policy(self, observation):
        pursuit_state = PursuitState.from_features(observation,
                                                   self.world_size)
        action = self.select_action_according_to_model(
            pursuit_state, self.most_likely_model())
        return deterministic_policy(action, num_actions=4)