Example #1
    def load_dataset(file):
        """Loads a dataset of (S, A, R, S_, T) transitions from an (m, 32) NumPy array file."""
        D = np.load(file)

        assert D.shape[1] == 32

        world_size = (int(D[0][0]), int(D[0][1]))

        dataset = []

        for d in D:

            # Row layout: d[0:2] world size, d[2:12] state features,
            # d[12:20] flattened action directions, d[20] reward,
            # d[21:31] next-state features, d[31] terminal flag (0/1)
            s = d[2:12]
            a = d[12:20]
            R = d[20]
            s_ = d[21:31]
            T = d[31] == 1

            S = PursuitState.from_features(s, world_size)
            S_ = PursuitState.from_features(s_, world_size)

            A = []
            for i in range(4):
                direction = (int(a[i * 2]), int(a[i * 2 + 1]))
                action = agent_directions().index(direction)
                A.append(action)

            A = tuple(A)

            datapoint = S, A, R, S_, T
            dataset.append(datapoint)

        return dataset
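
For reference, a minimal numpy-only sketch of how one 32-column row decomposes under the slicing above; the values are purely illustrative and no PursuitState is required:

import numpy as np

row = np.zeros(32)
row[0], row[1] = 5, 5            # world size (columns, rows)
row[20] = -1.0                   # reward
row[31] = 0                      # terminal flag, stored as 0/1

world_size = (int(row[0]), int(row[1]))
s, a = row[2:12], row[12:20]     # state features, flattened action directions
R, s_, T = row[20], row[21:31], row[31] == 1
print(world_size, R, T)          # (5, 5) -1.0 False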
Example #2
def infer_prey_action(state, next_state):
    """
    Returns the action for the prey (0, 1, 2, 3, 4, 5)
     """
    num_actions = len(agent_directions())

    x = state.prey_positions[0][0]
    y = state.prey_positions[0][1]

    x_new = next_state.prey_positions[0][0]
    y_new = next_state.prey_positions[0][1]

    columns = state.world_size[0]
    rows = state.world_size[1]

    moves = [
        (x_new - x) % columns == 1,  # Moved Right
        (x - x_new) % columns == 1,  # Moved Left
        (y_new - y) % rows == 1,     # Moved Down
        (y - y_new) % rows == 1,     # Moved Up
    ]

    stayed_in_place = not any(moves)

    if stayed_in_place:
        return 0
    else:  # Find out which way the prey went (R, L, D or U)
        for a in range(num_actions):
            if moves[a]:
                return a + 1
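
As a quick illustration of the wrap-around test above, here is a self-contained sketch on an assumed 5x5 toroidal world; only the modular arithmetic is shown:

columns, rows = 5, 5

# The prey sits at the right edge and moves right, wrapping to column 0.
x, y = 4, 2
x_new, y_new = 0, 2

moves = [
    (x_new - x) % columns == 1,  # Moved Right
    (x - x_new) % columns == 1,  # Moved Left
    (y_new - y) % rows == 1,     # Moved Down
    (y - y_new) % rows == 1,     # Moved Up
]
print(moves.index(True) + 1)     # 1 -> moved right, despite the wrap-around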
Example #3
def actions_one_hot_encoding(joint_actions):
    """One hot encoding for all actions"""
    num_agents = len(joint_actions)
    num_actions = len(agent_directions())
    actions_one_hot = np.zeros((num_agents, num_actions))
    actions_one_hot[range(num_agents), joint_actions] = 1  # set the chosen action in each agent's row
    actions_one_hot = actions_one_hot.reshape((num_agents * num_actions, ))
    return actions_one_hot
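
A minimal self-contained check of the indexing trick above, assuming four agents and four directions (i.e., len(agent_directions()) == 4):

import numpy as np

num_agents, num_actions = 4, 4      # assumed sizes
joint_actions = (0, 3, 1, 2)        # one action index per agent

one_hot = np.zeros((num_agents, num_actions))
one_hot[range(num_agents), joint_actions] = 1
print(one_hot.reshape((num_agents * num_actions,)))
# -> 1,0,0,0, 0,0,0,1, 0,1,0,0, 0,0,1,0  (one block of four per agent)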
Example #4
    def transition(pursuit_state, joint_action, deterministic=False):

        action_space = agent_directions()
        world_size = pursuit_state.world_size
        num_agents = len(pursuit_state.agents_positions)
        num_preys = len(pursuit_state.prey_positions)
        occupied_positions = set(pursuit_state.prey_positions) | set(
            pursuit_state.agents_positions)

        directions = [action_space[a] for a in joint_action]
        agents_positions = [None] * num_agents
        prey_positions = [None] * num_preys
        agent_indices = ([(i, True) for i in range(num_agents)] +
                         [(i, False) for i in range(num_preys)])

        # Process entities in random order unless a deterministic transition is requested
        if not deterministic:
            np.random.shuffle(agent_indices)

        for i, is_agent in agent_indices:

            if is_agent:
                position = pursuit_state.agents_positions[i]
                direction = directions[i]
            else:
                position = pursuit_state.prey_positions[i]
                direction = PursuitState.move_prey_randomly()

            new_position = move(position, direction, world_size)

            # If collision is detected, just go to the original position
            if new_position in occupied_positions:
                new_position = position

            occupied_positions.remove(position)
            occupied_positions.add(new_position)

            if is_agent:
                agents_positions[i] = new_position
            else:
                prey_positions[i] = new_position

        next_pursuit_state = PursuitState(tuple(agents_positions),
                                          tuple(prey_positions), world_size)
        reward = 100 if next_pursuit_state.is_terminal else -1.0

        return next_pursuit_state, reward
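
The transition above relies on a move helper that is not shown in this example; a plausible sketch, assuming wrap-around movement consistent with the modular arithmetic used in infer_prey_action, would be:

def move(position, direction, world_size):
    """Hypothetical sketch: step one cell in `direction` on a toroidal grid."""
    x, y = position
    dx, dy = direction
    columns, rows = world_size
    return ((x + dx) % columns, (y + dy) % rows)

print(move((4, 0), (1, 0), (5, 5)))  # (0, 0) -> moving right off the edge wraps around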
Example #5
def compute_displacement(state, next_state, entity):
    """Computes displacement for all agents, i.e., st+1 - st"""

    num_actions = len(agent_directions())
    movement_vector = np.zeros((1, num_actions + 1))

    num_agents = len(state.agents_positions)
    is_prey = entity == num_agents

    if is_prey:
        x = state.prey_positions[0][0]
        y = state.prey_positions[0][1]
        x_new = next_state.prey_positions[0][0]
        y_new = next_state.prey_positions[0][1]
    else:
        x = state.agents_positions[entity][0]
        y = state.agents_positions[entity][1]
        x_new = next_state.agents_positions[entity][0]
        y_new = next_state.agents_positions[entity][1]

    columns = state.world_size[0]
    rows = state.world_size[1]

    moves = [
        (x_new - x) % columns == 1,  # Moved Right
        (x - x_new) % columns == 1,  # Moved Left
        (y_new - y) % rows == 1,     # Moved Down
        (y - y_new) % rows == 1,     # Moved Up
    ]

    stayed_in_place = not any(moves)

    if stayed_in_place:
        movement_vector[0][0] = 1
    else:
        # Find out which way the entity went (R, L, D or U)
        for a in range(num_actions):
            if moves[a]:
                movement_vector[0][a + 1] = 1
                break

    return movement_vector
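
A hypothetical usage sketch, using a namedtuple stand-in that exposes only the attributes compute_displacement reads (the real code passes PursuitState instances, and agent_directions() is assumed to return four directions):

from collections import namedtuple

State = namedtuple("State", ["agents_positions", "prey_positions", "world_size"])

state = State(agents_positions=[(0, 0)], prey_positions=[(2, 2)], world_size=(5, 5))
next_state = State(agents_positions=[(1, 0)], prey_positions=[(2, 2)], world_size=(5, 5))

print(compute_displacement(state, next_state, entity=0))
# [[0. 1. 0. 0. 0.]] -> agent 0 moved right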
Example #6
    def save_dataset(dataset, file):
        """Serializes a list of (S, A, R, S_, T) transitions into an (m, 32) NumPy array."""
        m = len(dataset)
        D = np.zeros((m, 32))
        world_size = dataset[0][0].world_size

        for i, datapoint in enumerate(dataset):
            S, A, R, S_, T = datapoint

            s = S.features()
            A = [agent_directions()[a] for a in A]  #FIXME
            a = tuple([a_coor for a in A for a_coor in a])
            s_ = S_.features()

            D[i, 0] = world_size[0]
            D[i, 1] = world_size[1]
            D[i, 2:12] = s
            D[i, 12:20] = a
            D[i, 20] = R
            D[i, 21:31] = s_
            D[i, 31] = 1 if T else 0

        np.save(file, D)
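
For illustration, a small sketch of the action flattening that fills D[i, 12:20] above, with an assumed (purely illustrative) ordering of the four direction vectors:

directions = [(1, 0), (-1, 0), (0, 1), (0, -1)]    # assumed stand-in for agent_directions()

A = (0, 3, 1, 2)                                    # joint action: one index per agent
A_dirs = [directions[a] for a in A]                 # [(1, 0), (0, -1), (-1, 0), (0, 1)]
a = tuple(coord for d in A_dirs for coord in d)     # flattened to eight coordinates
print(a)                                            # (1, 0, 0, -1, -1, 0, 0, 1) -> D[i, 12:20]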