Example #1
    def __init__(self,
                 board_size=15,
                 num_players=4,
                 spawn_offset=2,
                 agent=SimpleAvoidAgent()):
        self.env = TronGridEnvironment.create(board_size=board_size,
                                              num_players=num_players)
        self.state = None
        self.players = None
        self.human_player = None
        self.spawn_offset = spawn_offset
        self.agent = agent

        self.renderer = TronRender(board_size, num_players, winner_player=0)

        self.action_space = Discrete(3)
        self.observation_space = Dict({
            'board': Box(0, num_players, shape=(board_size, board_size)),
            'heads': Box(0, np.inf, shape=(num_players,)),
            'directions': Box(0, 4, shape=(num_players,)),
            'deaths': Box(0, num_players, shape=(num_players,))
        })
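
The Dict observation space above can be sampled directly; here is a minimal sketch (assuming gym and numpy, as in these examples) of the observation layout it produces:

import numpy as np
from gym.spaces import Box, Dict

board_size, num_players = 15, 4

# Mirrors the observation space defined in the constructor above.
observation_space = Dict({
    'board': Box(0, num_players, shape=(board_size, board_size)),
    'heads': Box(0, np.inf, shape=(num_players,)),
    'directions': Box(0, 4, shape=(num_players,)),
    'deaths': Box(0, num_players, shape=(num_players,))
})

sample = observation_space.sample()
print(sample['board'].shape)                # (15, 15), cell values in [0, num_players]
print(sample['heads'].shape)                # (4,), one entry per player
print(observation_space.contains(sample))   # True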
Example #2
    def __init__(self, board_size=15, num_players=4):
        self.env = TronGridEnvironment.create(board_size=board_size,
                                              num_players=num_players)
        self.state = None
        self.players = None

        self.renderer = TronRender(board_size, num_players)

        self.observation_space = Dict({
            'board': Box(0, num_players, shape=(board_size, board_size)),
            'heads': Box(0, np.inf, shape=(num_players,)),
            'directions': Box(0, 4, shape=(num_players,)),
            'deaths': Box(0, num_players, shape=(num_players,))
        })
Example #3
def tron_client(env: TronGridClientEnvironment, username: str):
    logger.debug("Connecting to game server and waiting for game to start")
    player_num = env.connect(username)
    logger.debug("Player number: {}".format(player_num))
    logger.debug("First observation: {}".format(env.wait_for_turn()))
    logger.info("Game started...")

    current_action = Action()
    control_thread = ControlThread(current_action)
    control_thread.start()

    frame_start_time = time()

    board_size = env.server_environment.N
    num_players = env.server_environment.num_players
    renderer = TronRender(board_size, num_players)

    while True:
        renderer.render_observation(env.observation)
        frame_delta = time() - frame_start_time
        # Sleep off whatever remains of the frame budget; never a negative duration.
        sleep(max(0.0, FRAME_MILLISECONDS / 1000 - frame_delta))

        new_obs, reward, terminal, winners = env.step(current_action())
        frame_start_time = time()

        current_action.reset()

        if terminal:
            logger.info("Game is over. Players {} won".format(winners))
            logger.info("Final observation: {}".format(new_obs))
            renderer.close()
            break
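
Action and ControlThread are not defined in this snippet. From the way they are used (the holder is called for its latest value each frame, then reset), they appear to implement a thread-safe "most recent keypress" pattern, with ControlThread writing into the holder from a keyboard-listening loop. A hypothetical sketch of that pattern (names and the 'forward' default are assumptions, not the library's actual classes):

from threading import Lock

class Action:
    # Thread-safe holder for the most recent keyboard action (hypothetical sketch).
    def __init__(self, default='forward'):
        self._lock = Lock()
        self._default = default
        self._value = default

    def __call__(self):
        with self._lock:
            return self._value

    def set(self, value):
        # Called from the input thread whenever a key is pressed.
        with self._lock:
            self._value = value

    def reset(self):
        with self._lock:
            self._value = self._default

A ControlThread built on this would simply call action.set(...) on each keypress, so the render loop above always reads the freshest input without blocking.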
Example #4
class TronRaySinglePlayerEnvironment(gym.Env):
    def __init__(self,
                 board_size=15,
                 num_players=4,
                 spawn_offset=2,
                 agent=SimpleAvoidAgent()):
        self.env = TronGridEnvironment.create(board_size=board_size,
                                              num_players=num_players)
        self.state = None
        self.players = None
        self.human_player = None
        self.spawn_offset = spawn_offset
        self.agent = agent

        self.renderer = TronRender(board_size, num_players, winner_player=0)

        self.action_space = Discrete(3)
        self.observation_space = Dict({
            'board': Box(0, num_players, shape=(board_size, board_size)),
            'heads': Box(0, np.inf, shape=(num_players,)),
            'directions': Box(0, 4, shape=(num_players,)),
            'deaths': Box(0, num_players, shape=(num_players,))
        })

    def reset(self):
        self.state, self.players = self.env.new_state(
            spawn_offset=self.spawn_offset)
        self.human_player = self.players[0]

        return self._get_observation(self.human_player)

    def _get_observation(self, player):
        return self.env.state_to_observation(self.state, player)

    def step(self, action: int):
        human_player = self.human_player

        action_to_string = {0: 'forward', 1: 'right', 2: 'left'}

        actions = []
        for player in self.players:
            if player == human_player:
                actions.append(action_to_string[action])
            else:
                actions.append(
                    self.agent(self.env, self._get_observation(player)))

        self.state, self.players, rewards, terminal, winners = self.env.next_state(
            self.state, self.players, actions)

        observation = self._get_observation(human_player)
        reward = rewards[human_player]
        done = (human_player not in self.players) or terminal

        return observation, reward, done, {}

    def render(self, mode='human'):
        if self.state is None:
            return None

        return self.renderer.render(self.state, mode)

    def close(self):
        self.renderer.close()

    def test(self, trainer, frame_time=0.1):
        self.close()
        state = self.reset()
        done = False
        action = None
        reward = None
        cumulative_reward = 0

        while not done:
            # Query the trained policy on the latest observation, feeding back
            # the previous action and reward.
            action = trainer.compute_action(state,
                                            prev_action=action,
                                            prev_reward=reward)

            state, reward, done, results = self.step(action)
            cumulative_reward += reward
            self.render()

            sleep(frame_time)

        self.render()
        return cumulative_reward
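
Because TronRaySinglePlayerEnvironment subclasses gym.Env, it supports the standard reset/step loop. A minimal smoke test with a random policy (assuming the colosseumrl imports used above are available):

env = TronRaySinglePlayerEnvironment(board_size=15, num_players=4)
obs = env.reset()
done, total_reward = False, 0
while not done:
    # Sample a random turn (0=forward, 1=right, 2=left) until the human player dies.
    obs, reward, done, info = env.step(env.action_space.sample())
    total_reward += reward
env.close()
print("episode reward:", total_reward)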
Example #5
class TronRayEnvironment(MultiAgentEnv):
    action_space = Discrete(3)

    def __init__(self, board_size=15, num_players=4):
        self.env = TronGridEnvironment.create(board_size=board_size,
                                              num_players=num_players)
        self.state = None
        self.players = None

        self.renderer = TronRender(board_size, num_players)

        self.observation_space = Dict({
            'board': Box(0, num_players, shape=(board_size, board_size)),
            'heads': Box(0, np.inf, shape=(num_players,)),
            'directions': Box(0, 4, shape=(num_players,)),
            'deaths': Box(0, num_players, shape=(num_players,))
        })

    def reset(self):
        self.state, self.players = self.env.new_state()
        return {
            str(i): self.env.state_to_observation(self.state, i)
            for i in range(self.env.num_players)
        }

    def step(self, action_dict):
        #print("CALLING STEP ****************************************************************************************")
        action_to_string = {0: 'forward', 1: 'right', 2: 'left'}

        actions = []

        for player in self.players:
            action = action_dict.get(str(player), 0)
            actions.append(action_to_string[action])

        self.state, self.players, rewards, terminal, winners = self.env.next_state(
            self.state, self.players, actions)

        num_players = self.env.num_players
        alive_players = set(self.players)

        observations = {
            str(i): self.env.state_to_observation(self.state, i)
            for i in map(int, action_dict.keys())
        }
        rewards = {str(i): rewards[i] for i in map(int, action_dict.keys())}
        dones = {
            str(i): i not in alive_players
            for i in map(int, action_dict.keys())
        }
        dones['__all__'] = terminal
        if dones.get('0'):
            # Once player 0 (the learning agent) is dead, end the episode for everyone.
            for i in range(1, num_players):
                dones[str(i)] = True
            dones['__all__'] = True

        return observations, rewards, dones, {}

    def render(self, mode='human'):
        if self.state is None:
            return None

        return self.renderer.render(self.state, mode)

    def close(self):
        self.renderer.close()

    def test(self, trainer, frame_time=0.1):
        num_players = self.env.num_players
        self.close()
        state = self.reset()
        done = {"__all__": False}
        action = {str(i): None for i in range(num_players)}
        reward = {str(i): None for i in range(num_players)}
        cumulative_reward = 0

        while not done['__all__']:
            prev_action, prev_reward = action, reward
            # Players 1..n-1 act under the "opponent" policy; player 0 acts
            # under the "trainer" policy being evaluated.
            action = {
                i: trainer.compute_action(state[i],
                                          prev_action=prev_action[i],
                                          prev_reward=prev_reward[i],
                                          policy_id="opponent")
                for i in map(str, range(1, num_players))
            }
            action['0'] = trainer.compute_action(state['0'],
                                                 prev_action=prev_action['0'],
                                                 prev_reward=prev_reward['0'],
                                                 policy_id="trainer")

            state, reward, done, results = self.step(action)
            cumulative_reward += sum(reward.values())
            if done.get('0'):
                print("Player 0 died")
            self.render()

            sleep(frame_time)

        self.render()
        return cumulative_reward
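
The test() method above expects a trainer exposing policies named "trainer" and "opponent". A sketch of wiring this env into RLlib, assuming an older Ray release whose API matches the trainer.compute_action(..., policy_id=...) calls used here:

import ray
from ray import tune
from ray.tune.registry import register_env

# Hypothetical registry name; the env creator ignores the per-trial config.
register_env("tron_ma", lambda cfg: TronRayEnvironment(board_size=15, num_players=4))

probe = TronRayEnvironment(board_size=15, num_players=4)
obs_space, act_space = probe.observation_space, probe.action_space

ray.init()
tune.run(
    "PPO",
    stop={"training_iteration": 50},
    config={
        "env": "tron_ma",
        "multiagent": {
            # Policy ids match those queried in test(): "trainer" drives
            # player 0, "opponent" drives everyone else.
            "policies": {
                "trainer": (None, obs_space, act_space, {}),
                "opponent": (None, obs_space, act_space, {}),
            },
            "policy_mapping_fn":
                lambda agent_id: "trainer" if agent_id == "0" else "opponent",
        },
    },
)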