Example 1
def test_execute_action_on_copies_doesnt_conflict():
    """Executing the same action on independent copies and the original must not conflict."""
    players = [
        SimplePlayer(Color.RED),
        SimplePlayer(Color.BLUE),
        SimplePlayer(Color.WHITE),
        SimplePlayer(Color.ORANGE),
    ]
    game = Game(players)
    p0_color = game.state.colors[0]
    game.execute(Action(p0_color, ActionType.BUILD_SETTLEMENT, 0))

    action = Action(p0_color, ActionType.BUILD_ROAD, (0, 1))

    game_copy = game.copy()
    game_copy.execute(action)

    game_copy = game.copy()
    game_copy.execute(action)

    game.execute(action)
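
The test above is what makes look-ahead search safe: mutate a copy, never the original. A minimal sketch of that idiom, assuming the same Game/Action API as in the test (the helper name is illustrative):

def simulate(game, action):
    # Apply `action` to a throwaway copy; the caller's `game` is untouched.
    hypothetical = game.copy()
    hypothetical.execute(action)
    return hypothetical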
Example 2
    def decide(self, game: Game, playable_actions):
        """
        For each move, will run N playouts, get statistics, and save into replay buffer.
        Every M decisions, will:
            - flush replay buffer to disk (for offline experiments)
            - report progress on games thus far to TensorBoard (tf.summary module)
            - update model by choosing L random samples from replay buffer
                and train model. do we need stability check? i think not.
                and override model path.
        Decision V1 looks like, predict and choose the one that creates biggest
            'distance' against enemies. Actually this is the same as maximizing wins.
        Decision V2 looks the same as V1, but minimaxed some turns in the future.
        """
        if len(playable_actions) == 1:  # avoids dataset imbalance (if policy-learning)
            return playable_actions[0]

        start = time.time()

        # Run MCTS playouts for each possible action, save results for training.
        samples = []
        scores = []
        print(playable_actions)
        for action in playable_actions:
            print("Considering", action)
            action_applied_game_copy = game.copy()
            action_applied_game_copy.execute(action)
            sample = create_sample_vector(action_applied_game_copy, self.color)
            samples.append(sample)

            if TRAIN:
                # Save snapshots from the perspective of each player (more training!)
                counter = run_playouts(action_applied_game_copy, NUM_PLAYOUTS)
                mcts_labels = {k: v / NUM_PLAYOUTS for k, v in counter.items()}
                DATA_LOGGER.consume(action_applied_game_copy, mcts_labels)

                scores.append(mcts_labels.get(self.color, 0))

        # TODO: if M step, do all 4 things.
        if TRAIN and self.step % FLUSH_EVERY == 0:
            self.update_model_and_flush_samples()

        # scores = get_model().call(tf.convert_to_tensor(samples))
        best_idx = np.argmax(scores)  # note: scores is only populated when TRAIN is set
        best_action = playable_actions[best_idx]

        if TRAIN:
            print("Decision took:", time.time() - start)
        self.step += 1
        return best_action
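
The replay-buffer bookkeeping described in the docstring is not shown here. Below is a self-contained sketch of that "consume, flush every M, sample L" pattern; the ReplayBuffer name and its methods are illustrative, not the actual DATA_LOGGER API:

import json
import random

class ReplayBuffer:
    """Illustrative sketch only; not the module's DATA_LOGGER."""

    def __init__(self, path="replay_buffer.jsonl"):
        self.records = []
        self.path = path

    def consume(self, sample, labels):
        # One training example: feature vector plus MCTS win-rate labels.
        self.records.append({"sample": sample, "labels": labels})

    def flush(self):
        # Persist to disk for offline experiments, then clear memory.
        with open(self.path, "a") as f:
            for record in self.records:
                f.write(json.dumps(record) + "\n")
        self.records.clear()

    def sample(self, n):
        # Draw up to n random records for a training step.
        return random.sample(self.records, min(n, len(self.records)))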
Example 3
def test_copy():
    """Play 30 moves, copy game, ensure they look the same but not the same."""
    players = [
        SimplePlayer(Color.RED),
        SimplePlayer(Color.BLUE),
        SimplePlayer(Color.WHITE),
        SimplePlayer(Color.ORANGE),
    ]
    game = Game(players)
    for _ in range(30):
        game.play_tick()

    game_copy = game.copy()
    assert json.dumps(game, cls=GameEncoder) == json.dumps(game_copy, cls=GameEncoder)
    assert game_copy != game
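
json.dumps(obj, cls=SomeEncoder) works because the encoder subclass overrides default() for objects json cannot serialize natively. A generic sketch of that mechanism (GameEncoder's actual implementation is not shown here):

import json

class ExampleEncoder(json.JSONEncoder):
    def default(self, obj):
        # Fall back to the object's attribute dict; json handles the rest.
        if hasattr(obj, "__dict__"):
            return obj.__dict__
        return super().default(obj)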
Example 4
    def decide(self, game: Game, playable_actions: Iterable[Action]):
        """Should return one of the playable_actions.

        Args:
            game (Game): complete game state. read-only.
            playable_actions (Iterable[Action]): options to choose from
        Returns:
            action (Action): Chosen element of playable_actions
        """
        decided_action = super().decide(game, playable_actions)

        # Log a dataset of simple features with the MCTS score as the label
        results = run_playouts(game.copy(), NUM_SIMULATIONS)
        vector = simple_feature_vector(game, self.color)
        vector["LABEL"] = results[self.color] / float(NUM_SIMULATIONS)
        RECORDS.append(vector)

        return decided_action
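
For reference, the smallest player satisfying this decide() contract might look like the following. This is an illustrative sketch; it assumes the same Player base class these snippets subclass:

import random

class UniformRandomPlayer(Player):  # Player: the base class assumed by these snippets
    def decide(self, game, playable_actions):
        # Contract: return exactly one element of playable_actions; treat game as read-only.
        return random.choice(list(playable_actions))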
Example 5
    def decide(self, game: Game, playable_actions):
        actions = list_prunned_actions(game) if self.prunning else playable_actions
        if len(actions) == 1:
            return actions[0]

        start = time.time()
        root = StateNode(self.color, game.copy(), None, self.prunning)
        for _ in range(self.num_simulations):
            root.run_simulation()

        print(
            f"{str(self)} took {time.time() - start:.2f} secs "
            f"to decide between {len(playable_actions)} actions"
        )

        return root.choose_best_action()
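
StateNode.run_simulation is not shown here; MCTS selection conventionally ranks children by UCT. A self-contained sketch of that scoring, illustrative rather than the class's actual code:

import math

def uct_score(child_wins, child_visits, parent_visits, c=math.sqrt(2)):
    # parent_visits is assumed >= 1 (selection only runs below visited nodes).
    if child_visits == 0:
        return float("inf")  # always try unvisited children first
    exploit = child_wins / child_visits
    explore = c * math.sqrt(math.log(parent_visits) / child_visits)
    return exploit + explore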
Example 6
    def decide(self, game: Game, playable_actions):
        if len(playable_actions) == 1:
            return playable_actions[0]

        best_value = float("-inf")
        best_actions = []
        for action in playable_actions:
            game_copy = game.copy()
            game_copy.execute(action)

            key = player_key(game_copy.state, self.color)
            value = game_copy.state.player_state[f"{key}_ACTUAL_VICTORY_POINTS"]
            if value > best_value:
                best_value = value
                best_actions = [action]
            elif value == best_value:
                # Track ties so the final pick among equals is random.
                best_actions.append(action)

        return random.choice(best_actions)
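
The collect-ties-then-choose-randomly idiom above generalizes to any scoring function; a small self-contained sketch:

import random

def argmax_random_tie(actions, score):
    # Evaluate once per action, keep every action tied for the best score.
    scored = [(score(a), a) for a in actions]
    best = max(s for s, _ in scored)
    return random.choice([a for s, a in scored if s == best])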
Example 7
    def decide(self, game: Game, playable_actions):
        actions = self.get_actions(game)
        if len(actions) == 1:
            return actions[0]

        if self.epsilon is not None and random.random() < self.epsilon:
            return random.choice(playable_actions)

        start = time.time()
        state_id = str(len(game.state.actions))
        node = DebugStateNode(state_id, self.color)
        deadline = start + MAX_SEARCH_TIME_SECS
        result = self.alphabeta(game.copy(), self.depth, float("-inf"),
                                float("inf"), deadline, node)
        # print("Decision Results:", self.depth, len(actions), time.time() - start)
        # if game.state.num_turns > 10:
        #     render_debug_tree(node)
        #     breakpoint()
        return result[0]
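
The alphabeta call above takes a deadline alongside the usual depth and alpha/beta bounds. A generic sketch of deadline-aware alpha-beta pruning follows; the node interface (children(), value()) is illustrative, not the project's implementation:

import time

def alphabeta(node, depth, alpha, beta, deadline, maximizing):
    # Stop descending when out of depth, out of time, or at a leaf.
    if depth == 0 or time.time() >= deadline or not node.children():
        return node.value()
    if maximizing:
        best = float("-inf")
        for child in node.children():
            best = max(best, alphabeta(child, depth - 1, alpha, beta, deadline, False))
            alpha = max(alpha, best)
            if beta <= alpha:
                break  # beta cutoff: opponent would never allow this branch
        return best
    best = float("inf")
    for child in node.children():
        best = min(best, alphabeta(child, depth - 1, alpha, beta, deadline, True))
        beta = min(beta, best)
        if beta <= alpha:
            break  # alpha cutoff
    return best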
Example 8
    def decide(self, game: Game, playable_actions):
        if len(playable_actions) == 1:
            return playable_actions[0]

        start = time.time()
        # Alternative: split a fixed budget, PLAYOUTS_BUDGET // len(playable_actions)
        num_playouts = self.num_playouts

        best_action = None
        max_wins = None
        for action in playable_actions:
            action_applied_game_copy = game.copy()
            action_applied_game_copy.execute(action)

            counter = run_playouts(action_applied_game_copy, num_playouts)

            wins = counter[self.color]
            if max_wins is None or wins > max_wins:
                best_action = action
                max_wins = wins

        print(f"Greedy took {time.time() - start} secs to decide " +
              f"{len(playable_actions)} at {num_playouts} per action")
        return best_action
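
run_playouts is used as a black box here; conceptually it finishes N games from the given position and tallies winners. A sketch under that assumption (winning_color() returning None for unfinished games is assumed):

from collections import Counter

def run_playouts_sketch(game, num_playouts):
    counter = Counter()
    for _ in range(num_playouts):
        playout = game.copy()
        playout.play()  # play the copy to completion with the players' policies
        winner = playout.winning_color()
        if winner is not None:
            counter[winner] += 1
    return counter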
Example 9
import copy
import sys
import time

import numpy as np

from catanatron import Color, Game, RandomPlayer

game = Game([
    RandomPlayer(Color.RED),
    RandomPlayer(Color.BLUE),
    RandomPlayer(Color.WHITE),
    RandomPlayer(Color.ORANGE),
])
game.play()
print(sys.getsizeof(game))
print(getsize(game))  # getsize: recursive sizeof helper (defined elsewhere)
print(game)

start = time.time()
copy.deepcopy(game)
end = time.time()
print("copy.deepcopy(game) took", end - start, "seconds")

start = time.time()
game.copy()
end = time.time()
print("game.copy() took", end - start, "seconds")

start = time.time()
state = np.random.random((1500,))
end = time.time()
print("Create Numpy Vector", end - start, "seconds")
print(sys.getsizeof(state), getsize(state))

start = time.time()
np.copy(state)
end = time.time()
print("np.copy(a) took", end - start, "seconds")

a = np.random.randint(0, 2, size=(500, 500))