    def testGreedyActionHeuristic(self) -> None:
        self.game.new_game()
        state: State = self.game.state

        state.inject(0, Laboratory())
        state.inject(0, Village())
        state.inject(0, Smithy())

        state.advance_next_decision()

        # Action Phase: Play Lab
        r = DecisionResponse([])
        self.players[0].makeDecision(state, r)
        self.assertTrue(isinstance(r.cards[0], Laboratory))
        state.process_decision(r)

        state.advance_next_decision()

        r = DecisionResponse([])
        self.players[0].makeDecision(state, r)
        self.assertTrue(isinstance(r.cards[0], Village))
        state.process_decision(r)

        state.advance_next_decision()

        r = DecisionResponse([])
        self.players[0].makeDecision(state, r)
        self.assertTrue(isinstance(r.cards[0], Smithy))
        state.process_decision(r)

        state.advance_next_decision()

        self.assertEqual(state.get_zone_card_count(0, Zone.Hand), 10)
    def test_merchant(self) -> None:
        self.game.new_game()

        p_state: PlayerState = self.game.state.player_states[0]

        # Inject cards into hand
        merchant = Merchant()
        first_silver = Silver()
        second_silver = Silver()
        self.game.state.inject(0, merchant)
        self.game.state.inject(0, first_silver)
        self.game.state.inject(0, second_silver)
        self.game.state.inject(0, Estate())
        self.game.state.inject(0, Estate())

        self.game.state.advance_next_decision()

        # Action Phase Decision -- Play Merchant
        r = DecisionResponse([merchant])
        self.game.state.process_decision(r)
        self.game.state.advance_next_decision()

        # Treasure Phase Decision -- Play All Treasures
        r = DecisionResponse([first_silver])
        self.game.state.process_decision(r)
        self.game.state.advance_next_decision()

        r = DecisionResponse([second_silver])
        self.game.state.process_decision(r)
        self.game.state.advance_next_decision()

        self.assertEqual(p_state.coins, 5)
    def testChapelHeuristic(self) -> None:
        self.game.new_game()
        state: State = self.game.state

        state.inject(0, Chapel())
        state.advance_next_decision()

        # Action Phase decision: defaults to playing Chapel
        r: DecisionResponse = DecisionResponse([])
        self.players[0].makeDecision(state, r)
        self.game.state.process_decision(r)

        self.game.state.advance_next_decision()

        # Should auto trash 3 Copper and 1 Estate
        r = DecisionResponse([])
        self.players[0].makeDecision(state, r)
        self.game.state.process_decision(r)

        # Process TrashCard events
        self.game.state.advance_next_decision()

        n_copper = state.get_card_count(0, Copper)
        n_estate = state.get_card_count(0, Estate)
        self.assertTrue(n_copper == 3 or n_copper == 4)
        self.assertEqual(n_copper + n_estate, 6)
Example #4
    def step(self, action: DecisionResponse) -> Tuple[State, int, bool, Any]:
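        """Advance the game by one Buy Phase decision.

        Applies `action`, then auto-plays every non-Buy decision with each
        player's own controller until the next Buy Phase decision or the end
        of the game. Returns a Gym-style (state, reward, done, info) tuple;
        reward is +1 / -1 / 0 from player 0's perspective and is only nonzero
        once the game has ended.
        """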
        s: State = self.game.state
        d: DecisionState = s.decision

        if s.phase != Phase.BuyPhase:
            raise ValueError('Cannot step from any phase other than Buy Phase.')

        p: Player = self.game.players[d.controlling_player].controller

        s.process_decision(action)

        s.advance_next_decision()

        # Skip all non-Buy phases until end of game
        while s.phase != Phase.BuyPhase and not self._done:
            response = DecisionResponse([])
            p = self.game.players[d.controlling_player].controller
            p.makeDecision(s, response)
            s.process_decision(response)
            s.advance_next_decision()

        reward = 0
        if self._done:
            p0win = self.game.is_winner(0)
            p1win = self.game.is_winner(1)
            if p0win and p1win:
                reward = 0
            elif p0win:
                reward = 1
            else:
                reward = -1

        return s, reward, self._done, None
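A minimal driver for this Gym-style interface might look like the sketch below. It is illustrative only: it relies on nothing beyond the reset/step contract and the makeDecision call shown in these examples, and it leaves out how the environment and its players are constructed.

def play_one_game(env) -> int:
    # Sketch only: drives env.reset()/env.step() as defined above.
    state = env.reset()
    done = False
    reward = 0
    while not done:
        action = DecisionResponse([])
        d = state.decision
        # The controlling player's agent fills in the response for this decision.
        env.players[d.controlling_player].makeDecision(state, action)
        state, reward, done, _ = env.step(action)
    # +1 if player 0 won, -1 if player 0 lost, 0 on a tie (see step() above).
    return reward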
def train_elog(env: Environment, epochs: int, train_epochs_interval: int):
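    """Generate self-play games and update each RolloutPlayer's rollout model.

    Every decision's feature vector and chosen card are recorded, the game's
    final reward is broadcast to all recorded positions, and the rollout
    model is retrained every train_epochs_interval epochs.
    """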
    for epoch in tqdm(range(epochs)):
        state = env.reset()
        done = False
        data = {
            'features': [],
            'rewards': [],
            'cards': [],
            'idxs': state.feature.idxs
        }
        while not done:
            action = DecisionResponse([])
            d: DecisionState = state.decision
            player: Player = env.players[d.controlling_player]

            player.makeDecision(state, action)

            x = state.feature.to_numpy()
            data['features'].append(x)
            data['cards'].append(action.single_card)

            obs, reward, done, _ = env.step(action)

        data['rewards'].extend([reward] *
                               (len(data['features']) - len(data['rewards'])))

        for player in env.players:
            if isinstance(player, RolloutPlayer):
                player.rollout.update(**data)
                if (epoch + 1) % train_epochs_interval == 0:
                    player.rollout.learn()
Example #6
    def run(self):
        s = self.game.state
        d: DecisionState = s.decision
        tree_score = 0
        # run the game up to game end or turn limit reached
        while (d.type != DecisionType.DecisionGameOver
               and s.player_states[0]._turns < self.T):
            if d.text:
                logging.info(d.text)
            response = DecisionResponse([])
            player = self.game.players[d.controlling_player]
            next_node = player.controller.makeDecision(s, response)

            if s.phase == Phase.BuyPhase:
                # apply selection until leaf node is reached
                if next_node:
                    assert next_node == self.player.node
                    self.player.node.n += 1
                elif not self.expanded:
                    # expand one node
                    cards = list(
                        filter(lambda x: not isinstance(x, Curse),
                               d.card_choices + [None]))
                    self.player.node.add_unique_children(cards)
                    self.expanded = True
                    self.player.node = self.player.node.get_child_node(
                        response.single_card)
                    self.player.node.n += 1
                    # Track the score accumulated while still inside the tree
                    tree_score = self.game.get_player_scores()[0]
                    self.data.update_split_scores(tree_score, False, self.iter)
                elif self.rollout_model == Rollout.HistoryHeuristic:
                    self.rollout_cards.append(response.single_card)

            s.process_decision(response)
            s.advance_next_decision()

        score = self.game.get_player_scores()[0]
        # update data
        self.data.update_split_scores(score - tree_score, True, self.iter)

        # backpropagate
        delta = score
        self.player.node.v += delta
        self.player.node = self.player.node.parent
        while self.player.node != self.player.root:
            self.player.node.update_v(lambda x: sum(x) / len(x))
            self.player.node = self.player.node.parent

        # update history heuristic
        if self.rollout_model == Rollout.HistoryHeuristic:
            self.rollout.update(cards=self.rollout_cards, score=score)
        elif self.rollout_model == Rollout.LinearRegression:
            counts = self.game.state.get_card_counts(0)
            self.rollout.update(counts=counts, score=score, i=self.iter)

        return self.game.get_player_scores()[0]
def simulate(env: Environment, n: int, tree: GameTree, turn_log=False, action_log=False, card_log=False) -> SimulationData:
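    """Play n games in env, optionally advancing a search tree, and collect
    per-turn, per-action, and per-card statistics into a SimulationData."""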
    # TODO: Clean up this function; the logging branches make it hard to follow.
    sim_data = SimulationData(Supply(env.config).get_supply_card_types())

    for i in tqdm(range(n)):
        state: State = env.reset()
        if tree:
            tree.reset(state)
        done = False
        t_start = time.time()
        starting_player_buy = None
        while not done:
            action: DecisionResponse = DecisionResponse([])
            d: DecisionState = state.decision
            pid: int = d.controlling_player
            player = env.players[pid]
            player.makeDecision(state, action)

            if state.phase == Phase.ActionPhase:
                # +1 to turns to get current turn
                sim_data.update_action(i, pid, state.player_states[pid].turns + 1, action.cards[0])

            if state.phase == Phase.BuyPhase and tree:
                tree.advance(action.single_card)

            log_buy = (state.phase == Phase.BuyPhase)

            obs, reward, done, _ = env.step(action)

            if turn_log and log_buy:
                if pid == 0:
                    starting_player_buy = action.single_card
                else:
                    sim_data.update_turn(i, 0, state.player_states[0].turns, state.get_player_score(0), starting_player_buy, state.get_coin_density(0))
                    sim_data.update_turn(i, 1, state.player_states[1].turns, state.get_player_score(1), action.single_card, state.get_coin_density(1))
            if card_log and log_buy:
                if pid == 1:
                    sim_data.update_card(i, 0, state.player_states[0].turns, state.get_card_counts(0))
                    sim_data.update_card(i, 1, state.player_states[1].turns, state.get_card_counts(1))

        if state.player_states[0].turns > state.player_states[1].turns:
            sim_data.update_card(i, 0, state.player_states[0].turns, state.get_card_counts(0))
            sim_data.update_turn(i, 0, state.player_states[0].turns, state.get_player_score(0), starting_player_buy, state.get_coin_density(0))

        t_end = time.time()
        sim_data.update(env.game, t_end - t_start)

    sim_data.finalize(env.game)

    print('===SUMMARY===')
    print(sim_data.summary)

    return sim_data
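A hypothetical driver for simulate(), shown only to illustrate the call; it assumes an Environment has already been constructed elsewhere and relies solely on the signature above (passing tree=None skips the tree-tracking branches).

def run_baseline_matchup(env: Environment, games: int = 100) -> SimulationData:
    # Sketch: no search tree, but log per-turn and per-card statistics.
    return simulate(env, games, tree=None, turn_log=True, card_log=True)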
Example #8
    def test_event_sentry(self) -> None:
        self.game.new_game()

        # Inject Sentry in player's hand
        sentry = Sentry()

        self.game.state.inject(0, sentry)

        self.game.state.advance_next_decision()

        # Action Phase Decision
        r = DecisionResponse([])
        r.cards = [sentry]
        self.game.state.process_decision(r)
        self.game.state.advance_next_decision()

        # Choose to trash one card
        d = self.game.state.decision
        trashed = d.card_choices[0]
        r = DecisionResponse([trashed])
        self.game.state.process_decision(r)
        # Trash card
        self.game.state.advance_next_decision()

        self.assertEqual(self.game.state.trash, [trashed])

        # Choose to discard one card
        d = self.game.state.decision
        discarded = d.card_choices[0]
        r = DecisionResponse([discarded])
        self.game.state.process_decision(r)
        # Discard card
        self.game.state.advance_next_decision()

        d = self.game.state.decision
        p_state: PlayerState = self.game.state.player_states[0]
        self.assertEqual(p_state._discard, [discarded])
        self.assertIsNone(d.active_card)
Example #9
    def run(self, T=None):
        # Advance decisions until the game ends or every player has taken T turns.
        d = self.state.decision
        self.state.advance_next_decision()
        while d.type != DecisionType.DecisionGameOver:
            if T is not None and all(t.turns >= T
                                     for t in self.state.player_states):
                break
            if d.text:
                logging.info(d.text)
            response = DecisionResponse([])
            player = self.players[self.state.decision.controlling_player]
            player.controller.makeDecision(self.state, response)
            self.state.process_decision(response)
            self.state.advance_next_decision()
Example #10
    def test_moat_reveal(self) -> None:
        self.game.new_game()

        # Inject necessary cards into players' hands
        attack_card = Militia()
        moat_card = Moat()
        self.game.state.inject(0, attack_card)
        self.game.state.inject(1, moat_card)

        self.game.state.advance_next_decision()

        # Action Phase decision
        r = DecisionResponse([])
        r.cards = [attack_card]
        self.game.state.process_decision(r)
        self.game.state.advance_next_decision()

        # MoatReveal reaction
        r = DecisionResponse([])
        r.choice = 0
        self.game.state.process_decision(r)
        self.game.state.advance_next_decision()

        self.assertEqual(self.game.state.events, [])
Example #11
    def reset(self, **kwargs) -> State:
        if self.randomize_player_order:
            np.random.shuffle(self.players)
        self.game = Game(self.config, self.players)
        self.game.new_game()
        self.game.state.advance_next_decision()

        s: State = self.game.state
        d: DecisionState = s.decision

        while s.phase != Phase.BuyPhase and not self._done:
            response = DecisionResponse([])
            p = self.game.players[d.controlling_player].controller
            p.makeDecision(s, response)
            s.process_decision(response)
            s.advance_next_decision()

        return self.game.state
    def test_vassal_effect_play_action(self) -> None:
        self.game.new_game()
        p_state: PlayerState = self.game.state.player_states[0]
        opp_state: PlayerState = self.game.state.player_states[1]
        card = Bandit()
        p_state._deck[-1] = card
        first_discarded = opp_state._deck[-1]
        second_discarded = opp_state._deck[-2]
        effect = VassalEffect()

        # Play Bandit
        r = DecisionResponse([], 1)
        effect.play_action(self.game.state)
        self.game.state.advance_next_decision()
        self.game.state.process_decision(r)

        # Process Bandit events
        self.game.state.advance_next_decision()
        self.assertIn(card, p_state._play_area)
        self.assertIn(first_discarded, opp_state._discard)
        self.assertIn(second_discarded, opp_state._discard)
Example #13
def sample_training_batch(n: int, p: float, config: GameConfig, players: Sequence[Player], win_loss=False) -> Tuple[np.ndarray, np.ndarray]:
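    """Sample (feature, reward) training pairs from n self-play games.

    If 0 < p <= 1, each post-decision feature vector is kept with probability
    p; if p > 1, features are kept only for turns earlier than p; if p <= 0,
    only the final position of each game is kept. The game's terminal reward
    is broadcast to every kept feature. With win_loss=True, losses (-1) are
    relabeled 0 so y becomes a binary win indicator.
    """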
    env = DefaultEnvironment(config, players)
    X = []
    y = []

    rng = np.random.default_rng()

    print('Generating training data from self-play...')
    for epoch in tqdm(range(n)):
        state: State = env.reset()
        done = False
        while not done:
            action = DecisionResponse([])
            d = state.decision
            player = players[d.controlling_player]
            player.makeDecision(state, action)
            obs, reward, done, _ = env.step(action)

            feature = obs.feature.to_numpy()
            if p <= 1 and p > 0:
                if rng.uniform(0, 1) < p:
                    X.append(feature)
            else:
                if obs.player_states[d.controlling_player].turns < p:
                    X.append(feature)

        if p <= 0:
            X.append(feature)

        y.extend([reward] * (len(X) - len(y)))

    y = np.array(y)

    if win_loss:
        y[y == -1] = 0

    return np.array(X), y
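As a sketch of how the sampled batch might be consumed (this is not part of the original code), one could fit a simple win-probability model on it. The sklearn import is an addition, and the config and players are assumed to be constructed elsewhere.

from sklearn.linear_model import LogisticRegression

def fit_win_probability_model(config: GameConfig, players) -> LogisticRegression:
    # win_loss=True turns the -1/+1/0 rewards into 0/1 labels.
    X, y = sample_training_batch(1000, 1.0, config, players, win_loss=True)
    model = LogisticRegression(max_iter=1000)
    model.fit(X, y)
    return model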
def train_mcts(env: Environment,
               tree: GameTree,
               path: str,
               rollout_path: str,
               epochs: int,
               train_epochs_interval: int = 1000,
               train_epochs_cap=10000,
               save_epochs=1000,
               scoring='win_loss'):
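    """Self-play training loop for the shared MCTS game tree.

    Each epoch plays one game, records the MCTS players' features and chosen
    cards, backpropagates a reward pair (determined by scoring) through the
    tree, periodically retrains the rollout model, and checkpoints both the
    tree and the rollout model.
    """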
    for epoch in tqdm(range(epochs)):
        state: State = env.reset()
        tree.reset(state)
        done = False
        expanded = False
        flip = False
        data = {
            'features': [],
            'rewards': [],
            'cards': [],
            'idxs': state.feature.idxs
        }
        data['model_name'] = os.path.split(path)[-1]
        while not done:
            action = DecisionResponse([])
            d: DecisionState = state.decision
            player: Player = env.players[d.controlling_player]

            # Add any states now visible due to randomness
            if tree.in_tree:
                cards = d.card_choices + [None]
                tree.node.add_unique_children(cards)

            player.makeDecision(state, action)

            if isinstance(player, MCTSPlayer):
                x = state.feature.to_numpy()
                data['features'].append(x)
                data['cards'].append(action.single_card)

            # Advance to the next node within the tree, implicitly adding a node the first time we exit tree
            if tree.in_tree:
                tree.advance(action.single_card)

            # First time we go out of tree, enter rollout phase
            if not expanded and not tree.in_tree:
                # Previous node is starting player action, so current node is opponent player action.
                flip = (state.player == 1)
                expanded = True

            obs, reward, done, _ = env.step(action)

        data['rewards'].extend([reward] *
                               (len(data['features']) - len(data['rewards'])))
        start_idx = 1 if flip else 0
        p0_score, p1_score = state.get_player_score(0), state.get_player_score(1)
        if scoring == 'score':
            p0_reward, p1_reward = p0_score, p1_score
        elif scoring == 'win_loss':
            if reward == 0:
                p0_reward, p1_reward = 1 / 2, 1 / 2
            elif reward == 1:
                p0_reward, p1_reward = 1, 0
            else:
                p0_reward, p1_reward = 0, 1
        elif scoring == 'score_ratio':
            min_score = min(p0_score, p1_score)
            if min_score < 0:
                # Shift both scores so the minimum is zero before normalizing.
                p0_score_nonneg = p0_score + abs(min_score)
                p1_score_nonneg = p1_score + abs(min_score)
            else:
                p0_score_nonneg, p1_score_nonneg = p0_score, p1_score
            if p0_score_nonneg == 0 and p1_score_nonneg == 0:
                p0_reward, p1_reward = 0, 0
            else:
                total_score = p0_score_nonneg + p1_score_nonneg
                # Normalize the shifted scores so the rewards are non-negative
                # and sum to 1.
                p0_reward, p1_reward = (p0_score_nonneg / total_score,
                                        p1_score_nonneg / total_score)

        tree.node.backpropagate((p0_reward, p1_reward), start_idx=start_idx)

        if save_epochs > 0 and epoch % save_epochs == 0:
            save(path, tree._root)

            for player in env.players:
                if isinstance(player, MCTSPlayer):
                    player.rollout.save(rollout_path)
                    break

        # mcts players share the tree, so only update once
        for player in env.players:
            if isinstance(player, MCTSPlayer):
                player.rollout.update(**data)
                if (epoch + 1) % train_epochs_interval == 0 and (
                        epoch + 1) < train_epochs_cap:
                    player.rollout.learn()

    for player in env.players:
        if isinstance(player, MCTSPlayer):
            player.rollout.save(rollout_path)
            break
    save(path, tree._root)
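The three scoring branches above could also be factored into a standalone helper. The sketch below is such a refactor (not part of the original code); it reproduces the same mapping from the terminal reward and both players' scores to the pair of backpropagated rewards.

from typing import Tuple

def compute_rewards(scoring: str, reward: int, p0_score: int, p1_score: int) -> Tuple[float, float]:
    # Sketch only: mirrors the 'score', 'win_loss', and 'score_ratio' branches of train_mcts.
    if scoring == 'score':
        return float(p0_score), float(p1_score)
    if scoring == 'win_loss':
        if reward == 0:
            return 0.5, 0.5
        return (1.0, 0.0) if reward == 1 else (0.0, 1.0)
    if scoring == 'score_ratio':
        # Shift both scores so the minimum is zero, then normalize.
        shift = max(0, -min(p0_score, p1_score))
        p0, p1 = p0_score + shift, p1_score + shift
        total = p0 + p1
        return (0.0, 0.0) if total == 0 else (p0 / total, p1 / total)
    raise ValueError(f'Unknown scoring mode: {scoring}')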