def findBestNoteUCB(self, node: Node):
    """Pick the child of ``node`` with the highest UCB score.

    Every child is scored with ``self.ucbValue`` from the visit count of
    ``node`` and the child's own win score and visit count. Ties between
    equally scored children are broken uniformly at random.

    Args:
        node: Parent node whose children are compared.

    Returns:
        One of the children of ``node``.

    Raises:
        IndexError: If ``node`` has no children at all.
    """
    parent_visits = node.getAction().getVisitCount()
    children = list(node.getChilds())

    best_children = []
    # Start below every real score: a 0.0 start silently discarded children
    # whose score was negative, leaving nothing to choose from.
    best_score = float("-inf")
    for child in children:
        stats = child.getAction()
        score = self.ucbValue(
            parent_visits, stats.getWinScore(), stats.getVisitCount()
        )
        if score > best_score:
            best_children = [child]
            best_score = score
        elif score == best_score:
            best_children.append(child)

    if not best_children:
        logger.warning("No best Children Found")
        # Only reachable when no score is comparable (e.g. NaN) or when there
        # are no children; in the former case any child is as good as another.
        best_children = children

    # random.choice raises IndexError on an empty sequence, which preserves the
    # original failure mode for a node without children.
    return random.choice(best_children)
def simulateRound(self, node: Node):
    """Play the round stored in ``node`` to the end with random moves.

    A full round is rebuilt from the node's player round and its ``hands``,
    the node's card is played, and a ``RandomPlayerSchieber`` then picks every
    further card until 36 cards have been played.

    Args:
        node: Node whose action supplies the round and the card to play first.

    Returns:
        The share of all points that went to team 0 when team 0 scored
        strictly more than team 1, otherwise ``0``.
    """
    # NOTE(review): team 0 is always treated as the side being evaluated,
    # independent of which player is to move -- confirm this is intended.
    action = node.getAction()
    start_round = action.getRound()
    rnd = get_round_from_player_round(start_round, start_round.hands)
    rnd.action_play_card(action.getCard())

    already_played = rnd.nr_played_cards
    rollout_player = RandomPlayerSchieber()
    for _ in range(already_played, 36):
        # The random player gets a fresh cheating view of the current round.
        view = PlayerRoundCheating()
        view.set_from_round(rnd)
        rnd.action_play_card(rollout_player.play_card(view))

    own_points = rnd.points_team_0
    enemy_points = rnd.points_team_1
    if own_points > enemy_points:
        return own_points / (own_points + enemy_points)
    return 0
def expandNode(self, node: Node, rnd: PlayerRoundCheating):
    """Attach one child to ``node`` for every card that is valid in ``rnd``.

    Args:
        node: Node that receives the new children.
        rnd: Round whose ``get_valid_cards()`` mask determines the children;
            the same ``rnd`` object is stored on every child.
    """
    # Indices of the non-zero entries of the valid-card mask.
    card_ids = np.flatnonzero(rnd.get_valid_cards())
    for card_id in card_ids:
        child = Node()
        child.setParent(node)

        child_action = child.getAction()
        child_action.setRound(rnd)
        # The player number is taken from the round stored on the parent node,
        # not from ``rnd``.
        child_action.setPlayerNr(node.getAction().getRound().player)
        child_action.setCard(card_id)

        node.addChild(child)
def __init__(self) -> None:
    """Set up the tree with a newly constructed ``Node`` as its root."""
    root = Node()
    self._rootNode = root