Example 1
def printB(self, b1=False):
    # Render the board (self.b, or the alternate board self.b1 if b1 is True)
    # as a 9x9 grid of color codes and print it via gameGo.printBrett.
    board = [[0] * 9 for _ in range(9)]
    for row in range(9):
        for col in range(9):
            if b1:
                board[row][col] = self.b1[row][col]['farbe']
            else:
                board[row][col] = self.b[row][col]['farbe']
    gameGo.printBrett(board)
    print('')
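
For context, printB assumes each board cell is a dict carrying a 'farbe' (color) entry. A minimal sketch of that assumed structure; the concrete color encoding in gameGo is a guess here:

# Hypothetical sketch of the board layout printB expects: a 9x9 grid of
# dicts, each with a 'farbe' (color) code; the 0/1 encoding is an assumption.
empty_board = [[{'farbe': 0} for _ in range(9)] for _ in range(9)]
empty_board[4][4]['farbe'] = 1  # e.g. a stone on the center point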
Example 2
def testBoard(b, b1, whoMoves, soll):
    # returns success as 1/0
    y_pred = predict(gameGo.b1To5(b, b1, whoMoves), model=model, mitPrint=not printNurSummary)
    if not printNurSummary:
        gameGo.printBrett(b)
        print('Next move should be: ', end='')
        print(', '.join(str(s) for s in soll))
        print('NnGo predicts: ' + str(y_pred))
    # success if the predicted position is one of the expected moves (soll)
    return 1 if y_pred in soll else 0
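
Since testBoard returns 1 on success and 0 otherwise, a natural use is to sum it over a suite of test positions to get an accuracy figure. A sketch, assuming a hypothetical list testCases of (b, b1, whoMoves, soll) tuples:

# testCases is a hypothetical list of (b, b1, whoMoves, soll) tuples;
# testBoard is the function above.
hits = sum(testBoard(b, b1, whoMoves, soll) for b, b1, whoMoves, soll in testCases)
print('Accuracy: %d/%d = %d%%' % (hits, len(testCases), hits * 100 // len(testCases)))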
Example 3
def get_policy(self, s, tau=1):
    """
    Extract the move policy for a given state.
    :param s: int-encoded state of the board
    :param tau: visit-count temperature; 0 picks the most-visited move deterministically
    :return: probs
    """
    counts = self.stateStats.b[s][0]
    if tau == 0:
        probs = [0.0] * gameGo.ANZ_POSITIONS
        probs[np.argmax(counts)] = 1.0
    else:
        counts = [count ** (1.0 / tau) for count in counts]
        total = sum(counts)
        if total == 0:  # should NOT happen
            print('mcts.get_policy with sum(counts)=0, at:')
            b2 = gameGo.intToB(s)
            gameGo.printBrett(b2[0])
            probs = self.stateStats.b[s][2]  # fall back to the stored per-action statistics
        else:
            probs = [count / total for count in counts]
    return probs
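
The tau parameter is the usual AlphaZero-style visit-count temperature: counts are raised to 1/tau and normalized, so tau=1 reproduces the visit distribution and tau=0 collapses onto the most-visited move. A self-contained numpy sketch of just that transformation:

import numpy as np

def visit_counts_to_policy(counts, tau=1.0):
    # Temperature-scaled policy from MCTS visit counts (standalone sketch;
    # assumes at least one nonzero count).
    counts = np.asarray(counts, dtype=np.float64)
    if tau == 0:
        probs = np.zeros_like(counts)
        probs[np.argmax(counts)] = 1.0  # deterministic: most-visited move only
        return probs
    scaled = counts ** (1.0 / tau)
    return scaled / scaled.sum()

print(visit_counts_to_policy([10, 30, 60], tau=1.0))  # [0.1 0.3 0.6]
print(visit_counts_to_policy([10, 30, 60], tau=0.5))  # sharper than tau=1
print(visit_counts_to_policy([10, 30, 60], tau=0))    # [0. 0. 1.]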
Example 4
def search(self, count, batch_size, s, player, net, zugNr, zugMax, device):
    # return: number of find_leaf calls that played the game through to the end
    countEnd = 0
    if batch_size > 0:
        for _ in range(count):
            countEndMini = self.search_minibatch(batch_size, s, player, net, zugNr, zugMax, device)
            countEnd += countEndMini
    else:
        for _ in range(count):
            value, leaf_state, leaf_player, states, actions = self.find_leaf(s, player, zugNr, zugMax)
            if value is None:
                # expand with leaf_state, leaf_player, states, actions
                batch_v = gameGo.state_lists_to_batch([gameGo.intToB(leaf_state)], [leaf_player], device)
                logits_v, value_v = net(batch_v)
                probs_v = F.softmax(logits_v, dim=1)
                probs = probs_v.detach().cpu().numpy()[0]
                value = value_v.data.cpu().numpy()[0][0]
                # create the node
                self.stateStats.expand(leaf_state, probs)
            else:
                countEnd += 1
                print('Leaf reached the end of the game.')
                cv = -value
                cp = leaf_player
                for state, action in zip(states[::-1], actions[::-1]):
                    print('backup with action: ', action, 'player: ', cp, ' value: ', cv, ' at:')
                    cv = -cv
                    cp = 1-cp
                    gameGo.printBrett(gameGo.intToB(state)[0])
            # backup with value, states, actions
            # the leaf state is not stored in states + actions, so the value of the leaf
            # is backed up from the opponent's perspective first
            cur_value = -value
            for state, action in zip(states[::-1], actions[::-1]):
                self.stateStats.backup(state, action, cur_value)
                cur_value = -cur_value
    return countEnd
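
The backup at the end of search relies on the leaf state itself not being part of states/actions: the first value written on the way back up is the negated leaf value, and the sign flips at every ply. A minimal standalone sketch of that sign pattern (path and the returned list are hypothetical stand-ins for stateStats.backup):

# 'path' is a hypothetical root-to-leaf-parent list of (state, action) pairs;
# 'leaf_value' is from the perspective of the player to move at the leaf.
def backup_values(path, leaf_value):
    backed_up = []
    cur_value = -leaf_value  # the leaf itself is not in the path, so start negated
    for state, action in reversed(path):
        backed_up.append((state, action, cur_value))
        cur_value = -cur_value  # perspective flips at every ply
    return backed_up

print(backup_values([('s0', 3), ('s1', 7)], leaf_value=1.0))
# [('s1', 7, -1.0), ('s0', 3, 1.0)]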
Example 5
def play_game(mcts_stores, replay_buffer, net1, net2, steps_before_tau_0,
              mcts_searches, mcts_batch_size=0, stat='nicht', device='cpu'):
    """
    Play one single game, memorizing transitions into the replay buffer
    :param mcts_stores: could be a single MCTS or two MCTSes, one per net
    :param replay_buffer: queue with (state, probs, values); if None, nothing is stored
    :param net1: player1
    :param net2: player2
    :param mcts_batch_size: batch size for the MCTS minibatch, 0: no minibatch call
    :param stat: which statistic to print: 'nicht' (none), 'Diff', or 'Leaf'
    :return: value for the game with respect to net1 (+1 if p1 won, -1 if lost, 0 if draw)
    Statistics: the share of leaf calls is determined during the first evaluation game,
                the MCTS-vs-NN differences during the last evaluation game;
                controlled overall via PLAY_STATISTIK: off, summary-only, detailed
    """
#    assert isinstance(replay_buffer, (collections.deque, type(None)))
#    assert isinstance(mcts_stores, (mctsGo.MCTS, type(None), list))
#    assert isinstance(net1, NnGo)
#    assert isinstance(net2, NnGo)
    if isinstance(mcts_stores, mctsGo.MCTS):
        mcts_stores = [mcts_stores, mcts_stores]
    spiel = goSpielNoGraph.PlayGo(gameGo.b2Initial, zugMax=ZUG_MAX)
    state = spiel.bToInt()
    nets = [net1, net2]
    cur_player = 1 # black always begins, and that is net1
    step = 0
    countDiff = 0
    countEnd = 0
    countSearch = mcts_searches * mcts_batch_size if mcts_batch_size > 0 else mcts_searches
    tau = 1 if steps_before_tau_0 > 0 else 0
    game_history = []
    values, zuege = [], []
    while True:
        statEnd = mcts_stores[1-cur_player].search(mcts_searches, mcts_batch_size, state, cur_player,
                                        nets[1-cur_player], zugNr = step+1, zugMax=ZUG_MAX, device=device)
        countEnd += statEnd
        probs = mcts_stores[1-cur_player].get_policy(state, tau=tau)
        game_history.append((state, cur_player, probs))
        action = np.random.choice(gameGo.ANZ_POSITIONS, p=probs)
        if not spiel.setzZug(action):   # the move proper: place a stone at action
            print('Impossible action at step ', step, ', Player: ', cur_player, '. Action=', action, ' at:')
            spiel.printB()
            print('b1:')
            spiel.printB(b1=True)
            print('mit probs:')
            gameGo.printBrett(probs, istFlat=True, mitFloat=True)
            counts = mcts_stores[1-cur_player].stateStats.b[state][0]
            print('Counts:')
            gameGo.printBrett(counts, istFlat=True)
            counts[action] = 0
            # fall back to the next-best visited move; if that also fails, play 81 (presumably pass)
            if not spiel.setzZug(np.argmax(counts)):
                spiel.setzZug(81)
        elif PLAY_STATISTIK == 1:
            zuege.append(action)
            values.append('%1.2f ' % (mcts_stores[1-cur_player].stateStats.b[state][2][action]))
        if PLAY_STATISTIK > 0 and stat != 'nicht':
            batch_v = gameGo.state_lists_to_batch([gameGo.intToB(state)], [cur_player], device)
            p_v, _ = nets[1-cur_player](batch_v)
            probs = p_v.detach().cpu().numpy()[0]
            position = np.argmax(probs)
            if position != action:
                countDiff += 1
                if PLAY_STATISTIK == 2:
                    print('play_game step ', step+1, ' action differs!')
                    print('Action  MCTS: ', action, '  NN: ', position)
                    print('Share of leaf calls reaching game end: '+str(statEnd)+' = '+str(int(statEnd*100/countSearch))+'%')
                    print('')
        if spiel.spielBeendet:
#            print('Winner:', spiel.gewinner, 'B:', spiel.pktSchwarz, 'W:', spiel.pktWeiss)
            if PLAY_STATISTIK == 1:
                spiel.sgfWrite(zuege, values)
            if spiel.gewinner == 1:
                net1_result = 1
                if cur_player == 1:
                    result = 1
                else:
                    result = -1
            elif spiel.gewinner == -1:
                net1_result = -1
                if cur_player == 1:
                    result = -1
                else:
                    result = 1
            else:
                result = 0
                net1_result = 0
            break
        cur_player = 1-cur_player
        state = spiel.bToInt()
        step += 1
        if step >= steps_before_tau_0:
            tau = 0
    if PLAY_STATISTIK > 0:
        if stat == 'Diff':
            print('play_game differences MCTS vs NN: '+str(countDiff)+' = '+str(int(countDiff*100/(step+1)))+'%')
        elif stat == 'Leaf':
            print('Share of leaf calls reaching game end in total: '
              + str(countEnd) + ' = ' + str(int(countEnd*100/(countSearch*(step+1)))) + '%')
    if replay_buffer is not None:
        # walk the game backwards; the result sign flips for the alternating players
        for state, cur_player, probs in reversed(game_history):
            # augment every position with board symmetries (0/90/180/270 are rotations,
            # 1-4 presumably select mirrored variants in gameGo.drehB2)
            for drehung in (0, 90, 180, 270, 1, 2, 3, 4):
                replay_buffer.append((gameGo.drehB2(state, drehung), cur_player,
                                      gameGo.drehPosition(probs, drehung), result))
            result = -result
    return net1_result, step
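
The replay-buffer loop at the end labels every position with the final result as seen by the player to move, by walking the history backwards and negating the result at each step. A minimal sketch of just that labeling, with the state encoding and symmetry augmentation omitted:

# game_history: hypothetical list of states in move order; final_result is
# from the perspective of the player who made the last move.
def label_history(game_history, final_result):
    labeled = []
    result = final_result
    for state in reversed(game_history):
        labeled.append((state, result))
        result = -result  # the other player sees the opposite outcome
    return labeled

print(label_history(['s0', 's1', 's2'], final_result=1))
# [('s2', 1), ('s1', -1), ('s0', 1)]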