def printB(self, b1=False):
    board = [[0] * 9 for _ in range(9)]
    for row in range(9):
        for col in range(9):
            if b1:
                board[row][col] = self.b1[row][col]['farbe']
            else:
                board[row][col] = self.b[row][col]['farbe']
    gameGo.printBrett(board)
    print('')
def testBoard(b, b1, whoMoves, soll):  # returns success 1/0
    y_pred = predict(gameGo.b1To5(b, b1, whoMoves), model=model, mitPrint=not printNurSummary)
    if not printNurSummary:
        gameGo.printBrett(b)
        print('Next move should be: ' + ', '.join(str(s) for s in soll))
        print('NnGo predicts: ' + str(y_pred))
    return 1 if y_pred in soll else 0
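# Hedged usage sketch (illustration only, not part of the module): running
# testBoard over a hypothetical list of test cases to get an accuracy figure.
# testFaelle is an assumed name; each entry would be (b, b1, whoMoves, soll).
#
# erfolge = sum(testBoard(b, b1, who, soll) for b, b1, who, soll in testFaelle)
# print('Accuracy: %.2f' % (erfolge / len(testFaelle)))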
def get_policy(self, s, tau=1):
    """
    Extract the policy for a state from the MCTS visit counts
    :param s: encoded state of the board
    :param tau: temperature; 0 returns a deterministic argmax policy
    :return: probs
    """
    counts = self.stateStats.b[s][0]
    if tau == 0:
        probs = [0.0] * gameGo.ANZ_POSITIONS
        probs[np.argmax(counts)] = 1.0
    else:
        counts = [count ** (1.0 / tau) for count in counts]
        total = sum(counts)
        if total == 0:  # should NOT happen
            print('mcts.get_policy with sum(counts)=0, at:')
            b2 = gameGo.intToB(s)
            gameGo.printBrett(b2[0])
            probs = self.stateStats.b[s][2]
        else:
            probs = [count / total for count in counts]
    return probs
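# Hedged sketch (illustration only, not part of the module): how the
# temperature tau shapes the policy extracted from visit counts. The counts
# are made-up example values; only numpy is assumed.
#
# import numpy as np
# counts = np.array([10.0, 30.0, 60.0])  # visit counts N(s, a)
# for tau in (1.0, 0.5):
#     scaled = counts ** (1.0 / tau)     # lower tau -> sharper distribution
#     print(tau, scaled / scaled.sum())  # 1.0: [0.1 0.3 0.6], 0.5: ~[0.02 0.20 0.78]
# # tau == 0 is the greedy limit, handled above as a one-hot argmax.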
def search(self, count, batch_size, s, player, net, zugNr, zugMax, device):
    # return: number of find_leaf calls that played the game to its end
    countEnd = 0
    if batch_size > 0:
        for _ in range(count):
            countEndMini = self.search_minibatch(batch_size, s, player, net, zugNr, zugMax, device)
            countEnd += countEndMini
    else:
        for _ in range(count):
            value, leaf_state, leaf_player, states, actions = self.find_leaf(s, player, zugNr, zugMax)
            if value is None:
                # expand with leaf_state, leaf_player, states, actions
                batch_v = gameGo.state_lists_to_batch([gameGo.intToB(leaf_state)], [leaf_player], device)
                logits_v, value_v = net(batch_v)
                probs_v = F.softmax(logits_v, dim=1)
                probs = probs_v.detach().cpu().numpy()[0]
                value = value_v.data.cpu().numpy()[0][0]
                # create the node
                self.stateStats.expand(leaf_state, probs)
            else:
                countEnd += 1
                print('Leaf reached the end of the game.')
                cv = -value
                cp = leaf_player
                for state, action in zip(states[::-1], actions[::-1]):
                    print('backup with action:', action, 'player:', cp, 'value:', cv, 'at:')
                    cv = -cv
                    cp = 1 - cp
                    gameGo.printBrett(gameGo.intToB(state)[0])
            # backup with value, states, actions
            # the leaf state is not stored in states + actions, so the value of the
            # leaf is the value from the opponent's perspective
            cur_value = -value
            for state, action in zip(states[::-1], actions[::-1]):
                self.stateStats.backup(state, action, cur_value)
                cur_value = -cur_value
    return countEnd
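# Hedged sketch (illustration only, not part of the module): the sign-flipping
# backup used above. Values are stored from the perspective of the player to
# move, so a leaf value of +0.8 becomes -0.8 one ply up, +0.8 two plies up, etc.
# The path below is a made-up (state, action) list, root first.
#
# value = 0.8
# path = [('s0', 12), ('s1', 40), ('s2', 7)]
# cur_value = -value                 # the leaf itself is not part of the path
# for state, action in reversed(path):
#     print('backup', state, action, cur_value)
#     cur_value = -cur_value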
def play_game(mcts_stores, replay_buffer, net1, net2, steps_before_tau_0, mcts_searches,
              mcts_batch_size=0, stat='nicht', device='cpu'):
    """
    Play one single game, memorizing transitions into the replay buffer
    :param mcts_stores: could be a single MCTS or two MCTSes, one per net
    :param replay_buffer: queue with (state, probs, values); if None, nothing is stored
    :param net1: player 1
    :param net2: player 2
    :param mcts_batch_size: batch size for the MCTS minibatch, 0: no minibatch call
    :param stat: which per-game statistic to report: 'nicht' (none), 'Diff' or 'Leaf'
    :return: value for the game with respect to net1 (+1 if p1 won, -1 if lost, 0 if draw)
    Statistics: the share of leaf calls reaching the game end is measured in the first
    evaluation game, the differences MCTS vs NN in the last evaluation game; both are
    controlled via PLAY_STATISTIK: off, summary only, detailed.
    """
    # assert isinstance(replay_buffer, (collections.deque, type(None)))
    # assert isinstance(mcts_stores, (mctsGo.MCTS, type(None), list))
    # assert isinstance(net1, NnGo)
    # assert isinstance(net2, NnGo)
    if isinstance(mcts_stores, mctsGo.MCTS):
        mcts_stores = [mcts_stores, mcts_stores]
    spiel = goSpielNoGraph.PlayGo(gameGo.b2Initial, zugMax=ZUG_MAX)
    state = spiel.bToInt()
    nets = [net1, net2]
    cur_player = 1  # black always moves first, and black is net1
    step = 0
    countDiff = 0
    countEnd = 0
    countSearch = mcts_searches * mcts_batch_size if mcts_batch_size > 0 else mcts_searches
    tau = 1 if steps_before_tau_0 > 0 else 0
    game_history = []
    values, zuege = [], []
    while True:
        statEnd = mcts_stores[1 - cur_player].search(mcts_searches, mcts_batch_size, state, cur_player,
                                                     nets[1 - cur_player], zugNr=step + 1, zugMax=ZUG_MAX,
                                                     device=device)
        countEnd += statEnd
        probs = mcts_stores[1 - cur_player].get_policy(state, tau=tau)
        game_history.append((state, cur_player, probs))
        action = np.random.choice(gameGo.ANZ_POSITIONS, p=probs)
        if not spiel.setzZug(action):  # move: play the chosen action
            print('Impossible action at step ', step, ', Player: ', cur_player, '. Action=', action, ' at:')
            spiel.printB()
            print('b1:')
            spiel.printB(b1=True)
            print('with probs:')
            gameGo.printBrett(probs, istFlat=True, mitFloat=True)
            counts = mcts_stores[1 - cur_player].stateStats.b[state][0]
            print('Counts:')
            gameGo.printBrett(counts, istFlat=True)
            counts[action] = 0
            if not spiel.setzZug(np.argmax(counts)):
                spiel.setzZug(81)
        elif PLAY_STATISTIK == 1:
            zuege.append(action)
            values.append('%1.2f ' % (mcts_stores[1 - cur_player].stateStats.b[state][2][action]))
        if PLAY_STATISTIK > 0 and stat != 'nicht':
            batch_v = gameGo.state_lists_to_batch([gameGo.intToB(state)], [cur_player], device)
            p_v, _ = nets[1 - cur_player](batch_v)
            probs = p_v.detach().cpu().numpy()[0]
            position = np.argmax(probs)
            if position != action:
                countDiff += 1
                if PLAY_STATISTIK == 2:
                    print('play_game step ', step + 1, ': actions differ!')
                    print('Action MCTS: ', action, ' NN: ', position)
            if PLAY_STATISTIK == 2:
                print('Share of leaf calls reaching game end: ' + str(statEnd) + ' = '
                      + str(int(statEnd * 100 / countSearch)) + '%')
                print('')
        if spiel.spielBeendet:
            # print('Winner:', spiel.gewinner, 'S:', spiel.pktSchwarz, 'W:', spiel.pktWeiss)
            if PLAY_STATISTIK == 1:
                spiel.sgfWrite(zuege, values)
            if spiel.gewinner == 1:
                net1_result = 1
                result = 1 if cur_player == 1 else -1
            elif spiel.gewinner == -1:
                net1_result = -1
                result = -1 if cur_player == 1 else 1
            else:
                result = 0
                net1_result = 0
            break
        cur_player = 1 - cur_player
        state = spiel.bToInt()
        step += 1
        if step >= steps_before_tau_0:
            tau = 0
    if PLAY_STATISTIK > 0:
        if stat == 'Diff':
            print('play_game differences MCTS vs NN: ' + str(countDiff) + ' = '
                  + str(int(countDiff * 100 / (step + 1))) + '%')
        elif stat == 'Leaf':
            print('Share of leaf calls reaching game end in total: ' + str(countEnd) + ' = '
                  + str(int(countEnd * 100 / (countSearch * (step + 1)))) + '%')
    if replay_buffer is not None:
        for state, cur_player, probs in reversed(game_history):
            # store each position in all 8 board symmetries (rotations/reflections)
            for drehung in (0, 90, 180, 270, 1, 2, 3, 4):
                replay_buffer.append((gameGo.drehB2(state, drehung), cur_player,
                                      gameGo.drehPosition(probs, drehung), result))
            result = -result
    return net1_result, step
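# Hedged usage sketch (illustration only): one self-play game filling a replay
# buffer with the symmetry-augmented positions. The no-argument MCTS() and
# NnGo() constructors and the hyperparameter values are assumptions.
#
# import collections
# net = NnGo().to('cpu')
# mcts = mctsGo.MCTS()
# buffer = collections.deque(maxlen=30000)
# res, steps = play_game(mcts, buffer, net, net, steps_before_tau_0=8,
#                        mcts_searches=20, mcts_batch_size=8, device='cpu')
# print('result for net1:', res, 'moves:', steps, 'samples:', len(buffer))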