def _read_thor_file(self, file_name):
    """Parse a WTHOR game-database file and return the games it contains.

    Each 68-byte record holds an 8-byte header followed by up to 60
    one-byte move codes. Every game is replayed on a fresh board; games
    whose replayed final score disagrees with the score recorded in the
    file are discarded and counted in ``self.inconsistencies``.

    :param file_name: path to a THOR ``.wtb`` database file.
    :return: list of ``(moves, outcome)`` tuples, where ``moves`` is a
        list of ``(player, row, column)`` triples and ``outcome`` is the
        black disc differential, ``black_score * 2 - 64``.
    """
    file_header_size = 16   # bytes before the first game record
    record_header_size = 8  # per-record header preceding the 60 move bytes
    record_size = 68        # 8-byte header + 60 one-byte move codes
    games = []
    with open(file_name, "rb") as f:
        c = f.read()
    board_size = _byte_to_int(c[12])
    # A size byte of 0 conventionally also denotes the standard 8x8 board.
    if board_size == 8 or board_size == 0:
        for i in xrange(file_header_size, len(c), record_size):
            moves = []
            b = Board()
            player = Board.BLACK
            black_score = _byte_to_int(c[i + 6])
            for j in xrange(record_header_size, record_size):
                play = _byte_to_int(c[i + j])
                if play > 0:
                    # Moves are encoded as 10*column + row, 1-based.
                    column = (play % 10) - 1
                    row = (play // 10) - 1
                    if not b.is_feasible(row, column, player):
                        # Infeasible for the player on move means the
                        # player had to pass: the move is the opponent's.
                        player = Board.opponent(player)
                    moves.append((player, row, column))
                    b.flip(row, column, player)
                    player = Board.opponent(player)
            # THOR scoring convention: empty squares go to the winner.
            black = b.score(Board.BLACK)
            score = black
            if black > b.score(Board.WHITE):
                score += b.score(Board.BLANK)
            if score == black_score:
                games.append((moves, black_score * 2 - 64))
            else:
                self.inconsistencies += 1
    return games
def self_play(n, model):
    """Play ``n`` self-play games of Othello, training ``model`` online.

    For each position the model evaluates every feasible move one ply
    ahead (using the exact disc differential when the move ends the
    game), an epsilon-greedy policy picks the move, and the chosen value
    is fed back through ``model.update``. A player with no feasible move
    passes. Progress is logged every 100 games and the model is
    checkpointed every 1000 games and once at the end.

    :param n: number of games to play.
    :param model: callable position evaluator exposing
        ``update(board, value)`` and ``save(path)``.
    """
    checkpoint_path = "./model/model.cpt"
    b = Board()
    for t in xrange(1, n + 1):
        b.init_board()
        p = Board.BLACK
        while not b.is_terminal_state():
            options = b.feasible_pos(p)
            vals = []
            if options:
                for i, j in options:
                    # flip2 is a context manager: apply the move, undo on exit.
                    with b.flip2(i, j, p):
                        if b.is_terminal_state():
                            # Exact outcome: black minus white disc count.
                            vals.append(b.score(Board.BLACK) - b.score(Board.WHITE))
                        else:
                            vals.append(model(b))
                # Black maximizes the differential; white minimizes it.
                (a0, a1), v = epsilon_greedy(0.07, options, vals, p == Board.BLACK)
                model.update(b, v)
                b.flip(a0, a1, p)
            # No feasible move: the player passes.
            p = Board.opponent(p)
        if t % 100 == 0:
            logging.info("Number of games played: {}".format(t))
            logging.info(b.cache_status())
        if t % 1000 == 0:
            model.save(checkpoint_path)
    model.save(checkpoint_path)
def _alpha_beta_search(self, board, player, alpha, beta, depth, is_maximizing_player):
    """Depth-limited minimax search with alpha-beta pruning.

    :param board: current position; moves are tried and undone in place.
    :param player: the side to move at this node.
    :param alpha: best value the maximizer can already guarantee.
    :param beta: best value the minimizer can already guarantee.
    :param depth: remaining search depth; leaves are scored by
        ``self._evaluator``.
    :param is_maximizing_player: True when this node maximizes the value.
    :return: ``(value, action)`` where ``action`` is the chosen
        ``(row, column)`` move, or ``None`` at leaves and forced passes.
    """
    if board.is_terminal_state() or depth == 0:
        return self._evaluator(board), None

    best_move = None
    best = AlphaBeta.MIN_VAL if is_maximizing_player else AlphaBeta.MAX_VAL
    moves = board.feasible_pos(player)
    rival = Board.opponent(player)

    if not moves:
        # Forced pass: same depth, opposite side, objective flips.
        best, _ = self._alpha_beta_search(board, rival, alpha, beta,
                                          depth, not is_maximizing_player)
        return best, best_move

    for row, col in moves:
        # flip2 applies the move for the scope of the with-block and
        # restores the board on exit.
        with board.flip2(row, col, player):
            value, _ = self._alpha_beta_search(board, rival, alpha, beta,
                                               depth - 1, not is_maximizing_player)
        if is_maximizing_player:
            if value > best:
                best, best_move = value, (row, col)
            alpha = max(alpha, value)
        else:
            if value < best:
                best, best_move = value, (row, col)
            beta = min(beta, value)
        if alpha >= beta:
            # Remaining siblings cannot change the result at this node.
            break
    return best, best_move