def test_static_env():
    """Cross-check the dynamic CChessEnv against the static senv implementation.

    Plays the same two moves through both engines and prints their states,
    evaluations, input planes and legal-move sets so they can be compared
    by eye; the final print shows whether the legal-move sets agree.
    """
    from cchess_alphazero.environment.env import CChessEnv
    import cchess_alphazero.environment.static_env as senv
    from cchess_alphazero.environment.static_env import INIT_STATE
    from cchess_alphazero.environment.lookup_tables import flip_move
    env = CChessEnv()
    env.reset()
    print("env: " + env.observation)
    print("senv: " + INIT_STATE)
    state = INIT_STATE
    # First move: applied identically to both environments.
    env.step('0001')
    state = senv.step(state, '0001')
    print(senv.evaluate(state))
    print("env: " + env.observation)
    print("senv: " + state)
    # Second move: senv keeps the state from the side-to-move perspective,
    # so the move is flipped before being applied there.
    env.step('7770')
    state = senv.step(state, flip_move('7770'))
    print(senv.evaluate(state))
    print("env: " + env.observation)
    print("senv: " + state)
    env.render()
    board = senv.state_to_board(state)
    # Print ranks top-to-bottom.
    for i in range(9, -1, -1):
        print(board[i])
    # Compare a slice of the NN input planes from both implementations.
    print("env: ")
    print(env.input_planes()[0+7:3+7])
    print("senv: ")
    print(senv.state_to_planes(state)[0+7:3+7])
    print(f"env: {env.board.legal_moves()}" )
    print(f"senv: {senv.get_legal_moves(state)}")
    # True when both engines agree on the legal-move set.
    print(set(env.board.legal_moves()) == set(senv.get_legal_moves(state)))
def test_onegreen():
    """Replay two moves parsed from onegreen-format coordinates on a custom position."""
    import cchess_alphazero.environment.static_env as senv
    from cchess_alphazero.environment.lookup_tables import flip_move
    # Custom position given in the project's 64-character encoding accepted by senv.init.
    init = '9999299949999999249999869999999958999999519999999999999999997699'
    state = senv.init(init)
    print(state)
    senv.render(state)
    move = senv.parse_onegreen_move('8685')
    state = senv.step(state, move)
    print(state)
    senv.render(state)
    move = senv.parse_onegreen_move('7666')
    # Second move is flipped before stepping — presumably because senv keeps the
    # state from the side-to-move perspective (as in the other tests); verify.
    state = senv.step(state, flip_move(move))
    print(state)
    senv.render(state)
def start_game(self, idx):
    """Play one evaluation game between the best model and the next-gen model.

    idx: game index; even games give the best model red, odd games black.
    Returns (value, turns) where value is from the best model's perspective
    (1 win, -1 loss, 0 draw) and turns is the number of half-moves played.
    """
    pipe1 = self.pipes_bt.pop()
    pipe2 = self.pipes_ng.pop()
    search_tree1 = defaultdict(VisitState)
    search_tree2 = defaultdict(VisitState)
    self.player1 = CChessPlayer(self.config, search_tree=search_tree1, pipes=pipe1,
                                debugging=False, enable_resign=True)
    self.player2 = CChessPlayer(self.config, search_tree=search_tree2, pipes=pipe2,
                                debugging=False, enable_resign=True)
    # even: bst = red, ng = black; odd: bst = black, ng = red
    if idx % 2 == 0:
        red = self.player1
        black = self.player2
        logger.debug(f"best model is red, ng is black")
    else:
        red = self.player2
        black = self.player1
        logger.debug(f"best model is black, ng is red")
    state = senv.INIT_STATE
    value = 0   # best model's value
    turns = 0   # even == red; odd == black
    game_over = False
    while not game_over:
        start_time = time()
        if turns % 2 == 0:
            action, _ = red.action(state, turns)
        else:
            action, _ = black.action(state, turns)
        end_time = time()
        if action is None:
            # BUGFIX: was `turn % 2` — `turn` is undefined (NameError); the
            # loop variable is `turns`.
            logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
            value = -1
            break
        state = senv.step(state, action)
        turns += 1
        if turns / 2 >= self.config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move = senv.done(state)
    self.player1.close()
    self.player2.close()
    # value is from the perspective of the side to move; convert it to the
    # best model's perspective.
    if turns % 2 == 1:  # black's turn next, so the value is black-relative
        value = -value
    if idx % 2 == 1:
        value = -value
    self.pipes_bt.append(pipe1)
    self.pipes_ng.append(pipe2)
    return value, turns
def info_best_move(self, action, value, depth):
    """Emit UCI `info` and `bestmove` lines for the chosen action.

    action: move in the engine's internal (side-to-move) coordinates.
    value:  position value from the side-to-move perspective.
    depth:  search depth indicator (used for the info line and nps estimate).
    """
    self.end_time = time()
    # Report the score from red's perspective for the GUI.
    if not self.is_red_turn:
        value = -value
    score = int(value * 1000)
    duration = self.end_time - self.start_time
    # Rough nodes-per-second estimate; depth*100 approximates playouts done.
    nps = int(depth * 100 / duration) * 1000
    print(f"info depth {depth} score {score} time {int(duration * 1000)} nps {nps}")
    logger.debug(f"info depth {depth} score {score} time {int((self.end_time - self.start_time) * 1000)}")
    sys.stdout.flush()
    # get ponder: the opponent reply with the highest visit count in the
    # subtree reached after playing `action`.
    state = senv.step(self.state, action)
    ponder = None
    if state in self.search_tree:
        node = self.search_tree[state]
        cnt = 0
        for mov, action_state in node.a.items():
            if action_state.n > cnt:
                ponder = mov
                cnt = action_state.n
    # Convert internal coordinates back to board (red-perspective) UCI moves.
    if not self.is_red_turn:
        action = flip_move(action)
    action = senv.to_uci_move(action)
    output = f"bestmove {action}"
    if ponder:
        # Ponder move is from the opponent's perspective, hence the inverted flip.
        if self.is_red_turn:
            ponder = flip_move(ponder)
        ponder = senv.to_uci_move(ponder)
        output += f" ponder {ponder}"
    print(output)
    logger.debug(output)
    sys.stdout.flush()
def expanding_data(data, use_history=False):
    """Expand a stored game record into (state, policy, value) training triples.

    data: [initial_state, [action, value], [action, value], ...] as written
          by the self-play workers.
    use_history: when True, also build the alternating [state, action, state, ...]
          history list passed to convert_to_trainging_data.
    Returns the converted training data, or None if building a policy fails.
    """
    state = data[0]
    real_data = []
    action = None
    policy = None
    value = None
    if use_history:
        history = [state]
    else:
        history = None
    for item in data[1:]:
        action = item[0]
        value = item[1]
        try:
            policy = build_policy(action, flip=False)
        except Exception as e:
            # A malformed action makes the whole record unusable.
            logger.error(
                f"Expand data error {e}, item = {item}, data = {data}, state = {state}"
            )
            return None
        real_data.append([state, policy, value])
        state = senv.step(state, action)
        if use_history:
            history.append(action)
            history.append(state)
    return convert_to_trainging_data(real_data, history)
def test_ucci():
    """Exercise UCCI move parsing and FEN export against the static env."""
    import cchess_alphazero.environment.static_env as senv
    from cchess_alphazero.environment.lookup_tables import flip_move
    state = senv.INIT_STATE
    state = senv.step(state, '0001')
    fen = senv.state_to_fen(state, 1)
    print(fen)
    senv.render(state)
    move = 'b7b0'
    move = senv.parse_ucci_move(move)
    print(f'Parsed move {move}')
    # Black's reply is flipped to the side-to-move perspective before stepping.
    move = flip_move(move)
    print(f'fliped move {move}')
    state = senv.step(state, move)
    senv.render(state)
    fen = senv.state_to_fen(state, 2)
    print(fen)
def fixbug():
    """Repair stored self-play game files whose recorded values have the wrong sign.

    Walks every game data file, recomputes the per-move values by replaying
    the game with the static env, and rewrites files whose final value does
    not match the recomputed one. Draws (value 0) are left untouched.
    Unreadable files are deleted.
    """
    from cchess_alphazero.config import Config
    from cchess_alphazero.lib.data_helper import get_game_data_filenames, read_game_data_from_file, write_game_data_to_file
    import cchess_alphazero.environment.static_env as senv
    c = Config('distribute')
    files = get_game_data_filenames(c.resource)
    cnt = 0        # files examined
    fix = 0        # files rewritten
    draw_cnt = 0   # draws skipped
    for filename in files:
        try:
            data = read_game_data_from_file(filename)
        # BUGFIX: was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt and then deletes the file; narrowed to Exception.
        except Exception:
            print(f"error: (unknown)")
            os.remove(filename)
            continue
        state = data[0]
        real_data = [state]
        need_fix = True
        draw = False
        action = None
        value = None
        is_red_turn = True
        for item in data[1:]:
            action = item[0]
            # Flip the sign: stored values alternate perspective each half-move.
            value = -item[1]
            if value == 0:
                # Draw — nothing to fix for this game.
                need_fix = False
                draw = True
                draw_cnt += 1
                break
            state = senv.step(state, action)
            is_red_turn = not is_red_turn
            real_data.append([action, value])
        if not draw:
            # Recompute the terminal value and compare with the stored one.
            game_over, v, final_move = senv.done(state)
            if final_move:
                v = -v
                is_red_turn = not is_red_turn
            if not is_red_turn:
                v = -v
            if not game_over:
                # Game record ends before a terminal position: treat as a win
                # for the last mover (resignation).
                v = 1
            if v == data[1][1]:
                need_fix = False
            else:
                need_fix = True
        if need_fix:
            write_game_data_to_file(filename, real_data)
            fix += 1
        cnt += 1
        if cnt % 1000 == 0:
            print(cnt, fix, draw_cnt)
    print(f"all {cnt}, fix {fix}, draw {draw_cnt}")
def cmd_position(self):
    '''
    Handle the UCCI command:
    position {fen <fenstring> | startpos } [moves <move1> .... <moven>]

    Sets self.state / self.history / self.turns / self.is_red_turn from the
    given position, then applies any trailing move list. Does nothing until
    the engine has completed its ready handshake.
    '''
    if not self.is_ready:
        return
    move_idx = -1  # index into self.args where the move list starts; -1 = none
    if len(self.args) > 0:
        if self.args[0] == 'fen':
            # init with fen string
            fen = self.args[1]
            try:
                self.state = senv.fen_to_state(fen)
            except Exception as e:
                logger.error(f"cmd position error! cmd = {self.args}, {e}")
                return
            self.history = [self.state]
            turn = self.args[2]
            if turn == 'b':
                # Black to move: keep the state in side-to-move perspective.
                self.state = senv.fliped_state(self.state)
                self.is_red_turn = False
                # args[6] is the full-move number; convert to half-move count.
                self.turns = (int(self.args[6]) - 1) * 2 + 1
            else:
                self.is_red_turn = True
                self.turns = (int(self.args[6]) - 1) * 2
            if len(self.args) > 7 and self.args[7] == 'moves':
                move_idx = 8
        elif self.args[0] == 'startpos':
            self.state = senv.INIT_STATE
            self.is_red_turn = True
            self.history = [self.state]
            self.turns = 0
            if len(self.args) > 1 and self.args[1] == 'moves':
                move_idx = 2
        elif self.args[0] == 'moves':
            # Continue from the current position with a fresh move list.
            move_idx = 1
    else:
        # Bare `position`: reset to the initial position.
        self.state = senv.INIT_STATE
        self.is_red_turn = True
        self.history = [self.state]
        self.turns = 0
    logger.debug(f"state = {self.state}")
    # senv.render(self.state)
    # execute moves
    if move_idx != -1:
        for i in range(move_idx, len(self.args)):
            action = senv.parse_ucci_move(self.args[i])
            # Moves arrive in board coordinates; flip for black so the state
            # stays in side-to-move perspective.
            if not self.is_red_turn:
                action = flip_move(action)
            self.history.append(action)
            self.state = senv.step(self.state, action)
            self.is_red_turn = not self.is_red_turn
            self.turns += 1
            self.history.append(self.state)
        logger.debug(f"state = {self.state}")
def MCTS_search(self, state, history=None, is_root_node=False) -> float:
    """Run one Monte Carlo Tree Search simulation from `state`.

    Walks the tree selecting by Q+U until it reaches a terminal position,
    an unexpanded node (which is queued for NN evaluation), a repetition,
    or a node awaiting evaluation. Updates are dispatched asynchronously
    via self.executor.

    history: alternating [state, action, state, ...] path of this simulation.
    """
    # BUGFIX: was `history=[]` — a mutable default argument. The list is
    # appended to below, so the default would be shared across calls.
    if history is None:
        history = []
    while True:
        game_over, v, _ = senv.done(state)
        if game_over:
            self.executor.submit(self.update_tree, None, v, history)
            break
        with self.node_lock[state]:
            if state not in self.tree:
                # Expand and Evaluate: claim the node and queue it for the NN.
                self.tree[state].sum_n = 1
                self.tree[state].legal_moves = senv.get_legal_moves(state)
                self.tree[state].waiting = True
                self.expand_and_evaluate(state, history)
                break
            if state in history[:-1]:  # loop -> loss
                # Repetition within this simulation path scores as a draw (0).
                self.executor.submit(self.update_tree, None, 0, history)
                break
            # Select
            node = self.tree[state]
            if node.waiting:
                # NN evaluation pending; park this path until it completes.
                node.visit.append(history)
                break
            sel_action = self.select_action_q_and_u(state, is_root_node)
            # Apply virtual loss to discourage other threads from taking
            # the same path concurrently.
            virtual_loss = self.config.play.virtual_loss
            self.tree[state].sum_n += 1
            action_state = self.tree[state].a[sel_action]
            action_state.n += virtual_loss
            action_state.w -= virtual_loss
            action_state.q = action_state.w / action_state.n
            if action_state.next is None:
                action_state.next = senv.step(state, sel_action)
            history.append(sel_action)
            state = action_state.next
            history.append(state)
def MCTS_search(self, state, history=None, is_root_node=False) -> float:
    """Run one Monte Carlo Tree Search simulation from `state`.

    Walks the tree selecting by Q+U until it reaches a terminal position,
    an unexpanded node (which is queued for NN evaluation), a repetition,
    or a node awaiting evaluation. Updates are dispatched asynchronously
    via self.executor.

    history: alternating [state, action, state, ...] path of this simulation.
    """
    # BUGFIX: was `history=[]` — a mutable default argument. The list is
    # appended to below, so the default would be shared across calls.
    if history is None:
        history = []
    while True:
        game_over, v, _ = senv.done(state)
        if game_over:
            self.executor.submit(self.update_tree, None, v, history)
            break
        with self.node_lock[state]:
            if state not in self.tree:
                # Expand and Evaluate: claim the node and queue it for the NN.
                self.tree[state].sum_n = 1
                self.tree[state].legal_moves = senv.get_legal_moves(state)
                self.tree[state].waiting = True
                self.expand_and_evaluate(state, history)
                break
            if state in history[:-1]:  # loop -> loss
                # Repetition within this simulation path scores as a draw (0).
                self.executor.submit(self.update_tree, None, 0, history)
                break
            # Select
            node = self.tree[state]
            if node.waiting:
                # NN evaluation pending; park this path until it completes.
                node.visit.append(history)
                break
            sel_action = self.select_action_q_and_u(state, is_root_node)
            # Apply virtual loss to discourage other threads from taking
            # the same path concurrently.
            virtual_loss = self.config.play.virtual_loss
            self.tree[state].sum_n += 1
            action_state = self.tree[state].a[sel_action]
            action_state.n += virtual_loss
            action_state.w -= virtual_loss
            action_state.q = action_state.w / action_state.n
            if action_state.next is None:
                action_state.next = senv.step(state, sel_action)
            history.append(sel_action)
            state = action_state.next
            history.append(state)
def test_check_and_catch():
    """Manually verify senv.will_check_or_catch on a hand-picked position."""
    import cchess_alphazero.environment.static_env as senv
    state = senv.fen_to_state(
        'rnba1cbnr/1a7/1c7/p1p3p1p/2p5k/2P1R4/P1P3P1P/1C5C1/9/RNBAKABN1 r')
    # state = senv.fliped_state(state)
    ori_state = state
    senv.render(state)
    print()
    action = '4454'
    state = senv.step(state, action)
    senv.render(state)
    # Render from the opponent's perspective as well.
    state = senv.fliped_state(state)
    print()
    senv.render(state)
    # Expected to report whether the move checks or catches — inspect output.
    print(senv.will_check_or_catch(ori_state, action))
def print_depth_info(self, state, turns, start_time, value, no_act):
    '''
    Print a UCI `info depth xx pv xxx` line.

    Follows the most-visited child from `state` for up to 20 plies to build
    the principal variation, converting each move to board-perspective UCI
    notation, then prints depth/score/time/pv/nps to stdout for the GUI.
    '''
    # done_tasks counts playouts; depth is reported in units of 100 playouts.
    depth = self.done_tasks // 100
    end_time = time()
    pv = ""
    i = 0
    while i < 20:
        node = self.tree[state]
        bestmove = None
        root = True
        # NOTE(review): `root` is reset to True on every iteration, so the
        # no_act filter below appears to apply at every pv depth, not only at
        # the root — confirm whether that is intended.
        n = 0
        if len(node.a) == 0:
            break
        # Pick the child with the highest visit count, skipping vetoed
        # root moves (no_act).
        for mov, action_state in node.a.items():
            if action_state.n >= n:
                if root and no_act and mov in no_act:
                    continue
                n = action_state.n
                bestmove = mov
        if bestmove is None:
            logger.error(
                f"state = {state}, turns = {turns}, no_act = {no_act}, root = {root}, len(as) = {len(node.a)}"
            )
            break
        state = senv.step(state, bestmove)
        root = False
        # Moves are stored side-to-move relative; flip black's moves back to
        # board coordinates before converting to UCI.
        if turns % 2 == 1:
            bestmove = flip_move(bestmove)
        bestmove = senv.to_uci_move(bestmove)
        pv += " " + bestmove
        i += 1
        turns += 1
    # Prefer the debug-cached evaluation of the pv leaf when available.
    if state in self.debug:
        _, value = self.debug[state]
        if turns % 2 != self.side:
            value = -value
    score = int(value * 1000)
    duration = end_time - start_time
    # Rough nodes-per-second estimate from playout count.
    nps = int(depth * 100 / duration) * 1000
    output = f"info depth {depth} score {score} time {int(duration * 1000)} pv" + pv + f" nps {nps}"
    print(output)
    logger.debug(output)
    sys.stdout.flush()
def start_game(self, idx, search_tree):
    """Play one self-play game and store its training record.

    idx: game index (used for tree-reset cadence and data file naming).
    search_tree: shared MCTS tree; reset unless sharing is enabled.
    Returns (v, turns, state, store) — final value from red's perspective,
    half-move count, final state, and whether the record was saved.
    """
    pipes = self.cur_pipes.pop()
    # Periodically reset the shared search tree to bound memory and staleness.
    if not self.config.play.share_mtcs_info_in_self_play or \
        idx % self.config.play.reset_mtcs_info_per_game == 0:
        search_tree = defaultdict(VisitState)
    # Randomly disable resignation so some games play out to the end.
    if random() > self.config.play.enable_resign_rate:
        enable_resign = True
    else:
        enable_resign = False
    self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes,
                               enable_resign=enable_resign, debugging=False,
                               use_history=self.use_history)
    state = senv.INIT_STATE
    history = [state]   # alternating [state, action, state, ...]
    # policys = []
    value = 0
    turns = 0           # even == red; odd == black
    game_over = False
    final_move = None
    no_eat_count = 0    # consecutive half-moves without a capture
    check = False
    no_act = []         # moves vetoed at the root (repetition handling)
    increase_temp = False
    while not game_over:
        start_time = time()
        action, policy = self.player.action(state, turns, no_act, increase_temp=increase_temp)
        end_time = time()
        if action is None:
            logger.error(f"{turns % 2} (0 = red; 1 = black) has resigned!")
            value = -1
            break
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            # An illegal step corrupts the game; abandon it as a draw.
            logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
            game_over = True
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)
        # 120 captureless half-moves or max game length => draw.
        if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(
                state, need_check=True)
            if not game_over:
                if not senv.has_attack_chessman(state):
                    # Neither side has attacking material: declare a draw.
                    logger.error(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0
            increase_temp = False
            no_act = []
            # Repetition handling: veto perpetual checks/chases and count
            # idle repetitions toward a forced draw.
            if not game_over and not check and state in history[:-1]:
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        if senv.will_check_or_catch(state, history[i + 1]):
                            no_act.append(history[i + 1])
                        elif not senv.be_catched(state, history[i + 1]):
                            increase_temp = True
                            free_move[state] += 1
                            if free_move[state] >= 3:
                                # Idle position repeated three times: draw.
                                game_over = True
                                value = 0
                                logger.error("闲着循环三次,作和棋处理")
                                break
    if final_move:
        # policy = self.build_policy(final_move, False)
        history.append(final_move)
        # policys.append(policy)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)
    self.player.close()
    del search_tree
    del self.player
    gc.collect()
    # Normalize the value to red's perspective.
    if turns % 2 == 1:  # black's turn next
        value = -value
    v = value
    # Keep only ~10% of very short games to avoid biasing the data set.
    if turns < 10:
        if random() > 0.9:
            store = True
        else:
            store = False
    else:
        store = True
    if store:
        data = [history[0]]
        for i in range(turns):
            k = i * 2
            data.append([history[k + 1], value])
            value = -value
        self.save_play_data(idx, data)
    self.cur_pipes.append(pipes)
    self.remove_play_data()
    return v, turns, state, store
def self_play_buffer(config, cur, use_history=False) -> (tuple, list):
    """Play one self-play game and return its training record.

    cur: shared list of pipe bundles; one is borrowed for the game.
    Returns ((turns, v), data) where v is the final value from red's
    perspective and data is [init_state, [state, value], ...].
    """
    pipe = cur.pop()  # borrow
    # Randomly disable resignation so some games play out to the end.
    if random() > config.play.enable_resign_rate:
        enable_resign = True
    else:
        enable_resign = False
    player = CChessPlayer(config, search_tree=defaultdict(VisitState), pipes=pipe,
                          enable_resign=enable_resign, debugging=False,
                          use_history=use_history)
    state = senv.INIT_STATE
    history = [state]   # alternating [state, action, state, ...]
    # policys = []
    value = 0
    turns = 0
    game_over = False
    final_move = None
    no_eat_count = 0    # consecutive half-moves without a capture
    check = False
    no_act = None       # moves vetoed at the root (repetition handling)
    increase_temp = False
    while not game_over:
        start_time = time()
        action, policy = player.action(state, turns, no_act, increase_temp=increase_temp)
        end_time = time()
        if action is None:
            print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
            value = -1
            break
        print(f"博弈中: 回合{turns / 2 + 1} {'红方走棋' if turns % 2 == 0 else '黑方走棋'}, 着法: {action}, 用时: {(end_time - start_time):.1f}s")
        # policys.append(policy)
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            # An illegal step corrupts the game; abandon it as a draw.
            logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
            game_over = True
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)
        # 120 captureless half-moves or max game length => draw.
        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state, need_check=True)
            no_act = []
            increase_temp = False
            if not game_over:
                if not senv.has_attack_chessman(state):
                    # Neither side has attacking material: declare a draw.
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0
            # Repetition handling: veto perpetual checks/chases and count
            # idle repetitions toward a forced draw.
            if not game_over and not check and state in history[:-1]:
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        if senv.will_check_or_catch(state, history[i+1]):
                            no_act.append(history[i + 1])
                        elif not senv.be_catched(state, history[i+1]):
                            increase_temp = True
                            free_move[state] += 1
                            if free_move[state] >= 3:
                                # Idle position repeated three times: draw.
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次,作和棋处理")
                                break
    if final_move:
        # policy = build_policy(final_move, False)
        history.append(final_move)
        # policys.append(policy)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)
    player.close()
    del player
    gc.collect()
    # Normalize the value to red's perspective.
    if turns % 2 == 1:  # black's turn next
        value = -value
    v = value
    data = [history[0]]
    for i in range(turns):
        k = i * 2
        data.append([history[k + 1], value])
        value = -value
    cur.append(pipe)
    return (turns, v), data
def start_game(self, idx):
    """Play one evaluation game between the base model and the model under test.

    idx: game index; even games give the base model red, odd games black.
    Returns (value, turns, data) — the final value from red's perspective,
    the half-move count, and the game record prefixed with both model digests.
    """
    # Stagger process start and randomize search effort per game.
    sleep(random())
    playouts = randint(8, 12) * 100
    self.config.play.simulation_num_per_move = playouts
    logger.info(f"Set playouts = {self.config.play.simulation_num_per_move}")
    pipe1 = self.pipes_bt.pop()
    pipe2 = self.pipes_ng.pop()
    search_tree1 = defaultdict(VisitState)
    search_tree2 = defaultdict(VisitState)
    self.player1 = CChessPlayer(self.config, search_tree=search_tree1, pipes=pipe1,
                                debugging=False, enable_resign=False,
                                use_history=self.hist_base)
    self.player2 = CChessPlayer(self.config, search_tree=search_tree2, pipes=pipe2,
                                debugging=False, enable_resign=False,
                                use_history=self.hist_ng)
    # even: bst = red, ng = black; odd: bst = black, ng = red
    if idx % 2 == 0:
        red = self.player1
        black = self.player2
        logger.info(f"进程id = {self.pid} 基准模型执红,待评测模型执黑")
    else:
        red = self.player2
        black = self.player1
        logger.info(f"进程id = {self.pid} 待评测模型执红,基准模型执黑")
    state = senv.INIT_STATE
    history = [state]   # alternating [state, action, state, ...]
    value = 0           # best model's value
    turns = 0           # even == red; odd == black
    game_over = False
    # BUGFIX: final_move was never initialized; a resignation before the
    # first senv.done call made `if final_move:` raise UnboundLocalError.
    final_move = None
    no_eat_count = 0    # consecutive half-moves without a capture
    check = False
    increase_temp = False
    no_act = []         # moves vetoed at the root (repetition handling)
    while not game_over:
        start_time = time()
        if turns % 2 == 0:
            action, _ = red.action(state, turns, no_act=no_act, increase_temp=increase_temp)
        else:
            action, _ = black.action(state, turns, no_act=no_act, increase_temp=increase_temp)
        end_time = time()
        if self.config.opts.log_move:
            logger.debug(f"进程id = {self.pid}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}")
        if action is None:
            logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
            value = -1
            break
        history.append(action)
        state, no_eat = senv.new_step(state, action)
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)
        # 120 captureless half-moves or max game length => draw.
        if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state, need_check=True)
            no_act = []
            increase_temp = False
            if not game_over:
                if not senv.has_attack_chessman(state):
                    # Neither side has attacking material: declare a draw.
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0
            # Repetition handling: veto perpetual checks/chases and count
            # idle repetitions toward a forced draw.
            if not game_over and not check and state in history[:-1]:
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        if senv.will_check_or_catch(state, history[i+1]):
                            no_act.append(history[i + 1])
                        elif not senv.be_catched(state, history[i+1]):
                            increase_temp = True
                            free_move[state] += 1
                            if free_move[state] >= 3:
                                # Idle position repeated three times: draw.
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次,作和棋处理")
                                break
    if final_move:
        history.append(final_move)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)
    # Record which model played which color via the model digests.
    data = []
    if idx % 2 == 0:
        data = [self.data['base']['digest'], self.data['unchecked']['digest']]
    else:
        data = [self.data['unchecked']['digest'], self.data['base']['digest']]
    self.player1.close()
    self.player2.close()
    # Normalize the value to red's perspective.
    if turns % 2 == 1:  # black's turn next
        value = -value
    v = value
    data.append(history[0])
    for i in range(turns):
        k = i * 2
        data.append([history[k + 1], v])
        v = -v
    self.pipes_bt.append(pipe1)
    self.pipes_ng.append(pipe2)
    return value, turns, data
def start_game(self, idx, search_tree):
    """Play one supervised self-play game: AlphaZero vs an external UCCI engine.

    idx: game index; even games give the AlphaZero player red.
    search_tree: shared MCTS tree; reset unless sharing is enabled.
    Returns (v, turns, state, search_tree, store).
    """
    pipes = self.cur_pipes.pop()
    # Periodically reset the shared search tree to bound memory and staleness.
    if not self.config.play.share_mtcs_info_in_self_play or \
        idx % self.config.play.reset_mtcs_info_per_game == 0:
        search_tree = defaultdict(VisitState)
    # Randomly disable resignation so some games play out to the end.
    if random() > self.config.play.enable_resign_rate:
        enable_resign = True
    else:
        enable_resign = False
    self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes,
                               enable_resign=enable_resign, debugging=False)
    state = senv.INIT_STATE
    history = [state]   # alternating [state, action, state, ...]
    policys = []
    value = 0
    turns = 0           # even == red; odd == black
    game_over = False
    is_alpha_red = True if idx % 2 == 0 else False
    final_move = None
    while not game_over:
        if (is_alpha_red and turns % 2 == 0) or (not is_alpha_red and turns % 2 == 1):
            # AlphaZero's move: forbid moves that would repeat a previous position.
            no_act = None
            if state in history[:-1]:
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        no_act.append(history[i + 1])
            action, policy = self.player.action(state, turns, no_act)
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
        else:
            # External UCCI engine's move, requested via FEN.
            fen = senv.state_to_fen(state, turns)
            action = self.get_ucci_move(fen)
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            # Engine moves are in board coordinates; flip black's to the
            # side-to-move perspective.
            if turns % 2 == 1:
                action = flip_move(action)
            try:
                policy = self.build_policy(action, False)
            except Exception as e:
                logger.error(f"Build policy error {e}, action = {action}, state = {state}, fen = {fen}")
                value = 0
                break
        history.append(action)
        policys.append(policy)
        state = senv.step(state, action)
        turns += 1
        history.append(state)
        if turns / 2 >= self.config.play.max_game_length:
            # Too long: score by static evaluation instead of a hard draw.
            game_over = True
            value = senv.evaluate(state)
        else:
            game_over, value, final_move = senv.done(state)
    if final_move:
        policy = self.build_policy(final_move, False)
        history.append(final_move)
        policys.append(policy)
        state = senv.step(state, final_move)
        history.append(state)
    self.player.close()
    # Normalize the value to red's perspective.
    if turns % 2 == 1:  # black's turn next
        value = -value
    v = value
    # Subsample draws and very short games to limit their share of the data.
    if v == 0 or turns <= 10:
        if random() > 0.7:
            store = True
        else:
            store = False
    else:
        store = True
    if store:
        data = []
        for i in range(turns):
            k = i * 2
            data.append([history[k], policys[i], value])
            value = -value
        self.save_play_data(idx, data)
    self.cur_pipes.append(pipes)
    self.remove_play_data()
    return v, turns, state, search_tree, store
def self_play_buffer(config, pipes_bt, pipes_ng, idx, res_data, hist_base, hist_ng) -> (tuple, list):
    """Play one evaluation game between the base model and the model under test.

    idx: game index; even games give the base model red.
    res_data: dict with 'base'/'unchecked' model digests recorded in the output.
    Returns ((turns, v, idx), data) — v is the final value from red's
    perspective; data is [digest, digest, init_state, [state, value], ...].
    """
    # Stagger process start and randomize search effort per game.
    sleep(random())
    playouts = randint(8, 12) * 100
    config.play.simulation_num_per_move = playouts
    logger.info(f"Set playouts = {config.play.simulation_num_per_move}")
    pipe1 = pipes_bt.pop()  # borrow
    pipe2 = pipes_ng.pop()
    player1 = CChessPlayer(config, search_tree=defaultdict(VisitState), pipes=pipe1,
                           enable_resign=False, debugging=False, use_history=hist_base)
    player2 = CChessPlayer(config, search_tree=defaultdict(VisitState), pipes=pipe2,
                           enable_resign=False, debugging=False, use_history=hist_ng)
    # even: bst = red, ng = black; odd: bst = black, ng = red
    if idx % 2 == 0:
        red = player1
        black = player2
        print(f"基准模型执红,待评测模型执黑")
    else:
        red = player2
        black = player1
        print(f"待评测模型执红,基准模型执黑")
    state = senv.INIT_STATE
    history = [state]   # alternating [state, action, state, ...]
    # policys = []
    value = 0
    turns = 0
    game_over = False
    final_move = None
    no_eat_count = 0    # consecutive half-moves without a capture
    check = False
    increase_temp = False
    no_act = []         # moves vetoed at the root (repetition handling)
    while not game_over:
        start_time = time()
        # BUGFIX: the policy was discarded (`action, _ = ...`) but referenced
        # in the except handler below, which raised NameError instead of
        # logging the real failure. Capture it into `policy`.
        if turns % 2 == 0:
            action, policy = red.action(state, turns, no_act=no_act, increase_temp=increase_temp)
        else:
            action, policy = black.action(state, turns, no_act=no_act, increase_temp=increase_temp)
        end_time = time()
        if action is None:
            print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
            value = -1
            break
        print(
            f"博弈中: 回合{turns / 2 + 1} {'红方走棋' if turns % 2 == 0 else '黑方走棋'}, 着法: {action}, 用时: {(end_time - start_time):.1f}s"
        )
        # policys.append(policy)
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            # An illegal step corrupts the game; abandon it as a draw.
            logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
            game_over = True
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)
        # 120 captureless half-moves or max game length => draw.
        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state, need_check=True)
            no_act = []
            increase_temp = False
            if not game_over:
                if not senv.has_attack_chessman(state):
                    # Neither side has attacking material: declare a draw.
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0
            # Repetition handling: veto perpetual checks/chases and count
            # idle repetitions toward a forced draw.
            if not game_over and not check and state in history[:-1]:
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        if senv.will_check_or_catch(state, history[i + 1]):
                            no_act.append(history[i + 1])
                        elif not senv.be_catched(state, history[i + 1]):
                            increase_temp = True
                            free_move[state] += 1
                            if free_move[state] >= 3:
                                # Idle position repeated three times: draw.
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次,作和棋处理")
                                break
    if final_move:
        history.append(final_move)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)
    # Record which model played which color via the model digests.
    data = []
    if idx % 2 == 0:
        data = [res_data['base']['digest'], res_data['unchecked']['digest']]
    else:
        data = [res_data['unchecked']['digest'], res_data['base']['digest']]
    player1.close()
    player2.close()
    del player1, player2
    gc.collect()
    # Normalize the value to red's perspective.
    if turns % 2 == 1:  # black's turn next
        value = -value
    v = value
    data.append(history[0])
    for i in range(turns):
        k = i * 2
        data.append([history[k + 1], value])
        value = -value
    pipes_bt.append(pipe1)
    pipes_ng.append(pipe2)
    return (turns, v, idx), data
def self_play_buffer(config, cur) -> (tuple, list): pipe = cur.pop() # borrow if random() > config.play.enable_resign_rate: enable_resign = True else: enable_resign = False player = CChessPlayer(config, search_tree=defaultdict(VisitState), pipes=pipe, enable_resign=enable_resign, debugging=False) state = senv.INIT_STATE history = [state] policys = [] value = 0 turns = 0 game_over = False final_move = None while not game_over: no_act = None if state in history[:-1]: no_act = [] for i in range(len(history) - 1): if history[i] == state: no_act.append(history[i + 1]) start_time = time() action, policy = player.action(state, turns, no_act) end_time = time() if action is None: logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!") value = -1 break # logger.debug(f"Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s") policys.append(policy) history.append(action) state = senv.step(state, action) turns += 1 history.append(state) if turns / 2 >= config.play.max_game_length: game_over = True value = senv.evaluate(state) else: game_over, value, final_move = senv.done(state) if final_move: policy = build_policy(final_move, False) history.append(final_move) policys.append(policy) state = senv.step(state, final_move) history.append(state) player.close() if turns % 2 == 1: # balck turn value = -value v = value data = [] for i in range(turns): k = i * 2 data.append([history[k], policys[i], value]) value = -value cur.append(pipe) return (turns, v), data
def start_game(self, idx):
    """Play one evaluation game between the best model and the next-gen model.

    idx: game index; even games give the best model red, odd games black.
    Returns (value, turns) where value is player1's (best model's) result
    (1 win, -1 loss, 0 draw) and turns is the number of half-moves played.
    """
    pipe1 = self.pipes_bt.pop()
    pipe2 = self.pipes_ng.pop()
    search_tree1 = defaultdict(VisitState)
    search_tree2 = defaultdict(VisitState)
    self.player1 = CChessPlayer(self.config, search_tree=search_tree1, pipes=pipe1,
                                debugging=False, enable_resign=True)
    self.player2 = CChessPlayer(self.config, search_tree=search_tree2, pipes=pipe2,
                                debugging=False, enable_resign=True)
    # even: bst = red, ng = black; odd: bst = black, ng = red
    if idx % 2 == 0:
        red = self.player1
        black = self.player2
        logger.debug(f"best model is red, ng is black")
    else:
        red = self.player2
        black = self.player1
        logger.debug(f"best model is black, ng is red")
    state = senv.INIT_STATE
    value = 0   # best model's value
    turns = 0   # even == red; odd == black
    game_over = False
    while not game_over:
        start_time = time()
        if turns % 2 == 0:
            action, _ = red.action(state, turns)
        else:
            action, _ = black.action(state, turns)
        end_time = time()
        if action is None:
            # BUGFIX: was `turn % 2` — `turn` is undefined (NameError); the
            # loop variable is `turns`.
            logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
            value = -1
            break
        state = senv.step(state, action)
        turns += 1
        if turns / 2 >= self.config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move = senv.done(state)
    self.player1.close()
    self.player2.close()
    # Convert the side-to-move value to player1's perspective.
    if turns % 2 == 1:  # black's turn next, so the value is black-relative
        value = -value
    if idx % 2 == 1:    # return player1's value
        value = -value
    self.pipes_bt.append(pipe1)
    self.pipes_ng.append(pipe2)
    return value, turns
def MCTS_search(self, state, history=None, is_root_node=False, real_hist=None) -> float:
    """Run one Monte Carlo Tree Search simulation from `state`.

    Walks the tree selecting by Q+U until it reaches a terminal position,
    an unexpanded node (queued for NN evaluation), a repetition (scored by
    check/chase rules), or a node awaiting evaluation.

    history:   alternating [state, action, state, ...] path of this simulation.
    real_hist: actual game history, forwarded to expansion at the root only.
    """
    # BUGFIX: was `history=[]` — a mutable default argument. The list is
    # appended to below, so the default would be shared across calls.
    if history is None:
        history = []
    while True:
        game_over, v, _ = senv.done(state)
        if game_over:
            # Terminal values are doubled to dominate heuristic estimates.
            v = v * 2
            self.executor.submit(self.update_tree, None, v, history)
            break
        with self.node_lock[state]:
            if state not in self.tree:
                # Expand and Evaluate: claim the node and queue it for the NN.
                self.tree[state].sum_n = 1
                self.tree[state].legal_moves = senv.get_legal_moves(state)
                self.tree[state].waiting = True
                if is_root_node and real_hist:
                    self.expand_and_evaluate(state, history, real_hist)
                else:
                    self.expand_and_evaluate(state, history)
                break
            if state in history[:-1]:  # loop
                # Repetition: perpetual check/chase loses (-1), being chased
                # wins (+1), otherwise an idle repetition is a draw (0).
                for i in range(len(history) - 1):
                    if history[i] == state:
                        if senv.will_check_or_catch(state, history[i + 1]):
                            self.executor.submit(self.update_tree, None, -1, history)
                        elif senv.be_catched(state, history[i + 1]):
                            self.executor.submit(self.update_tree, None, 1, history)
                        else:
                            self.executor.submit(self.update_tree, None, 0, history)
                        break
                break
            # Select
            node = self.tree[state]
            if node.waiting:
                # NN evaluation pending; park this path until it completes.
                node.visit.append(history)
                break
            sel_action = self.select_action_q_and_u(state, is_root_node)
            # Apply virtual loss to discourage other threads from taking
            # the same path concurrently.
            virtual_loss = self.config.play.virtual_loss
            self.tree[state].sum_n += 1
            action_state = self.tree[state].a[sel_action]
            action_state.n += virtual_loss
            action_state.w -= virtual_loss
            action_state.q = action_state.w / action_state.n
            history.append(sel_action)
            state = senv.step(state, sel_action)
            history.append(state)
def start_game(self, idx, search_tree):
    """Run one self-play game and store (state, policy, value) samples.

    Args:
        idx: game index; used to decide when the shared MCTS tree is reset
            and as a tag for saved play data.
        search_tree: shared MCTS tree, possibly replaced with a fresh one.

    Returns:
        (v, turns, state, search_tree, store): final value from red's
        perspective, half-move count, final state, the (possibly new)
        search tree, and whether the game was stored.
    """
    pipes = self.cur_pipes.pop()

    # Reset the shared tree when sharing is disabled or on the reset cadence.
    if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
        search_tree = defaultdict(VisitState)

    # Resignation is only enabled for a random fraction of games.
    enable_resign = random() > self.config.play.enable_resign_rate

    self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes,
                               enable_resign=enable_resign, debugging=False)

    state = senv.INIT_STATE
    history = [state]
    policies = []
    value = 0
    turns = 0           # even == red to move; odd == black to move
    game_over = False
    final_move = None

    while not game_over:
        # Forbid moves that would repeat an earlier position on this game path.
        no_act = None
        if state in history[:-1]:
            no_act = [history[i + 1]
                      for i in range(len(history) - 1)
                      if history[i] == state]

        move_start = time()
        action, policy = self.player.action(state, turns, no_act)
        move_end = time()
        if action is None:
            logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
            value = -1
            break
        # logger.debug(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(move_end - move_start):.1f}s")

        history.append(action)
        policies.append(policy)
        state = senv.step(state, action)
        turns += 1
        history.append(state)

        if turns / 2 >= self.config.play.max_game_length:
            # Adjudicate overly long games by static evaluation.
            game_over = True
            value = senv.evaluate(state)
        else:
            game_over, value, final_move = senv.done(state)

    if final_move:
        # Append the mating move returned by the terminal check.
        policy = self.build_policy(final_move, False)
        history.append(final_move)
        policies.append(policy)
        state = senv.step(state, final_move)
        history.append(state)

    self.player.close()

    if turns % 2 == 1:  # black made the last move: flip to red's perspective
        value = -value
    v = value

    # Keep only half of the drawn games to balance the training data.
    store = v != 0 or random() > 0.5
    if store:
        data = []
        sign = value
        for i in range(turns):
            data.append([history[2 * i], policies[i], sign])
            sign = -sign
        self.save_play_data(idx, data)

    self.cur_pipes.append(pipes)
    self.remove_play_data()
    return v, turns, state, search_tree, store
def start_game(self, idx, search_tree):
    """Self-play one game and record training samples.

    Args:
        idx: game index; controls MCTS-tree reset cadence and tags saved data.
        search_tree: shared MCTS tree; may be replaced by a fresh one.

    Returns:
        Tuple (v, turns, state, search_tree, store) — value from red's
        perspective, half-moves played, terminal state, the tree in use,
        and whether the game's data was saved.
    """
    pipes = self.cur_pipes.pop()

    reset_tree = (not self.config.play.share_mtcs_info_in_self_play
                  or idx % self.config.play.reset_mtcs_info_per_game == 0)
    if reset_tree:
        search_tree = defaultdict(VisitState)

    # Only a random subset of games allows resignation.
    enable_resign = random() > self.config.play.enable_resign_rate
    self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes,
                               enable_resign=enable_resign, debugging=False)

    state = senv.INIT_STATE
    history = [state]          # alternating [state, action, state, ...]
    policies = []
    value, turns = 0, 0        # even turns == red to move
    game_over = False
    final_move = None

    while not game_over:
        # Block moves that would revisit an earlier position on this path.
        banned = None
        if state in history[:-1]:
            banned = []
            for pos, nxt in zip(history[:-1], history[1:]):
                if pos == state:
                    banned.append(nxt)

        t0 = time()
        action, policy = self.player.action(state, turns, banned)
        t1 = time()
        if action is None:
            logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
            value = -1
            break
        # logger.debug(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(t1 - t0):.1f}s")

        history.append(action)
        policies.append(policy)
        state = senv.step(state, action)
        turns += 1
        history.append(state)

        if turns / 2 >= self.config.play.max_game_length:
            game_over = True
            value = senv.evaluate(state)   # adjudicate long games statically
        else:
            game_over, value, final_move = senv.done(state)

    if final_move:
        # The terminal check supplied a decisive move: play and record it.
        policy = self.build_policy(final_move, False)
        history.append(final_move)
        policies.append(policy)
        state = senv.step(state, final_move)
        history.append(state)

    self.player.close()

    if turns % 2 == 1:   # last mover was black: flip to red's perspective
        value = -value
    v = value

    # Drop roughly half the draws so the dataset isn't draw-heavy.
    store = True if v != 0 else random() > 0.5
    if store:
        samples = []
        sign = value
        for i in range(turns):
            samples.append([history[2 * i], policies[i], sign])
            sign = -sign
        self.save_play_data(idx, samples)

    self.cur_pipes.append(pipes)
    self.remove_play_data()
    return v, turns, state, search_tree, store
def start_game(self, idx, search_tree):
    """Play one evaluation game between the model (alpha) and a UCCI engine.

    On even `idx` the model plays red, otherwise black. The engine's moves
    come from `get_ucci_move` on the FEN of the current state (flipped when
    it moves as black).

    Args:
        idx: game index; parity decides which side the model plays.
        search_tree: MCTS tree for the model player; may be replaced.

    Returns:
        (v, turns, state, store): value from red's perspective, half-move
        count, terminal state, and whether the game was stored.
    """
    pipes = self.cur_pipes.pop()

    if not self.config.play.share_mtcs_info_in_self_play \
            or idx % self.config.play.reset_mtcs_info_per_game == 0:
        search_tree = defaultdict(VisitState)

    enable_resign = random() > self.config.play.enable_resign_rate
    self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes,
                               enable_resign=enable_resign, debugging=False)

    state = senv.INIT_STATE
    history = [state]
    value, turns = 0, 0            # even turns == red to move
    game_over = False
    is_alpha_red = (idx % 2 == 0)  # model takes red on even games
    final_move = None
    check = False

    while not game_over:
        alpha_to_move = (turns % 2 == 0) if is_alpha_red else (turns % 2 == 1)
        if alpha_to_move:
            # Forbid repetitions for the model — unless it is in check.
            banned = None
            if not check and state in history[:-1]:
                banned = []
                for pos, nxt in zip(history[:-1], history[1:]):
                    if pos == state:
                        banned.append(nxt)
            action, _ = self.player.action(state, turns, banned)
            if action is None:
                logger.debug(
                    f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
        else:
            # Engine move: query via FEN, flip coordinates on black's turn.
            fen = senv.state_to_fen(state, turns)
            action = self.get_ucci_move(fen)
            if action is None:
                logger.debug(
                    f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            if turns % 2 == 1:
                action = flip_move(action)

        history.append(action)
        state = senv.step(state, action)
        turns += 1
        history.append(state)

        if turns / 2 >= self.config.play.max_game_length:
            game_over = True
            value = 0   # adjudicate long games as drawn
        else:
            game_over, value, final_move, check = senv.done(
                state, need_check=True)

    if final_move:
        # Play the decisive move returned by the terminal check.
        history.append(final_move)
        state = senv.step(state, final_move)
        history.append(state)
        turns += 1
        value = -value

    self.player.close()
    # Free the tree and player eagerly; these games leak memory otherwise.
    del search_tree
    del self.player
    gc.collect()

    if turns % 2 == 1:   # black moved last: flip to red's perspective
        value = -value
    v = value

    # Very short games are mostly noise — keep only ~30% of them.
    store = turns > 10 or random() > 0.7
    if store:
        data = [history[0]]
        sign = value
        for i in range(turns):
            data.append([history[2 * i + 1], sign])
            sign = -sign
        self.save_play_data(idx, data)

    self.cur_pipes.append(pipes)
    self.remove_play_data()
    return v, turns, state, store