def start_game(self, idx):
    """Play one evaluation game between the best model and the next-gen model.

    Each model gets its own pipe and its own MCTS tree. Colors alternate with
    ``idx``: even idx → best model plays red, odd idx → best model plays black.

    :param int idx: game index within the evaluation run (decides colors).
    :return: ``(value, turns)`` where ``value`` is the game result from the
        best model's perspective (1 win / -1 loss / 0 draw) and ``turns`` is
        the number of half-moves played.
    """
    pipe1 = self.pipes_bt.pop()
    pipe2 = self.pipes_ng.pop()
    search_tree1 = defaultdict(VisitState)
    search_tree2 = defaultdict(VisitState)

    self.player1 = CChessPlayer(self.config, search_tree=search_tree1,
                                pipes=pipe1, debugging=False, enable_resign=True)
    self.player2 = CChessPlayer(self.config, search_tree=search_tree2,
                                pipes=pipe2, debugging=False, enable_resign=True)

    # even: bst = red, ng = black; odd: bst = black, ng = red
    if idx % 2 == 0:
        red = self.player1
        black = self.player2
        logger.debug(f"best model is red, ng is black")
    else:
        red = self.player2
        black = self.player1
        logger.debug(f"best model is black, ng is red")

    state = senv.INIT_STATE
    value = 0       # best model's value
    turns = 0       # even == red; odd == black
    game_over = False

    while not game_over:
        start_time = time()
        if turns % 2 == 0:
            action, _ = red.action(state, turns)
        else:
            action, _ = black.action(state, turns)
        end_time = time()
        if action is None:
            # FIX: was `turn % 2` — `turn` is undefined (NameError); the
            # loop counter is `turns`.
            logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
            value = -1
            break
        state = senv.step(state, action)
        turns += 1
        if turns / 2 >= self.config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move = senv.done(state)

    self.player1.close()
    self.player2.close()

    # `value` so far is from the perspective of the side that just moved;
    # normalize to red's perspective, then to the best model's perspective.
    if turns % 2 == 1:  # black turn
        value = -value
    if idx % 2 == 1:
        value = -value

    self.pipes_bt.append(pipe1)
    self.pipes_ng.append(pipe2)
    return value, turns
def start(self, human_first=True):
    """Run a console game of human vs. AI.

    The human enters coordinates on stdin; the AI plays the other side via
    an MCTS player backed by the loaded model.

    :param bool human_first: True → human plays red (moves first).
    """
    self.env.reset()
    self.load_model()
    self.pipe = self.model.get_pipes()
    self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState),
                           pipes=self.pipe, enable_resign=True, debugging=False)
    self.human_move_first = human_first

    labels = ActionLabelsRed
    labels_n = len(ActionLabelsRed)

    self.env.board.print_to_cl()

    while not self.env.board.is_end():
        if human_first == self.env.red_to_move:
            # Human's turn: prompt until a valid own piece and destination
            # are entered.
            self.env.board.calc_chessmans_moving_list()
            is_correct_chessman = False
            is_correct_position = False
            chessman = None
            while not is_correct_chessman:
                title = "请输入棋子位置: "
                input_chessman_pos = input(title)
                x, y = int(input_chessman_pos[0]), int(input_chessman_pos[1])
                chessman = self.env.board.chessmans[x][y]
                if chessman != None and chessman.is_red == self.env.board.is_red_turn:
                    is_correct_chessman = True
                    print(f"当前棋子为{chessman.name_cn},可以落子的位置有:")
                    for point in chessman.moving_list:
                        print(point.x, point.y)
                else:
                    print("没有找到此名字的棋子或未轮到此方走子")
            while not is_correct_position:
                title = "请输入落子的位置: "
                input_chessman_pos = input(title)
                x, y = int(input_chessman_pos[0]), int(input_chessman_pos[1])
                is_correct_position = chessman.move(x, y)
                if is_correct_position:
                    self.env.board.print_to_cl()
                    self.env.board.clear_chessmans_moving_list()
        else:
            action, policy = self.ai.action(self.env.get_state(), self.env.num_halfmoves)
            # FIX: check resignation BEFORE flipping — the original called
            # flip_move(action) first, which fails when action is None
            # (AI resigning as black).
            if action is None:
                print("AI投降了!")
                break
            if not self.env.red_to_move:
                action = flip_move(action)
            self.env.step(action)
            print(f"AI选择移动 {action}")
            self.env.board.print_to_cl()

    self.ai.close()
    print(f"胜者是 is {self.env.board.winner} !!!")
    self.env.board.print_record()
class ObSelfPlay:
    """Observe the model playing both sides of a game on the console."""

    def __init__(self, config: Config):
        self.config = config
        self.env = CChessEnv()      # board environment
        self.model = None           # CChessModel, set by load_model()
        self.pipe = None            # model prediction pipe(s)
        self.ai = None              # CChessPlayer driving both sides
        self.chessmans = None

    def load_model(self):
        """Load best-model weights; build a fresh model if unavailable or --new."""
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def start(self):
        """Play a full self-play game, printing each move, until the game ends."""
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState),
                               pipes=self.pipe, enable_resign=True, debugging=False)

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()
        # history interleaves states and the actions taken from them; used to
        # ban moves that would repeat a previously seen position.
        history = [self.env.get_state()]

        while not self.env.board.is_end():
            no_act = None
            state = self.env.get_state()
            if state in history[:-1]:
                # Position repeated: forbid the moves previously played here.
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        no_act.append(history[i + 1])
            action, _ = self.ai.action(state, self.env.num_halfmoves, no_act)
            # NOTE(review): action is appended before the None check, so a
            # trailing None can end up in history (harmless — loop breaks).
            history.append(action)
            if action is None:
                print("AI投降了!")
                break
            move = self.env.board.make_single_record(int(action[0]), int(action[1]),
                                                     int(action[2]), int(action[3]))
            # env.step expects red-perspective moves; flip for black.
            if not self.env.red_to_move:
                action = flip_move(action)
            self.env.step(action)
            history.append(self.env.get_state())
            print(f"AI选择移动 {move}")
            self.env.board.print_to_cl()
            sleep(1)

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()
def cmd_go(self):
    '''Handle the UCI/UCCI "go" command: start searching the current position.

    Supported sub-options (others are ignored):
      - wtime <x> / btime <x>: remaining time in ms for white/black; when it is
        our side's clock, sets the time budget and a deep fixed depth.
      - depth <x>: search depth (scaled by 100 into playouts).
      - movetime <x> (or time <x>): search for exactly x ms.
      - infinite: search until stopped.

    Spawns a daemon search thread; if a time budget is set, a Timer fires
    cmd_stop just before it expires.
    '''
    if not self.is_ready:
        return
    self.start_time = time()
    self.t = None
    depth = None
    infinite = True
    self.remain_time = None
    # Rebuild pipes and a fresh search tree/player for this search.
    self.model.close_pipes()
    self.pipe = self.model.get_pipes(need_reload=False)
    self.search_tree = defaultdict(VisitState)
    self.player = CChessPlayer(self.config, search_tree=self.search_tree,
                               pipes=self.pipe, enable_resign=False, debugging=True,
                               uci=True, use_history=self.use_history,
                               side=self.turns % 2)
    # self.args holds the tokens following "go"; each option's value is the
    # next token.
    for i in range(len(self.args)):
        if self.args[i] == 'depth':
            depth = int(self.args[i + 1]) * 100  # depth → playout count
            infinite = False
        if self.args[i] == 'movetime' or self.args[i] == 'time':
            self.remain_time = int(self.args[i + 1]) / 1000  # ms → s
        if self.args[i] == 'infinite':
            infinite = True
        if self.args[i] == 'wtime':
            if self.is_red_turn:
                self.remain_time = int(self.args[i + 1]) / 1000
                depth = 3000
                infinite = False
        if self.args[i] == 'btime':
            if not self.is_red_turn:
                self.remain_time = int(self.args[i + 1]) / 1000
                depth = 3000
                infinite = False
    logger.debug(f"depth = {depth}, infinite = {infinite}, remain_time = {self.remain_time}")
    search_worker = Thread(target=self.search_action, args=(depth, infinite))
    search_worker.daemon = True
    search_worker.start()
    if self.remain_time:
        # Stop slightly early so the bestmove is emitted within the budget.
        self.t = Timer(self.remain_time - 0.01, self.cmd_stop)
        self.t.start()
def start_game(self, idx, search_tree):
    """Play one self-play game on a CChessEnv and persist its training data.

    :param int idx: game index; controls MCTS-tree reset and record saving.
    :param search_tree: shared MCTS tree; replaced with a fresh one unless
        tree sharing is enabled and this is not a reset boundary.
    :return: ``(env, search_tree)`` — the finished environment and the
        (possibly reset) tree for the next game.
    """
    pipes = self.cur_pipes.pop()
    env = CChessEnv(self.config).reset()
    if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
        search_tree = defaultdict(VisitState)
    # Both sides share one tree and one pipe set (true self-play).
    self.red = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes)
    self.black = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes)
    history = []
    # cc counts consecutive repeats of a move 4 half-moves apart.
    # NOTE(review): unlike the buffer variant, cc is never used here to
    # declare a winner — looks vestigial; confirm before removing.
    cc = 0
    while not env.done:
        start_time = time()
        if env.red_to_move:
            action = self.red.action(env)
        else:
            action = self.black.action(env)
        end_time = time()
        logger.debug(
            f"Process{self.pid} Playing: {env.red_to_move}, action: {action}, time: {end_time - start_time}s"
        )
        env.step(action)
        history.append(action)
        if len(history) > 6 and history[-1] == history[-5]:
            cc = cc + 1
        else:
            cc = 0
        if env.num_halfmoves / 2 >= self.config.play.max_game_length:
            env.winner = Winner.draw
    # red_win: +1 red won, -1 black won, 0 draw.
    if env.winner == Winner.red:
        red_win = 1
    elif env.winner == Winner.black:
        red_win = -1
    else:
        red_win = 0
    if env.num_halfmoves <= 10:
        # Suspiciously short game — log the moves for inspection.
        logger.debug(f"History moves: {history}")
    self.red.finish_game(red_win)
    self.black.finish_game(-red_win)
    self.cur_pipes.append(pipes)
    self.save_record_data(env, write=idx % self.config.play_data.nb_game_save_record == 0)
    self.save_play_data(idx)
    self.remove_play_data()
    return env, search_tree
def self_play_buffer(config, cur) -> (CChessEnv, list): pipes = cur.pop() # borrow env = CChessEnv(config).reset() search_tree = defaultdict(VisitState) red = CChessPlayer(config, search_tree=search_tree, pipes=pipes) black = CChessPlayer(config, search_tree=search_tree, pipes=pipes) history = [] cc = 0 while not env.done: start_time = time() if env.red_to_move: action = red.action(env) else: action = black.action(env) end_time = time() logger.debug( f"Playing: {env.red_to_move}, action: {action}, time: {end_time - start_time}s" ) env.step(action) history.append(action) if len(history) > 6 and history[-1] == history[-5]: cc = cc + 1 else: cc = 0 if env.num_halfmoves / 2 >= config.play.max_game_length: env.winner = Winner.draw if cc >= 4: if env.red_to_move: env.winner = Winner.black else: env.winner = Winner.red if env.winner == Winner.red: black_win = -1 elif env.winner == Winner.black: black_win = 1 else: black_win = 0 black.finish_game(black_win) red.finish_game(-black_win) data = [] for i in range(len(red.moves)): data.append(red.moves[i]) if i < len(black.moves): data.append(black.moves[i]) cur.append(pipes) return env, data
def start(self, human_first=True):
    """Run a console game of human vs. AI.

    The human enters coordinates on stdin; the AI plays the other side via
    an MCTS player backed by the loaded model.

    :param bool human_first: True → human plays red (moves first).
    """
    self.env.reset()
    self.load_model()
    self.pipe = self.model.get_pipes()
    self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState),
                           pipes=self.pipe, enable_resign=True, debugging=False)
    self.human_move_first = human_first

    labels = ActionLabelsRed
    labels_n = len(ActionLabelsRed)

    self.env.board.print_to_cl()

    while not self.env.board.is_end():
        if human_first == self.env.red_to_move:
            # Human's turn: prompt until a valid own piece and destination
            # are entered.
            self.env.board.calc_chessmans_moving_list()
            is_correct_chessman = False
            is_correct_position = False
            chessman = None
            while not is_correct_chessman:
                title = "请输入棋子位置: "
                input_chessman_pos = input(title)
                x, y = int(input_chessman_pos[0]), int(input_chessman_pos[1])
                chessman = self.env.board.chessmans[x][y]
                if chessman != None and chessman.is_red == self.env.board.is_red_turn:
                    is_correct_chessman = True
                    print(f"当前棋子为{chessman.name_cn},可以落子的位置有:")
                    for point in chessman.moving_list:
                        print(point.x, point.y)
                else:
                    print("没有找到此名字的棋子或未轮到此方走子")
            while not is_correct_position:
                title = "请输入落子的位置: "
                input_chessman_pos = input(title)
                x, y = int(input_chessman_pos[0]), int(input_chessman_pos[1])
                is_correct_position = chessman.move(x, y)
                if is_correct_position:
                    self.env.board.print_to_cl()
                    self.env.board.clear_chessmans_moving_list()
        else:
            action, policy = self.ai.action(self.env.get_state(), self.env.num_halfmoves)
            # FIX: check resignation BEFORE flipping — the original called
            # flip_move(action) first, which fails when action is None
            # (AI resigning as black).
            if action is None:
                print("AI投降了!")
                break
            if not self.env.red_to_move:
                action = flip_move(action)
            self.env.step(action)
            print(f"AI选择移动 {action}")
            self.env.board.print_to_cl()

    self.ai.close()
    print(f"胜者是 is {self.env.board.winner} !!!")
    self.env.board.print_record()
class EvaluateWorker:
    """Worker process that evaluates a candidate model against the baseline.

    Plays ``config.eval.game_num`` games with alternating colors and returns
    an aggregate score plus per-color win/draw/loss counts.
    """

    def __init__(self, config: Config, pipes1=None, pipes2=None, pid=None):
        self.config = config
        self.player_bt = None       # baseline ("best") player
        self.player_ng = None       # next-gen (candidate) player
        self.pid = pid              # worker index, used for staggering startup
        self.pipes_bt = pipes1      # pipe pool for the baseline model
        self.pipes_ng = pipes2      # pipe pool for the candidate model

    def start(self):
        """Run the evaluation match and return aggregate statistics.

        :return: ``(total_score, red_new_win, red_new_draw, red_new_fail,
                  black_new_win, black_new_draw, black_new_fail)`` where the
                  red_/black_ counters are from the candidate's perspective.
        """
        # Stagger worker startup to avoid a thundering herd.
        ran = self.config.play.max_processes * 2
        sleep((self.pid % ran) * 10)
        logger.debug(f"Evaluate#Start Process index = {self.pid}, pid = {os.getpid()}")

        score = 0
        total_score = 0
        red_new_win = 0
        red_new_fail = 0
        red_new_draw = 0
        black_new_win = 0
        black_new_fail = 0
        black_new_draw = 0

        for idx in range(self.config.eval.game_num):
            start_time = time()
            value, turns = self.start_game(idx)
            end_time = time()

            # `value` is from the red player's perspective; with colors
            # alternating on idx, map it to a baseline-vs-candidate result.
            if (value == 1 and idx % 2 == 0) or (value == -1 and idx % 2 == 1):
                if idx % 2 == 0:
                    black_new_fail += 1
                else:
                    red_new_fail += 1
                result = '基准模型胜'
            elif (value == 1 and idx % 2 == 1) or (value == -1 and idx % 2 == 0):
                if idx % 2 == 0:
                    black_new_win += 1
                else:
                    red_new_win += 1
                result = '待评测模型胜'
            else:
                if idx % 2 == 0:
                    black_new_draw += 1
                else:
                    red_new_draw += 1
                result = '和棋'

            if value == -1:  # loss
                score = 0
            elif value == 1:  # win
                score = 1
            else:
                score = 0.5
            # Normalize the score to the candidate model's perspective.
            if idx % 2 == 0:
                score = 1 - score
            else:
                score = score

            logger.info(
                f"进程{self.pid}评测完毕 用时{(end_time - start_time):.1f}秒, "
                f"{turns / 2}回合, {result}, 得分:{score}, value = {value}, idx = {idx}"
            )
            total_score += score

        return (total_score, red_new_win, red_new_draw, red_new_fail,
                black_new_win, black_new_draw, black_new_fail)

    def start_game(self, idx):
        """Play one evaluation game (baseline vs. candidate) on senv states.

        :param int idx: game index; even → baseline plays red, odd → black.
        :return: ``(value, turns)`` — result from red's perspective and the
            number of half-moves played.
        """
        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)

        # Randomize playouts per game to diversify match conditions.
        playouts = randint(8, 12) * 100
        self.config.play.simulation_num_per_move = playouts
        logger.info(f"Set playouts = {self.config.play.simulation_num_per_move}")

        self.player1 = CChessPlayer(self.config, search_tree=search_tree1,
                                    pipes=pipe1, debugging=False, enable_resign=False)
        self.player2 = CChessPlayer(self.config, search_tree=search_tree2,
                                    pipes=pipe2, debugging=False, enable_resign=False)

        # even: bst = red, ng = black; odd: bst = black, ng = red
        if idx % 2 == 0:
            red = self.player1
            black = self.player2
            logger.debug(f"best model is red, ng is black")
        else:
            red = self.player2
            black = self.player1
            logger.debug(f"best model is black, ng is red")

        state = senv.INIT_STATE
        history = [state]
        value = 0       # best model's value
        turns = 0       # even == red; odd == black
        game_over = False
        # FIX: final_move must be initialized — if a player resigns before
        # senv.done() ever runs, the `if final_move:` below raised NameError.
        final_move = None
        no_eat_count = 0
        check = False

        while not game_over:
            start_time = time()
            no_act = None
            increase_temp = False
            if not check and state in history[:-1]:
                # Repeated position: classify each previously played move.
                no_act = []
                increase_temp = True
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        # If the repeated move gives check or chases a piece,
                        # forbid it.
                        if senv.will_check_or_catch(state, history[i + 1]):
                            no_act.append(history[i + 1])
                        # Otherwise treat it as an idle (waiting) move.
                        else:
                            free_move[state] += 1
                            if free_move[state] >= 3:
                                # Three idle repetitions → declare a draw.
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次,作和棋处理")
                                break
            if game_over:
                break
            if turns % 2 == 0:
                action, _ = red.action(state, turns, no_act=no_act,
                                       increase_temp=increase_temp)
            else:
                action, _ = black.action(state, turns, no_act=no_act,
                                         increase_temp=increase_temp)
            end_time = time()
            if self.config.opts.log_move:
                logger.debug(
                    f"进程id = {self.pid}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}"
                )
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            history.append(action)
            state, no_eat = senv.new_step(state, action)
            turns += 1
            if no_eat:
                no_eat_count += 1
            else:
                no_eat_count = 0
            history.append(state)
            if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                # 120 half-moves without a capture, or game too long → draw.
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(state, need_check=True)
                if not game_over:
                    if not senv.has_attack_chessman(state):
                        logger.info(f"双方无进攻子力,作和。state = {state}")
                        game_over = True
                        value = 0

        if final_move:
            # Apply the forced mating move senv.done found.
            history.append(final_move)
            state = senv.step(state, final_move)
            turns += 1
            value = -value
            history.append(state)

        self.player1.close()
        self.player2.close()

        if turns % 2 == 1:  # black turn
            value = -value

        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns
def start_game(self, idx, search_tree):
    """Play one self-play game on senv states and save its training data.

    :param int idx: game index; controls MCTS-tree reset.
    :param search_tree: shared MCTS tree, reset on configured boundaries.
    :return: ``(v, turns, state, search_tree, store)`` — game result from
        red's perspective, half-move count, final state, tree, and whether
        the game's data was stored.
    """
    pipes = self.cur_pipes.pop()
    if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
        search_tree = defaultdict(VisitState)
    # Randomly enable resignation so some games play out to the end.
    if random() > self.config.play.enable_resign_rate:
        enable_resign = True
    else:
        enable_resign = False
    self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes,
                               enable_resign=enable_resign, debugging=False)
    state = senv.INIT_STATE
    # history interleaves: state, action, state, action, ...
    history = [state]
    policys = []
    value = 0
    turns = 0  # even == red; odd == black
    game_over = False
    final_move = None
    while not game_over:
        no_act = None
        if state in history[:-1]:
            # Repeated position: forbid the moves previously played here.
            no_act = []
            for i in range(len(history) - 1):
                if history[i] == state:
                    no_act.append(history[i + 1])
        start_time = time()
        action, policy = self.player.action(state, turns, no_act)
        end_time = time()
        if action is None:
            logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
            value = -1
            break
        history.append(action)
        policys.append(policy)
        state = senv.step(state, action)
        turns += 1
        history.append(state)
        if turns / 2 >= self.config.play.max_game_length:
            game_over = True
            # Too long: score the final position heuristically.
            value = senv.evaluate(state)
        else:
            game_over, value, final_move = senv.done(state)
    if final_move:
        # Record the forced mating move senv.done found.
        policy = self.build_policy(final_move, False)
        history.append(final_move)
        policys.append(policy)
        state = senv.step(state, final_move)
        history.append(state)
    self.player.close()
    if turns % 2 == 1:  # black turn
        value = -value
    v = value
    # Store only half of the drawn games to limit draw bias in training data.
    if v == 0:
        if random() > 0.5:
            store = True
        else:
            store = False
    else:
        store = True
    if store:
        # Each sample: [state, policy, value-from-side-to-move]; the sign of
        # value alternates with the side to move.
        data = []
        for i in range(turns):
            k = i * 2
            data.append([history[k], policys[i], value])
            value = -value
        self.save_play_data(idx, data)
    self.cur_pipes.append(pipes)
    self.remove_play_data()
    return v, turns, state, search_tree, store
def start_game(self, idx, search_tree):
    """Play one game of the model against a UCCI engine, saving training data.

    The model ("alpha") plays red on even ``idx`` and black on odd ``idx``;
    the opponent moves come from the external UCCI engine.

    :return: ``(v, turns, state, search_tree, store)`` — result from red's
        perspective, half-move count, final state, tree, and whether data
        was stored.
    """
    pipes = self.cur_pipes.pop()
    if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
        search_tree = defaultdict(VisitState)
    # Randomly enable resignation so some games play out to the end.
    if random() > self.config.play.enable_resign_rate:
        enable_resign = True
    else:
        enable_resign = False
    self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes,
                               enable_resign=enable_resign, debugging=False)
    state = senv.INIT_STATE
    # history interleaves: state, action, state, action, ...
    history = [state]
    policys = []
    value = 0
    turns = 0  # even == red; odd == black
    game_over = False
    is_alpha_red = True if idx % 2 == 0 else False
    final_move = None
    while not game_over:
        if (is_alpha_red and turns % 2 == 0) or (not is_alpha_red and turns % 2 == 1):
            # Model's turn.
            no_act = None
            if state in history[:-1]:
                # Repeated position: forbid the moves previously played here.
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        no_act.append(history[i + 1])
            action, policy = self.player.action(state, turns, no_act)
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
        else:
            # UCCI engine's turn: translate state → FEN, query the engine.
            fen = senv.state_to_fen(state, turns)
            action = self.get_ucci_move(fen)
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            if turns % 2 == 1:
                # Engine moves are absolute; flip to the side-to-move frame.
                action = flip_move(action)
            try:
                policy = self.build_policy(action, False)
            except Exception as e:
                logger.error(f"Build policy error {e}, action = {action}, state = {state}, fen = {fen}")
                value = 0
                break
        history.append(action)
        policys.append(policy)
        state = senv.step(state, action)
        turns += 1
        history.append(state)
        if turns / 2 >= self.config.play.max_game_length:
            game_over = True
            # Too long: score the final position heuristically.
            value = senv.evaluate(state)
        else:
            game_over, value, final_move = senv.done(state)
    if final_move:
        # Record the forced mating move senv.done found.
        policy = self.build_policy(final_move, False)
        history.append(final_move)
        policys.append(policy)
        state = senv.step(state, final_move)
        history.append(state)
    self.player.close()
    if turns % 2 == 1:  # black turn
        value = -value
    v = value
    # Keep only ~30% of draws/very short games to limit their training bias.
    if v == 0 or turns <= 10:
        if random() > 0.7:
            store = True
        else:
            store = False
    else:
        store = True
    if store:
        # Each sample: [state, policy, value-from-side-to-move].
        data = []
        for i in range(turns):
            k = i * 2
            data.append([history[k], policys[i], value])
            value = -value
        self.save_play_data(idx, data)
    self.cur_pipes.append(pipes)
    self.remove_play_data()
    return v, turns, state, search_tree, store
def start(self, human_first=True):
    """Run a pygame GUI game of human vs. AI.

    The AI runs in a background thread (self.ai_move); this loop handles
    pygame events, human clicks, and rendering.

    :param bool human_first: True → human plays red (moves first).
    """
    self.env.reset()
    self.load_model()
    self.pipe = self.model.get_pipes()
    self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState),
                           pipes=self.pipe, enable_resign=True, debugging=True)
    self.human_move_first = human_first

    pygame.init()
    screen, board_background, widget_background = self.init_screen()
    framerate = pygame.time.Clock()

    labels = ActionLabelsRed
    labels_n = len(ActionLabelsRed)

    current_chessman = None
    if human_first:
        self.env.board.calc_chessmans_moving_list()

    # AI thinks in a daemon thread so the event loop stays responsive.
    ai_worker = Thread(target=self.ai_move, name="ai_worker")
    ai_worker.daemon = True
    ai_worker.start()

    while not self.env.board.is_end():
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                # Save the game record before exiting.
                self.env.board.print_record()
                self.ai.close(wait=False)
                game_id = datetime.now().strftime("%Y%m%d-%H%M%S")
                path = os.path.join(
                    self.config.resource.play_record_dir,
                    self.config.resource.play_record_filename_tmpl % game_id)
                self.env.board.save_record(path)
                sys.exit()
            elif event.type == VIDEORESIZE:
                pass
            elif event.type == MOUSEBUTTONDOWN:
                if human_first == self.env.red_to_move:
                    pressed_array = pygame.mouse.get_pressed()
                    for index in range(len(pressed_array)):
                        # index 0 == left mouse button.
                        if index == 0 and pressed_array[index]:
                            mouse_x, mouse_y = pygame.mouse.get_pos()
                            col_num, row_num = translate_hit_area(
                                mouse_x, mouse_y, self.chessman_w, self.chessman_h)
                            chessman_sprite = select_sprite_from_group(
                                self.chessmans, col_num, row_num)
                            if current_chessman is None and chessman_sprite != None:
                                # First click: select one of our own pieces.
                                if chessman_sprite.chessman.is_red == self.env.red_to_move:
                                    current_chessman = chessman_sprite
                                    chessman_sprite.is_selected = True
                            elif current_chessman != None and chessman_sprite != None:
                                if chessman_sprite.chessman.is_red == self.env.red_to_move:
                                    # Clicked another own piece: switch selection.
                                    current_chessman.is_selected = False
                                    current_chessman = chessman_sprite
                                    chessman_sprite.is_selected = True
                                else:
                                    # Clicked an enemy piece: attempt a capture.
                                    move = str(current_chessman.chessman.col_num) + str(current_chessman.chessman.row_num) +\
                                        str(col_num) + str(row_num)
                                    success = current_chessman.move(
                                        col_num, row_num, self.chessman_w, self.chessman_h)
                                    self.history.append(move)
                                    if success:
                                        self.chessmans.remove(chessman_sprite)
                                        chessman_sprite.kill()
                                        current_chessman.is_selected = False
                                        current_chessman = None
                                        self.history.append(self.env.get_state())
                            elif current_chessman != None and chessman_sprite is None:
                                # Clicked an empty square: attempt a plain move.
                                move = str(current_chessman.chessman.col_num) + str(current_chessman.chessman.row_num) +\
                                    str(col_num) + str(row_num)
                                success = current_chessman.move(
                                    col_num, row_num, self.chessman_w, self.chessman_h)
                                self.history.append(move)
                                if success:
                                    current_chessman.is_selected = False
                                    current_chessman = None
                                    self.history.append(self.env.get_state())

        self.draw_widget(screen, widget_background)
        framerate.tick(20)
        # clear/erase the last drawn sprites
        self.chessmans.clear(screen, board_background)
        # update all the sprites
        self.chessmans.update()
        self.chessmans.draw(screen)
        pygame.display.update()

    self.ai.close(wait=False)
    logger.info(f"Winner is {self.env.board.winner} !!!")
    self.env.board.print_record()
    game_id = datetime.now().strftime("%Y%m%d-%H%M%S")
    path = os.path.join(
        self.config.resource.play_record_dir,
        self.config.resource.play_record_filename_tmpl % game_id)
    self.env.board.save_record(path)
    sleep(3)
def self_play_buffer(config, cur) -> (tuple, list): pipe = cur.pop() # borrow if random() > config.play.enable_resign_rate: enable_resign = True else: enable_resign = False player = CChessPlayer(config, search_tree=defaultdict(VisitState), pipes=pipe, enable_resign=enable_resign, debugging=False) state = senv.INIT_STATE history = [state] policys = [] value = 0 turns = 0 game_over = False final_move = None while not game_over: no_act = None if state in history[:-1]: no_act = [] for i in range(len(history) - 1): if history[i] == state: no_act.append(history[i + 1]) start_time = time() action, policy = player.action(state, turns, no_act) end_time = time() if action is None: logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!") value = -1 break # logger.debug(f"Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s") policys.append(policy) history.append(action) state = senv.step(state, action) turns += 1 history.append(state) if turns / 2 >= config.play.max_game_length: game_over = True value = senv.evaluate(state) else: game_over, value, final_move = senv.done(state) if final_move: policy = build_policy(final_move, False) history.append(final_move) policys.append(policy) state = senv.step(state, final_move) history.append(state) player.close() if turns % 2 == 1: # balck turn value = -value v = value data = [] for i in range(turns): k = i * 2 data.append([history[k], policys[i], value]) value = -value cur.append(pipe) return (turns, v), data
class ObSelfPlayUCCI:
    """Observe a game of the AlphaZero model against an external UCCI engine."""

    def __init__(self, config: Config, ai_move_first=True):
        self.config = config
        self.env = CChessEnv()          # board environment
        self.model = None               # CChessModel, set by load_model()
        self.pipe = None                # model prediction pipe(s)
        self.ai = None                  # CChessPlayer (the AlphaZero side)
        self.chessmans = None
        self.ai_move_first = ai_move_first  # True → AlphaZero plays red

    def load_model(self):
        """Load best-model weights; build a fresh model if unavailable or --new."""
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def start(self):
        """Play a full game AlphaZero vs. UCCI engine, printing each move."""
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState),
                               pipes=self.pipe, enable_resign=True, debugging=False)

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()
        # history interleaves states and actions; used to ban repeated moves.
        history = [self.env.get_state()]
        turns = 0
        game_over = False
        final_move = None

        while not game_over:
            if (self.ai_move_first and turns % 2 == 0) or (not self.ai_move_first and turns % 2 == 1):
                # AlphaZero's turn.
                start_time = time()
                no_act = None
                state = self.env.get_state()
                if state in history[:-1]:
                    # Repeated position: forbid the moves previously played
                    # here (flipped back into the side-to-move frame).
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            act = history[i + 1]
                            if not self.env.red_to_move:
                                act = flip_move(act)
                            no_act.append(act)
                action, _ = self.ai.action(state, self.env.num_halfmoves, no_act)
                end_time = time()
                if action is None:
                    print("AlphaZero 投降了!")
                    break
                move = self.env.board.make_single_record(
                    int(action[0]), int(action[1]), int(action[2]), int(action[3]))
                print(
                    f"AlphaZero 选择移动 {move}, 消耗时间 {(end_time - start_time):.2f}s"
                )
                if not self.env.red_to_move:
                    action = flip_move(action)
            else:
                # UCCI engine's turn.
                state = self.env.get_state()
                print(state)
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    print("Eleeye 投降了!")
                    break
                print(action)
                if not self.env.red_to_move:
                    rec_action = flip_move(action)
                else:
                    rec_action = action
                move = self.env.board.make_single_record(
                    int(rec_action[0]), int(rec_action[1]), int(rec_action[2]), int(rec_action[3]))
                print(f"Eleeye 选择移动 {move}")
            history.append(action)
            self.env.step(action)
            history.append(self.env.get_state())
            self.env.board.print_to_cl()
            turns += 1
            sleep(1)
            game_over, final_move = self.env.board.is_end_final_move()
            print(game_over, final_move)

        if final_move:
            # Apply the forced final (mating) move reported by the board.
            move = self.env.board.make_single_record(int(final_move[0]), int(final_move[1]),
                                                     int(final_move[2]), int(final_move[3]))
            print(f"Final Move {move}")
            if not self.env.red_to_move:
                final_move = flip_move(final_move)
            self.env.step(final_move)
            self.env.board.print_to_cl()

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()

    def get_ucci_move(self, fen, time=3):
        """Ask the external UCCI engine for a move on the given FEN.

        NOTE(review): the parameter name `time` shadows the imported time()
        within this method; renaming would change the keyword interface, so
        it is left as-is.

        :param str fen: position in FEN notation.
        :param int time: thinking time in seconds; grows by 1 on each retry.
        :return: parsed move, or None if the engine reports nobestmove.
        """
        p = subprocess.Popen(self.config.resource.eleeye_path,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True)
        setfen = f'position fen {fen}\n'
        setrandom = 'setoption randomness small\n'
        cmd = 'ucci\n' + setrandom + setfen + f'go time {time * 1000}\n'
        try:
            out, err = p.communicate(cmd, timeout=time + 0.5)
        except:
            # Engine overran its budget: kill it and retry with more time.
            p.kill()
            try:
                out, err = p.communicate()
            except Exception as e:
                logger.error(f"{e}, cmd = {cmd}")
            return self.get_ucci_move(fen, time + 1)
        print(out)
        lines = out.split('\n')
        if lines[-2] == 'nobestmove':
            return None
        # Normally the move is on the second-to-last line ("bestmove xxxx");
        # if that line is an info/depth line, fall back to the last line.
        move = lines[-2].split(' ')[1]
        if move == 'depth':
            move = lines[-1].split(' ')[6]
        return senv.parse_ucci_move(move)
def self_play_buffer(config, pipes_bt, pipes_ng, idx, res_data, hist_base, hist_ng) -> (tuple, list):
    """Play one evaluation game between the baseline and candidate models.

    Colors alternate with ``idx`` (even → baseline red). Applies the full
    draw/repetition rules: no-capture limit, idle-repetition draws, and the
    no-attacking-material draw.

    :param pipes_bt: pipe pool for the baseline model (one pipe borrowed).
    :param pipes_ng: pipe pool for the candidate model (one pipe borrowed).
    :param int idx: game index (decides colors).
    :param res_data: dict with 'base'/'unchecked' model digests for tagging.
    :param hist_base: use_history flag for the baseline player.
    :param hist_ng: use_history flag for the candidate player.
    :return: ``((turns, v, idx), data)`` where ``v`` is the result from red's
        perspective and ``data`` is [digests..., initial state, [state, value]...].
    """
    sleep(random())
    # Randomize playouts per game to diversify match conditions.
    playouts = randint(8, 12) * 100
    config.play.simulation_num_per_move = playouts
    logger.info(f"Set playouts = {config.play.simulation_num_per_move}")

    pipe1 = pipes_bt.pop()  # borrow
    pipe2 = pipes_ng.pop()
    player1 = CChessPlayer(config, search_tree=defaultdict(VisitState), pipes=pipe1,
                           enable_resign=False, debugging=False, use_history=hist_base)
    player2 = CChessPlayer(config, search_tree=defaultdict(VisitState), pipes=pipe2,
                           enable_resign=False, debugging=False, use_history=hist_ng)

    # even: bst = red, ng = black; odd: bst = black, ng = red
    if idx % 2 == 0:
        red = player1
        black = player2
        print(f"基准模型执红,待评测模型执黑")
    else:
        red = player2
        black = player1
        print(f"待评测模型执红,基准模型执黑")

    state = senv.INIT_STATE
    history = [state]
    value = 0
    turns = 0       # even == red; odd == black
    game_over = False
    final_move = None
    no_eat_count = 0
    check = False
    increase_temp = False
    no_act = []

    while not game_over:
        start_time = time()
        if turns % 2 == 0:
            action, _ = red.action(state, turns, no_act=no_act, increase_temp=increase_temp)
        else:
            action, _ = black.action(state, turns, no_act=no_act, increase_temp=increase_temp)
        end_time = time()
        if action is None:
            print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
            value = -1
            break
        print(
            f"博弈中: 回合{turns / 2 + 1} {'红方走棋' if turns % 2 == 0 else '黑方走棋'}, 着法: {action}, 用时: {(end_time - start_time):.1f}s"
        )
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            # FIX: the original logged `policy = {policy}` here, but `policy`
            # is never defined in this function (NameError inside the error
            # handler would mask the real exception). Log the action instead.
            logger.error(f"{e}, no_act = {no_act}, action = {action}")
            game_over = True
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)
        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            # 120 half-moves without a capture, or game too long → draw.
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state, need_check=True)
            no_act = []
            increase_temp = False
            if not game_over:
                if not senv.has_attack_chessman(state):
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0
            if not game_over and not check and state in history[:-1]:
                # Repeated position: classify each previously played move.
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        # Checking/chasing repeats are forbidden outright.
                        if senv.will_check_or_catch(state, history[i + 1]):
                            no_act.append(history[i + 1])
                        elif not senv.be_catched(state, history[i + 1]):
                            # Idle repetition: allow, but raise temperature.
                            increase_temp = True
                            free_move[state] += 1
                            if free_move[state] >= 3:
                                # Three idle repetitions → declare a draw.
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次,作和棋处理")
                                break

    if final_move:
        # Apply the forced mating move senv.done found.
        history.append(final_move)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)

    # Tag the record with the two model digests, red first.
    data = []
    if idx % 2 == 0:
        data = [res_data['base']['digest'], res_data['unchecked']['digest']]
    else:
        data = [res_data['unchecked']['digest'], res_data['base']['digest']]

    player1.close()
    player2.close()
    del player1, player2
    gc.collect()

    if turns % 2 == 1:  # black turn
        value = -value
    v = value

    # Append the initial state, then [action-state, value] pairs with the
    # value sign alternating by side to move.
    data.append(history[0])
    for i in range(turns):
        k = i * 2
        data.append([history[k + 1], value])
        value = -value

    pipes_bt.append(pipe1)
    pipes_ng.append(pipe2)
    return (turns, v, idx), data
def start(self):
    """Play a full game AlphaZero vs. UCCI engine, printing each move.

    self.ai_move_first decides whether AlphaZero plays red (moves first);
    the opponent's moves come from get_ucci_move().
    """
    self.env.reset()
    self.load_model()
    self.pipe = self.model.get_pipes()
    self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState),
                           pipes=self.pipe, enable_resign=True, debugging=False)

    labels = ActionLabelsRed
    labels_n = len(ActionLabelsRed)

    self.env.board.print_to_cl()
    # history interleaves states and actions; used to ban repeated moves.
    history = [self.env.get_state()]
    turns = 0
    game_over = False
    final_move = None

    while not game_over:
        if (self.ai_move_first and turns % 2 == 0) or (not self.ai_move_first and turns % 2 == 1):
            # AlphaZero's turn.
            start_time = time()
            no_act = None
            state = self.env.get_state()
            if state in history[:-1]:
                # Repeated position: forbid previously played moves (flipped
                # back into the side-to-move frame).
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        act = history[i + 1]
                        if not self.env.red_to_move:
                            act = flip_move(act)
                        no_act.append(act)
            action, _ = self.ai.action(state, self.env.num_halfmoves, no_act)
            end_time = time()
            if action is None:
                print("AlphaZero 投降了!")
                break
            move = self.env.board.make_single_record(int(action[0]), int(action[1]),
                                                     int(action[2]), int(action[3]))
            print(f"AlphaZero 选择移动 {move}, 消耗时间 {(end_time - start_time):.2f}s")
            if not self.env.red_to_move:
                action = flip_move(action)
        else:
            # UCCI engine's turn.
            state = self.env.get_state()
            print(state)
            fen = senv.state_to_fen(state, turns)
            action = self.get_ucci_move(fen)
            if action is None:
                print("Eleeye 投降了!")
                break
            print(action)
            if not self.env.red_to_move:
                rec_action = flip_move(action)
            else:
                rec_action = action
            move = self.env.board.make_single_record(int(rec_action[0]), int(rec_action[1]),
                                                     int(rec_action[2]), int(rec_action[3]))
            print(f"Eleeye 选择移动 {move}")
        history.append(action)
        self.env.step(action)
        history.append(self.env.get_state())
        self.env.board.print_to_cl()
        turns += 1
        sleep(1)
        game_over, final_move = self.env.board.is_end_final_move()
        print(game_over, final_move)

    if final_move:
        # Apply the forced final (mating) move reported by the board.
        move = self.env.board.make_single_record(int(final_move[0]), int(final_move[1]),
                                                 int(final_move[2]), int(final_move[3]))
        print(f"Final Move {move}")
        if not self.env.red_to_move:
            final_move = flip_move(final_move)
        self.env.step(final_move)

    self.env.board.print_to_cl()
    self.ai.close()
    print(f"胜者是 is {self.env.board.winner} !!!")
    self.env.board.print_record()
class ObSelfPlayUCCI:
    """Observer that plays the best AlphaZero model against the Eleeye UCCI engine
    and prints the game to the console.

    Fixes vs. original:
    - ``get_ucci_move`` caught a bare ``except:`` around ``communicate``; it now
      catches ``subprocess.TimeoutExpired`` specifically, consistent with the
      sibling ``SelfPlayWorker.get_ucci_move`` implementation.
    - removed unused locals ``labels`` / ``labels_n`` in ``start``.
    """

    def __init__(self, config: Config, ai_move_first=True):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.chessmans = None
        # True: AlphaZero plays red (moves first); False: Eleeye moves first.
        self.ai_move_first = ai_move_first

    def load_model(self):
        """Load the best model weights, or build a fresh model if unavailable."""
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def start(self):
        """Run one full observed game, alternating AlphaZero and Eleeye moves."""
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState), pipes=self.pipe,
                               enable_resign=True, debugging=False)
        self.env.board.print_to_cl()
        # history interleaves states and actions: [state0, action0, state1, ...]
        history = [self.env.get_state()]
        turns = 0
        game_over = False
        final_move = None
        while not game_over:
            if (self.ai_move_first and turns % 2 == 0) or (not self.ai_move_first and turns % 2 == 1):
                # --- AlphaZero's turn ---
                start_time = time()
                no_act = None
                state = self.env.get_state()
                if state in history[:-1]:
                    # Repetition: ban the moves already tried from this position.
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            act = history[i + 1]
                            if not self.env.red_to_move:
                                act = flip_move(act)
                            no_act.append(act)
                action, _ = self.ai.action(state, self.env.num_halfmoves, no_act)
                end_time = time()
                if action is None:
                    print("AlphaZero 投降了!")
                    break
                move = self.env.board.make_single_record(int(action[0]), int(action[1]), int(action[2]), int(action[3]))
                print(f"AlphaZero 选择移动 {move}, 消耗时间 {(end_time - start_time):.2f}s")
                # Search is from red's perspective; un-flip for black before stepping.
                if not self.env.red_to_move:
                    action = flip_move(action)
            else:
                # --- Eleeye's turn ---
                state = self.env.get_state()
                print(state)
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    print("Eleeye 投降了!")
                    break
                print(action)
                if not self.env.red_to_move:
                    rec_action = flip_move(action)
                else:
                    rec_action = action
                move = self.env.board.make_single_record(int(rec_action[0]), int(rec_action[1]), int(rec_action[2]), int(rec_action[3]))
                print(f"Eleeye 选择移动 {move}")
            history.append(action)
            self.env.step(action)
            history.append(self.env.get_state())
            self.env.board.print_to_cl()
            turns += 1
            sleep(1)
            game_over, final_move = self.env.board.is_end_final_move()
            print(game_over, final_move)
        if final_move:
            move = self.env.board.make_single_record(int(final_move[0]), int(final_move[1]), int(final_move[2]), int(final_move[3]))
            print(f"Final Move {move}")
            if not self.env.red_to_move:
                final_move = flip_move(final_move)
            self.env.step(final_move)
        self.env.board.print_to_cl()
        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()

    def get_ucci_move(self, fen, time=3):
        """Ask the Eleeye engine for a move on the given FEN.

        Spawns the engine, gives it ``time`` seconds to think, and parses the
        ``bestmove`` line. Returns ``None`` on ``nobestmove`` (resign). On a
        communicate failure the query is retried with one extra second.
        """
        p = subprocess.Popen(self.config.resource.eleeye_path,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True)
        setfen = f'position fen {fen}\n'
        setrandom = 'setoption randomness small\n'
        cmd = 'ucci\n' + setrandom + setfen + f'go time {time * 1000}\n'
        try:
            out, err = p.communicate(cmd, timeout=time + 0.5)
        except subprocess.TimeoutExpired:
            # Engine overran its budget: kill it and collect whatever it printed.
            p.kill()
            try:
                out, err = p.communicate()
            except Exception as e:
                logger.error(f"{e}, cmd = {cmd}")
                return self.get_ucci_move(fen, time + 1)
        print(out)
        lines = out.split('\n')
        if lines[-2] == 'nobestmove':
            return None
        move = lines[-2].split(' ')[1]
        if move == 'depth':
            # 'bestmove' was not the last-but-one line; take it from the info line.
            move = lines[-1].split(' ')[6]
        return senv.parse_ucci_move(move)
def start_game(self, idx): pipe1 = self.pipes_bt.pop() pipe2 = self.pipes_ng.pop() search_tree1 = defaultdict(VisitState) search_tree2 = defaultdict(VisitState) self.player1 = CChessPlayer(self.config, search_tree=search_tree1, pipes=pipe1, debugging=False, enable_resign=True) self.player2 = CChessPlayer(self.config, search_tree=search_tree2, pipes=pipe2, debugging=False, enable_resign=True) # even: bst = red, ng = black; odd: bst = black, ng = red if idx % 2 == 0: red = self.player1 black = self.player2 logger.debug(f"best model is red, ng is black") else: red = self.player2 black = self.player1 logger.debug(f"best model is black, ng is red") state = senv.INIT_STATE value = 0 # best model's value turns = 0 # even == red; odd == black game_over = False while not game_over: start_time = time() if turns % 2 == 0: action, _ = red.action(state, turns) else: action, _ = black.action(state, turns) end_time = time() # logger.debug(f"pid = {self.pid}, idx = {idx}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}") if action is None: logger.debug(f"{turn % 2} (0 = red; 1 = black) has resigned!") value = -1 break state = senv.step(state, action) turns += 1 if turns / 2 >= self.config.play.max_game_length: game_over = True value = 0 else: game_over, value, final_move = senv.done(state) self.player1.close() self.player2.close() if turns % 2 == 1: # black turn value = -value if idx % 2 == 1: # return player1' value value = -value self.pipes_bt.append(pipe1) self.pipes_ng.append(pipe2) return value, turns
class EvaluateWorker:
    """Plays a series of games between the best ('bt') and next-gen ('ng')
    models and tallies the score.

    Fix vs. original: ``start_game`` logged ``turn % 2`` on resignation, but
    only ``turns`` exists — a guaranteed NameError on that path; now ``turns``.
    """

    def __init__(self, config: Config, pipes1=None, pipes2=None, pid=None):
        self.config = config
        self.player_bt = None
        self.player_ng = None
        self.pid = pid
        self.pipes_bt = pipes1   # prediction pipes for the best model
        self.pipes_ng = pipes2   # prediction pipes for the next-gen model

    def start(self):
        """Play ``config.eval.game_num`` games and return [(idx, score), ...]."""
        logger.debug(
            f"Evaluate#Start Process index = {self.pid}, pid = {os.getpid()}")
        score1 = 0
        score2 = 0
        results = []
        for idx in range(self.config.eval.game_num):
            start_time = time()
            score, turns = self.start_game(idx)
            end_time = time()
            # score > 0: model1 (best) won; < 0: model2 won; 0: draw.
            if score < 0:
                score2 += 1
            elif score > 0:
                score1 += 1
            else:
                score2 += 0.5
                score1 += 0.5
            results.append((idx, score))
            logger.debug(
                f"Process{self.pid} play game {idx} time={(end_time - start_time):.1f} sec, "
                f"turn={turns / 2}, model1 {score1} - {score2} model2")
        return results

    def start_game(self, idx):
        """Play one evaluation game; colors alternate with ``idx`` parity.

        Returns (value, turns) with ``value`` from the best model's perspective.
        """
        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)
        self.player1 = CChessPlayer(self.config, search_tree=search_tree1, pipes=pipe1, debugging=False, enable_resign=True)
        self.player2 = CChessPlayer(self.config, search_tree=search_tree2, pipes=pipe2, debugging=False, enable_resign=True)
        # even: bst = red, ng = black; odd: bst = black, ng = red
        if idx % 2 == 0:
            red = self.player1
            black = self.player2
            logger.debug(f"best model is red, ng is black")
        else:
            red = self.player2
            black = self.player1
            logger.debug(f"best model is black, ng is red")
        state = senv.INIT_STATE
        value = 0   # best model's value
        turns = 0   # even == red; odd == black
        game_over = False
        while not game_over:
            start_time = time()
            if turns % 2 == 0:
                action, _ = red.action(state, turns)
            else:
                action, _ = black.action(state, turns)
            end_time = time()
            if action is None:
                # BUG FIX: original used undefined name `turn` (NameError).
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            state = senv.step(state, action)
            turns += 1
            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move = senv.done(state)
        self.player1.close()
        self.player2.close()
        # Normalize value: first to red's perspective, then to player1's.
        if turns % 2 == 1:  # black turn
            value = -value
        if idx % 2 == 1:  # return player1's value
            value = -value
        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns
def start(self):
    """Observe one full game between AlphaZero and the Eleeye UCCI engine,
    printing the board after every ply until the game ends or a side resigns.

    Fix vs. original: removed unused locals ``labels`` / ``labels_n``.
    """
    self.env.reset()
    self.load_model()
    self.pipe = self.model.get_pipes()
    self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState), pipes=self.pipe,
                           enable_resign=True, debugging=False)
    self.env.board.print_to_cl()
    # history interleaves states and actions: [state0, action0, state1, ...]
    history = [self.env.get_state()]
    turns = 0
    game_over = False
    final_move = None
    while not game_over:
        if (self.ai_move_first and turns % 2 == 0) or (not self.ai_move_first and turns % 2 == 1):
            # --- AlphaZero's turn ---
            start_time = time()
            no_act = None
            state = self.env.get_state()
            if state in history[:-1]:
                # Repetition: forbid moves previously played from this position.
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        act = history[i + 1]
                        if not self.env.red_to_move:
                            act = flip_move(act)
                        no_act.append(act)
            action, _ = self.ai.action(state, self.env.num_halfmoves, no_act)
            end_time = time()
            if action is None:
                print("AlphaZero 投降了!")
                break
            move = self.env.board.make_single_record(
                int(action[0]), int(action[1]), int(action[2]), int(action[3]))
            print(
                f"AlphaZero 选择移动 {move}, 消耗时间 {(end_time - start_time):.2f}s"
            )
            # Search works from red's perspective; un-flip for black.
            if not self.env.red_to_move:
                action = flip_move(action)
        else:
            # --- Eleeye's turn ---
            state = self.env.get_state()
            print(state)
            fen = senv.state_to_fen(state, turns)
            action = self.get_ucci_move(fen)
            if action is None:
                print("Eleeye 投降了!")
                break
            print(action)
            if not self.env.red_to_move:
                rec_action = flip_move(action)
            else:
                rec_action = action
            move = self.env.board.make_single_record(
                int(rec_action[0]), int(rec_action[1]),
                int(rec_action[2]), int(rec_action[3]))
            print(f"Eleeye 选择移动 {move}")
        history.append(action)
        self.env.step(action)
        history.append(self.env.get_state())
        self.env.board.print_to_cl()
        turns += 1
        sleep(1)
        game_over, final_move = self.env.board.is_end_final_move()
        print(game_over, final_move)
    if final_move:
        # Board detected a forced terminal move; play it out for the record.
        move = self.env.board.make_single_record(int(final_move[0]), int(final_move[1]),
                                                 int(final_move[2]), int(final_move[3]))
        print(f"Final Move {move}")
        if not self.env.red_to_move:
            final_move = flip_move(final_move)
        self.env.step(final_move)
    self.env.board.print_to_cl()
    self.ai.close()
    print(f"胜者是 is {self.env.board.winner} !!!")
    self.env.board.print_record()
class UCI:
    """UCI protocol front-end for the CCZero engine.

    Reads commands from stdin in ``main`` and dispatches each to a
    ``cmd_<name>`` method. Search runs on a background thread (``cmd_go`` /
    ``search_action``); a ``Timer`` enforces the time budget by invoking
    ``cmd_stop``.
    """

    def __init__(self, config: Config):
        self.config = config
        self.args = None              # arguments of the command being processed
        self.state = None             # current position (senv string state)
        self.is_red_turn = None
        self.player = None            # CChessPlayer while a search is running
        self.model = None
        self.pipe = None
        self.is_ready = False
        self.search_tree = defaultdict(VisitState)
        self.remain_time = None       # seconds allotted to the current search
        self.history = None           # interleaved [state, action, state, ...]
        self.turns = 0
        self.start_time = None
        self.end_time = None
        self.t = None                 # Timer that fires cmd_stop on timeout
        self.use_history = False

    def main(self):
        """Blocking command loop: read a line, dispatch to cmd_<verb>."""
        while True:
            cmd = input()
            logger.debug(f"CMD: {cmd}")
            cmds = cmd.split(' ')
            self.args = cmds[1:]
            method = getattr(self, 'cmd_' + cmds[0], None)
            if method != None:
                method()
            else:
                logger.error(f"Error command: {cmd}")

    def cmd_uci(self):
        """Identify the engine, advertise options, and initialize the session."""
        print('id name CCZero')
        print('id author https://cczero.org')
        print('id version 2.4')
        print('option name gpu spin default 0 min 0 max 7')
        print('option name Threads spin default 10 min 0 max 1024')
        print('uciok')
        sys.stdout.flush()
        set_session_config(per_process_gpu_memory_fraction=1, allow_growth=True, device_list=self.config.opts.device_list)
        self.use_history = self.load_model()
        self.is_ready = True
        self.turns = 0
        self.remain_time = None
        self.state = senv.INIT_STATE
        self.history = [self.state]
        self.is_red_turn = True

    def cmd_ucinewgame(self):
        """Reset position, history and search tree for a fresh game."""
        self.state = senv.INIT_STATE
        self.history = [self.state]
        self.is_ready = True
        self.is_red_turn = True
        self.search_tree = defaultdict(VisitState)

    def cmd_setoption(self):
        '''
        setoption name <id> [value <x>]
        '''
        if len(self.args) > 3:
            id = self.args[1]
            if id == 'gpu':
                value = int(self.args[3])
                self.config.opts.device_list = value
                set_session_config(per_process_gpu_memory_fraction=1, allow_growth=True, device_list=self.config.opts.device_list)
            if id == 'Threads':
                value = int(self.args[3])
                self.config.play.search_threads = value

    def cmd_isready(self):
        """Reply 'readyok' once initialization (cmd_uci) has completed."""
        if self.is_ready == True:
            print('readyok')
            sys.stdout.flush()
        logger.debug(f"is_ready = {self.is_ready}")

    def cmd_position(self):
        '''
        position {fen <fenstring> | startpos } [moves <move1> .... <moven>]
        '''
        if not self.is_ready:
            return
        move_idx = -1  # index in self.args where the move list starts, -1 = none
        if len(self.args) > 0:
            if self.args[0] == 'fen':
                # init with fen string
                fen = self.args[1]
                try:
                    self.state = senv.fen_to_state(fen)
                except Exception as e:
                    logger.error(f"cmd position error! cmd = {self.args}, {e}")
                    return
                self.history = [self.state]
                turn = self.args[2]
                if turn == 'b':
                    # Internal state is always from the side-to-move's view.
                    self.state = senv.fliped_state(self.state)
                    self.is_red_turn = False
                    self.turns = (int(self.args[6]) - 1) * 2 + 1
                else:
                    self.is_red_turn = True
                    self.turns = (int(self.args[6]) - 1) * 2
                if len(self.args) > 7 and self.args[7] == 'moves':
                    move_idx = 8
            elif self.args[0] == 'startpos':
                self.state = senv.INIT_STATE
                self.is_red_turn = True
                self.history = [self.state]
                self.turns = 0
                if len(self.args) > 1 and self.args[1] == 'moves':
                    move_idx = 2
            elif self.args[0] == 'moves':
                move_idx = 1
        else:
            self.state = senv.INIT_STATE
            self.is_red_turn = True
            self.history = [self.state]
            self.turns = 0
        logger.debug(f"state = {self.state}")
        # senv.render(self.state)
        # execute moves
        if move_idx != -1:
            for i in range(move_idx, len(self.args)):
                action = senv.parse_ucci_move(self.args[i])
                if not self.is_red_turn:
                    action = flip_move(action)
                self.history.append(action)
                self.state = senv.step(self.state, action)
                self.is_red_turn = not self.is_red_turn
                self.turns += 1
                self.history.append(self.state)
            logger.debug(f"state = {self.state}")
            # senv.render(self.state)

    def cmd_fen(self):
        """Shorthand: 'fen ...' behaves like 'position fen ...'."""
        self.args.insert(0, 'fen')
        self.cmd_position()

    def cmd_go(self):
        '''
        go ... — start searching the current position. Supported options:
        wtime <x> / btime <x>  remaining time in ms for white/black (sets the
                               clock for the side to move);
        depth <x>              fixed search depth;
        movetime <x>           search exactly <x> ms;
        infinite               search until 'stop' (or a mate is found).
        Unsupported UCI options (searchmoves, winc/binc, movestogo, ponder,
        nodes, mate) are ignored.
        '''
        if not self.is_ready:
            return
        self.start_time = time()
        self.t = None
        depth = None
        infinite = True
        self.remain_time = None
        self.model.close_pipes()
        self.pipe = self.model.get_pipes(need_reload=False)
        self.search_tree = defaultdict(VisitState)
        self.player = CChessPlayer(self.config, search_tree=self.search_tree, pipes=self.pipe,
                                   enable_resign=False, debugging=True, uci=True,
                                   use_history=self.use_history, side=self.turns % 2)
        for i in range(len(self.args)):
            if self.args[i] == 'depth':
                # depth is measured in units of 100 playouts
                depth = int(self.args[i + 1]) * 100
                infinite = False
            if self.args[i] == 'movetime' or self.args[i] == 'time':
                self.remain_time = int(self.args[i + 1]) / 1000
            if self.args[i] == 'infinite':
                infinite = True
            if self.args[i] == 'wtime':
                if self.is_red_turn:
                    self.remain_time = int(self.args[i + 1]) / 1000
                    depth = 3000
                    infinite = False
            if self.args[i] == 'btime':
                if not self.is_red_turn:
                    self.remain_time = int(self.args[i + 1]) / 1000
                    depth = 3000
                    infinite = False
        logger.debug(
            f"depth = {depth}, infinite = {infinite}, remain_time = {self.remain_time}"
        )
        # Search runs on a daemon thread; the Timer aborts it just before the
        # time budget expires so a bestmove is always printed in time.
        search_worker = Thread(target=self.search_action, args=(depth, infinite))
        search_worker.daemon = True
        search_worker.start()
        if self.remain_time:
            self.t = Timer(self.remain_time - 0.01, self.cmd_stop)
            self.t.start()

    def cmd_stop(self):
        """Abort the running search and emit the best move found so far."""
        if not self.is_ready:
            return
        if self.player:
            no_act = None
            if self.state in self.history[:-1]:
                # Repetition: ban the moves previously played from this position.
                no_act = []
                for i in range(len(self.history) - 1):
                    if self.history[i] == self.state:
                        no_act.append(self.history[i + 1])
            action, value, depth = self.player.close_and_return_action(
                self.state, self.turns, no_act)
            self.player = None
            self.model.close_pipes()
            self.info_best_move(action, value, depth)
        else:
            logger.error(f"bestmove none")

    def cmd_quit(self):
        sys.exit()

    def load_model(self, config_file=None):
        """Load model weights; fall back to an alternate config on failure.

        Returns True when the loaded model supports history features.
        """
        use_history = True
        self.model = CChessModel(self.config)
        weight_path = self.config.resource.model_best_weight_path
        if not config_file:
            # NOTE(review): uses module-level `config`, not `self.config` —
            # confirm this is intentional and that a global `config` exists.
            config_path = config.resource.model_best_path
            use_history = False
        else:
            config_path = os.path.join(config.resource.model_dir, config_file)
        try:
            if not load_model_weight(self.model, config_path, weight_path):
                self.model.build()
                use_history = True
        except Exception as e:
            logger.info(f"Exception {e}, 重新加载权重")
            # Retry once with the 128-filter single-history config.
            return self.load_model(config_file='model_128_l1_config.json')
        logger.info(f"use_history = {use_history}")
        return use_history

    def search_action(self, depth, infinite):
        """Worker-thread body: run the MCTS search and report the best move."""
        no_act = None
        _, _, _, check = senv.done(self.state, need_check=True)
        logger.debug(f"Check = {check}, state = {self.state}")
        if not check and self.state in self.history[:-1]:
            # Repetition (and not in check): ban replies that would repeat a
            # perpetual check/chase foul.
            no_act = []
            for i in range(len(self.history) - 1):
                if self.history[i] == self.state:
                    if senv.will_check_or_catch(self.state, self.history[i + 1]):
                        no_act.append(self.history[i + 1])
                        logger.debug(f"Foul: no act = {no_act}")
        action, _ = self.player.action(self.state, self.turns, no_act=no_act,
                                       depth=depth, infinite=infinite, hist=self.history)
        if self.t:
            # Search finished before the clock fired; cancel the abort timer.
            self.t.cancel()
        _, value = self.player.debug[self.state]
        depth = self.player.done_tasks // 100
        self.player.close(wait=False)
        self.player = None
        self.model.close_pipes()
        self.info_best_move(action, value, depth)

    def info_best_move(self, action, value, depth):
        """Print the UCI 'info' line and the 'bestmove' (with ponder) line."""
        self.end_time = time()
        if not self.is_red_turn:
            value = -value
        score = int(value * 1000)
        duration = self.end_time - self.start_time
        nps = int(depth * 100 / duration) * 1000
        print(
            f"info depth {depth} score {score} time {int(duration * 1000)} nps {nps}"
        )
        logger.debug(
            f"info depth {depth} score {score} time {int((self.end_time - self.start_time) * 1000)}"
        )
        sys.stdout.flush()
        # get ponder: the most-visited reply in the resulting position.
        state = senv.step(self.state, action)
        ponder = None
        if state in self.search_tree:
            node = self.search_tree[state]
            cnt = 0
            for mov, action_state in node.a.items():
                if action_state.n > cnt:
                    ponder = mov
                    cnt = action_state.n
        if not self.is_red_turn:
            action = flip_move(action)
        action = senv.to_uci_move(action)
        output = f"bestmove {action}"
        if ponder:
            if self.is_red_turn:
                ponder = flip_move(ponder)
            ponder = senv.to_uci_move(ponder)
            output += f" ponder {ponder}"
        print(output)
        logger.debug(output)
        sys.stdout.flush()
class SelfPlayWorker:
    """Self-play worker that pits AlphaZero against the Eleeye UCCI engine and
    stores the resulting (state, policy, value) training triples.

    Fixes vs. original: ``remove_play_data`` swallowed every exception with a
    bare ``except:``; it now suppresses only ``OSError`` (the best-effort
    cleanup intent is preserved). Minor idiom cleanups, no behavior change.
    """

    def __init__(self, config: Config, pipes=None, pid=None):
        self.config = config
        self.player = None
        self.cur_pipes = pipes   # shared pool of prediction pipes
        self.id = pid            # worker index (not the OS pid)
        self.buffer = []         # accumulated training samples, flushed by save_play_data
        self.pid = os.getpid()

    def start(self):
        """Loop forever: play games, log results, render short games."""
        self.pid = os.getpid()
        logger.debug(f"Selfplay#Start Process index = {self.id}, pid = {self.pid}")
        idx = 1
        self.buffer = []
        search_tree = defaultdict(VisitState)
        while True:
            start_time = time()
            value, turns, state, search_tree, store = self.start_game(idx, search_tree)
            end_time = time()
            # value is from red's perspective; AlphaZero is red on even idx.
            if value != 1 and value != -1:
                winner = 'Draw'
            elif idx % 2 == 0 and value == 1 or idx % 2 == 1 and value == -1:
                winner = 'AlphaHe'
            else:
                winner = 'Eleeye'
            logger.debug(f"Process {self.pid}-{self.id} play game {idx} time={(end_time - start_time):.1f} sec, "
                         f"turn={turns / 2}, value = {value:.2f}, winner is {winner}")
            if turns <= 10 and store:
                senv.render(state)
            if store:
                idx += 1

    def start_game(self, idx, search_tree):
        """Play one AlphaZero-vs-Eleeye game and (probabilistically) store it.

        Returns (value, turns, final_state, search_tree, stored).
        """
        pipes = self.cur_pipes.pop()
        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)
        # Resignation is only enabled for a fraction of games so that lost
        # positions still contribute training data.
        enable_resign = random() > self.config.play.enable_resign_rate
        self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes,
                                   enable_resign=enable_resign, debugging=False)
        state = senv.INIT_STATE
        history = [state]   # interleaved [state, action, state, ...]
        policys = []
        value = 0
        turns = 0           # even == red; odd == black
        game_over = False
        is_alpha_red = (idx % 2 == 0)
        final_move = None
        while not game_over:
            if (is_alpha_red and turns % 2 == 0) or (not is_alpha_red and turns % 2 == 1):
                # --- AlphaZero's move ---
                no_act = None
                if state in history[:-1]:
                    # Repetition: ban previously-played moves from this position.
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            no_act.append(history[i + 1])
                action, policy = self.player.action(state, turns, no_act)
                if action is None:
                    logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
            else:
                # --- Eleeye's move ---
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
                if turns % 2 == 1:
                    action = flip_move(action)
                try:
                    # Synthesize a one-hot policy for the engine's move.
                    policy = self.build_policy(action, False)
                except Exception as e:
                    logger.error(f"Build policy error {e}, action = {action}, state = {state}, fen = {fen}")
                    value = 0
                    break
            history.append(action)
            policys.append(policy)
            state = senv.step(state, action)
            turns += 1
            history.append(state)
            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = senv.evaluate(state)
            else:
                game_over, value, final_move = senv.done(state)
        if final_move:
            # Play out the forced terminal move so the record is complete.
            policy = self.build_policy(final_move, False)
            history.append(final_move)
            policys.append(policy)
            state = senv.step(state, final_move)
            history.append(state)
        self.player.close()
        if turns % 2 == 1:  # black turn
            value = -value
        v = value
        # Down-sample draws and very short games to keep the data balanced.
        if v == 0 or turns <= 10:
            store = random() > 0.7
        else:
            store = True
        if store:
            data = []
            for i in range(turns):
                k = i * 2
                data.append([history[k], policys[i], value])
                value = -value
            self.save_play_data(idx, data)
        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, search_tree, store

    def get_ucci_move(self, fen, time=3):
        """Query the Eleeye engine for a move; None means resignation."""
        p = subprocess.Popen(self.config.resource.eleeye_path,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True)
        setfen = f'position fen {fen}\n'
        setrandom = f'setoption randomness {self.config.opts.random}\n'
        cmd = 'ucci\n' + setrandom + setfen + f'go time {time * 1000}\n'
        try:
            out, err = p.communicate(cmd, timeout=time + 0.5)
        except subprocess.TimeoutExpired:
            p.kill()
            try:
                out, err = p.communicate()
            except Exception as e:
                logger.error(f"{e}, cmd = {cmd}")
                # Retry with a bigger time budget.
                return self.get_ucci_move(fen, time + 1)
        lines = out.split('\n')
        if lines[-2] == 'nobestmove':
            return None
        move = lines[-2].split(' ')[1]
        if move == 'depth':
            move = lines[-1].split(' ')[6]
        return senv.parse_ucci_move(move)

    def save_play_data(self, idx, data):
        """Buffer samples; flush to disk every nb_game_in_file games."""
        self.buffer += data
        if idx % self.config.play_data.nb_game_in_file != 0:
            return
        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        """Best-effort pruning of the oldest data files beyond max_file_num."""
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        try:
            for i in range(len(files) - self.config.play_data.max_file_num):
                os.remove(files[i])
        except OSError:
            # FIX: was a bare `except:`; another worker may have removed the
            # file already — ignore filesystem races only.
            pass

    def build_policy(self, action, flip):
        """Return a one-hot policy vector (as a list) for `action`."""
        labels_n = len(ActionLabelsRed)
        move_lookup = {move: i for i, move in enumerate(ActionLabelsRed)}
        policy = np.zeros(labels_n)
        policy[move_lookup[action]] = 1
        if flip:
            policy = flip_policy(policy)
        return list(policy)
class EvaluateWorker:
    """Distributed evaluation worker: plays the base ('bt') model against an
    unchecked candidate ('ng') model with randomized playouts, uploads each
    game record, and keeps going while the server still wants this pairing.

    Fix vs. original: ``start_game`` read ``final_move`` after the game loop
    without ever initializing it — a NameError whenever the loop exited via
    resignation or the no-eat/max-length cutoff before ``senv.done`` ran.
    It is now initialized to None. Also dropped the redundant ``score = score``.
    """

    def __init__(self, config: Config, pipes1=None, pipes2=None, pid=None,
                 data=None, hist_base=True, hist_ng=True):
        self.config = config
        self.player_bt = None
        self.player_ng = None
        self.pid = pid
        self.pipes_bt = pipes1     # prediction pipes for the base model
        self.pipes_ng = pipes2     # prediction pipes for the candidate model
        self.data = data           # server payload with 'base'/'unchecked' digests
        self.hist_base = hist_base
        self.hist_ng = hist_ng

    def start(self):
        """Evaluation loop: play, score, upload, re-check the server pairing."""
        # Stagger process start-up to avoid a thundering herd on the GPU.
        sleep((self.pid % self.config.play.max_processes) * 10)
        logger.debug(f"Evaluate#Start Process index = {self.pid}, pid = {os.getpid()}")
        need_evaluate = True
        self.config.opts.evaluate = True
        while need_evaluate:
            # Randomize which model plays red.
            idx = 0 if random() > 0.5 else 1
            start_time = time()
            value, turns, data = self.start_game(idx)
            end_time = time()
            # value is red's result; idx says which model held red.
            if (value == 1 and idx == 0) or (value == -1 and idx == 1):
                result = '基准模型胜'
            elif (value == 1 and idx == 1) or (value == -1 and idx == 0):
                result = '待评测模型胜'
            else:
                result = '和棋'
            if value == -1:   # loss
                score = 0
            elif value == 1:  # win
                score = 1
            else:
                score = 0.5
            # Normalize score to the candidate model's perspective.
            if idx == 0:
                score = 1 - score
            logger.info(f"进程{self.pid}评测完毕 用时{(end_time - start_time):.1f}秒, "
                        f"{turns / 2}回合, {result}, 得分:{score}, value = {value}, idx = {idx}")
            response = self.save_play_data(idx, data, value, score)
            if response and int(response['status']) == 0:
                logger.info('评测结果上传成功!')
            else:
                logger.info(f"评测结果上传失败,服务器返回{response}")
            # Keep evaluating only while the server still pairs the same digests.
            response = http_request(self.config.internet.get_evaluate_model_url)
            if int(response['status']) == 0 and response['data']['base']['digest'] == self.data['base']['digest']\
                and response['data']['unchecked']['digest'] == self.data['unchecked']['digest']:
                need_evaluate = True
                logger.info(f"进程{self.pid}继续评测")
            else:
                need_evaluate = False
                logger.info(f"进程{self.pid}终止评测")

    def start_game(self, idx):
        """Play one evaluation game with randomized playouts.

        Returns (value, turns, data) where value is red's result and data is
        the upload record [red_digest, black_digest, init_state, [state, v]...].
        """
        sleep(random())
        # Randomize search strength a little between games.
        playouts = randint(8, 12) * 100
        self.config.play.simulation_num_per_move = playouts
        logger.info(f"Set playouts = {self.config.play.simulation_num_per_move}")
        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)
        self.player1 = CChessPlayer(self.config, search_tree=search_tree1, pipes=pipe1,
                                    debugging=False, enable_resign=False, use_history=self.hist_base)
        self.player2 = CChessPlayer(self.config, search_tree=search_tree2, pipes=pipe2,
                                    debugging=False, enable_resign=False, use_history=self.hist_ng)
        # even: bst = red, ng = black; odd: bst = black, ng = red
        if idx % 2 == 0:
            red = self.player1
            black = self.player2
            logger.info(f"进程id = {self.pid} 基准模型执红,待评测模型执黑")
        else:
            red = self.player2
            black = self.player1
            logger.info(f"进程id = {self.pid} 待评测模型执红,基准模型执黑")
        state = senv.INIT_STATE
        history = [state]
        value = 0       # red's value
        turns = 0       # even == red; odd == black
        game_over = False
        final_move = None   # FIX: must exist even when the loop breaks early
        no_eat_count = 0
        check = False
        increase_temp = False
        no_act = []
        while not game_over:
            start_time = time()
            if turns % 2 == 0:
                action, _ = red.action(state, turns, no_act=no_act, increase_temp=increase_temp)
            else:
                action, _ = black.action(state, turns, no_act=no_act, increase_temp=increase_temp)
            end_time = time()
            if self.config.opts.log_move:
                logger.debug(f"进程id = {self.pid}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}")
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            history.append(action)
            state, no_eat = senv.new_step(state, action)
            turns += 1
            if no_eat:
                no_eat_count += 1
            else:
                no_eat_count = 0
            history.append(state)
            # 120 consecutive non-capturing plies or max length => draw.
            if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(state, need_check=True)
                no_act = []
                increase_temp = False
                if not game_over:
                    if not senv.has_attack_chessman(state):
                        logger.info(f"双方无进攻子力,作和。state = {state}")
                        game_over = True
                        value = 0
                if not game_over and not check and state in history[:-1]:
                    # Repetition handling: forbid perpetual check/chase moves;
                    # count idle repetitions and declare a draw after three.
                    free_move = defaultdict(int)
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            if senv.will_check_or_catch(state, history[i+1]):
                                no_act.append(history[i + 1])
                            elif not senv.be_catched(state, history[i+1]):
                                increase_temp = True
                                free_move[state] += 1
                                if free_move[state] >= 3:
                                    # treat three idle repetitions as a draw
                                    game_over = True
                                    value = 0
                                    logger.info("闲着循环三次,作和棋处理")
                                    break
        if final_move:
            history.append(final_move)
            state = senv.step(state, final_move)
            turns += 1
            value = - value
            history.append(state)
        data = []
        if idx % 2 == 0:
            data = [self.data['base']['digest'], self.data['unchecked']['digest']]
        else:
            data = [self.data['unchecked']['digest'], self.data['base']['digest']]
        self.player1.close()
        self.player2.close()
        if turns % 2 == 1:  # black turn
            value = -value
        v = value
        data.append(history[0])
        # Record per-ply values, alternating perspective each half-move.
        for i in range(turns):
            k = i * 2
            data.append([history[k + 1], v])
            v = -v
        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns, data

    def save_play_data(self, idx, data, value, score):
        """Write the game record to disk, then upload it to the server."""
        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        filename = rc.play_data_filename_tmpl % game_id
        path = os.path.join(rc.play_data_dir, filename)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, data)
        logger.info(f"Uploading play data (unknown) ...")
        red, black = data[0], data[1]
        return self.upload_eval_data(path, filename, red, black, value, score)

    def upload_eval_data(self, path, filename, red, black, result, score):
        """POST the game file plus its metadata to the evaluation endpoint."""
        hash = self.fetch_digest(path)
        data = {'digest': self.data['unchecked']['digest'], 'red_digest': red, 'black_digest': black,
                'result': result, 'score': score, 'hash': hash}
        response = upload_file(self.config.internet.upload_eval_url, path, filename, data, rm=False)
        return response

    def fetch_digest(self, file_path):
        """SHA-256 hex digest of the file, or None if it does not exist."""
        if os.path.exists(file_path):
            m = hashlib.sha256()
            with open(file_path, "rb") as f:
                m.update(f.read())
            return m.hexdigest()
        return None
def start(self, human_first=True): self.env.reset() self.load_model() self.pipe = self.model.get_pipes() self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState), pipes=self.pipe, enable_resign=True, debugging=False) self.human_move_first = human_first labels = ActionLabelsRed labels_n = len(ActionLabelsRed) self.env.board.print_to_cl() while not self.env.board.is_end(): if human_first == self.env.red_to_move: self.env.board.calc_chessmans_moving_list() is_correct_chessman = False is_correct_position = False chessman = None while not is_correct_chessman: title = "Please enter the chess piece position: " input_chessman_pos = input(title) print(input_chessman_pos) x, y = int(input_chessman_pos[0]), int( input_chessman_pos[1]) chessman = self.env.board.chessmans[x][y] if chessman != None and chessman.is_red == self.env.board.is_red_turn: is_correct_chessman = True print( f"The current chess piece is {chessman.name},places where you can play:" ) for point in chessman.moving_list: print(point.x, point.y) else: print( "No chess piece with this name was found or it was not his turn to walk" ) while not is_correct_position: title = "Please enter the location of the child: " input_chessman_pos = input(title) x, y = int(input_chessman_pos[0]), int( input_chessman_pos[1]) is_correct_position = chessman.move(x, y) if is_correct_position: self.env.board.print_to_cl() self.env.board.clear_chessmans_moving_list() else: action, policy = self.ai.action(self.env.get_state(), self.env.num_halfmoves) if not self.env.red_to_move: action = flip_move(action) if action is None: print("AI surrendered!") break self.env.step(action) print(f"AI chooses to move {action}") self.env.board.print_to_cl() self.ai.close() print(f"The winner is is {self.env.board.winner} !!!") self.env.board.print_record()
def start_game(self, idx, search_tree):
    """Play one pure self-play game and (probabilistically) store its record.

    Args:
        idx: 1-based game index; drives search-tree reset cadence.
        search_tree: MCTS tree, possibly shared across games.

    Returns:
        (v, turns, state, store): red's final value, number of half-moves,
        the final state string, and whether the game was saved.
    """
    pipes = self.cur_pipes.pop()
    # Reset the shared tree periodically (or always when sharing is off).
    if not self.config.play.share_mtcs_info_in_self_play or \
        idx % self.config.play.reset_mtcs_info_per_game == 0:
        search_tree = defaultdict(VisitState)
    # Resignation enabled only for a fraction of games, so lost positions
    # still produce training data.
    if random() > self.config.play.enable_resign_rate:
        enable_resign = True
    else:
        enable_resign = False
    self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes,
                               enable_resign=enable_resign, debugging=False,
                               use_history=self.use_history)
    state = senv.INIT_STATE
    # history interleaves states and actions: [state0, action0, state1, ...]
    history = [state]
    # policys = []
    value = 0
    turns = 0  # even == red; odd == black
    game_over = False
    final_move = None
    no_eat_count = 0   # consecutive non-capturing plies (120 => draw)
    check = False
    no_act = []        # banned moves (repetition fouls) for the next search
    increase_temp = False
    while not game_over:
        start_time = time()
        action, policy = self.player.action(state, turns, no_act, increase_temp=increase_temp)
        end_time = time()
        if action is None:
            logger.error(f"{turns % 2} (0 = red; 1 = black) has resigned!")
            value = -1
            break
        # if self.config.opts.log_move:
        #     logger.info(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
        history.append(action)
        # policys.append(policy)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
            game_over = True
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)
        if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
            # Draw by the no-capture rule or by game-length cutoff.
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(
                state, need_check=True)
            if not game_over:
                if not senv.has_attack_chessman(state):
                    # Neither side has attacking material left => draw.
                    logger.error(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0
            increase_temp = False
            no_act = []
            if not game_over and not check and state in history[:-1]:
                # Repetition handling: ban perpetual check/chase moves; count
                # idle repetitions and declare a draw after three.
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        if senv.will_check_or_catch(state, history[i + 1]):
                            no_act.append(history[i + 1])
                        elif not senv.be_catched(state, history[i + 1]):
                            increase_temp = True
                            free_move[state] += 1
                            if free_move[state] >= 3:
                                # treat three idle repetitions as a draw
                                game_over = True
                                value = 0
                                logger.error("闲着循环三次,作和棋处理")
                                break
    if final_move:
        # Play out the forced terminal move so the record is complete.
        # policy = self.build_policy(final_move, False)
        history.append(final_move)
        # policys.append(policy)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)
    self.player.close()
    del search_tree
    del self.player
    gc.collect()
    # Normalize value to red's perspective.
    if turns % 2 == 1:  # black turn
        value = -value
    v = value
    # Down-sample very short games to keep the data balanced.
    if turns < 10:
        if random() > 0.9:
            store = True
        else:
            store = False
    else:
        store = True
    if store:
        data = [history[0]]
        for i in range(turns):
            k = i * 2
            data.append([history[k + 1], value])
            value = -value
        self.save_play_data(idx, data)
    self.cur_pipes.append(pipes)
    self.remove_play_data()
    return v, turns, state, store
class SelfPlayWorker:
    """Self-play worker that pits the AlphaZero player against the Eleeye
    UCCI engine, alternating colors by game index, and stores game data."""

    def __init__(self, config: Config, pipes=None, pid=None):
        self.config = config
        self.player = None
        self.cur_pipes = pipes  # shared pool of model-prediction pipes
        self.id = pid
        self.buffer = []  # accumulated play data, flushed every nb_game_in_file games
        self.pid = os.getpid()  # refreshed in start() (may run in a child process)

    def start(self):
        """Loop forever, playing games and logging the winner of each."""
        self.pid = os.getpid()
        logger.debug(
            f"Selfplay#Start Process index = {self.id}, pid = {self.pid}")
        idx = 1
        self.buffer = []
        while True:
            search_tree = defaultdict(VisitState)
            start_time = time()
            value, turns, state, store = self.start_game(idx, search_tree)
            end_time = time()
            # `value` is +1/-1 for a decisive game; idx parity says who was red.
            if value != 1 and value != -1:
                winner = 'Draw'
            elif idx % 2 == 0 and value == 1 or idx % 2 == 1 and value == -1:
                winner = 'AlphaHe'
            else:
                winner = 'Eleeye'
            logger.debug(
                f"Process {self.pid}-{self.id} play game {idx} time={(end_time - start_time):.1f} sec, "
                f"turn={turns / 2}, value = {value:.2f}, winner is {winner}")
            if turns <= 10 and store:
                senv.render(state)
            if store:
                # Only advance the game counter when the game was kept.
                idx += 1

    def start_game(self, idx, search_tree):
        """Play one game: AlphaZero is red on even idx, black on odd idx;
        the other side is driven by the Eleeye UCCI engine.

        Returns (v, turns, state, store) with `v` presumably from red's
        perspective (after the black-turn flip below).
        """
        pipes = self.cur_pipes.pop()
        if not self.config.play.share_mtcs_info_in_self_play or \
                idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)
        # Resignation enabled only for a random fraction of games.
        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False
        self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes,
                                   enable_resign=enable_resign, debugging=False)
        state = senv.INIT_STATE
        history = [state]  # alternating [state, action, state, ...]
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        is_alpha_red = True if idx % 2 == 0 else False
        final_move = None
        check = False
        while not game_over:
            if (is_alpha_red and turns % 2 == 0) or (not is_alpha_red and turns % 2 == 1):
                # AlphaZero's move; forbid moves that repeat a prior position.
                no_act = None
                if not check and state in history[:-1]:
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            no_act.append(history[i + 1])
                action, _ = self.player.action(state, turns, no_act)
                if action is None:
                    logger.debug(
                        f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
            else:
                # Engine's move via UCCI protocol.
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    logger.debug(
                        f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
                if turns % 2 == 1:
                    # Engine moves are absolute; internal moves are red-oriented.
                    action = flip_move(action)
            history.append(action)
            state = senv.step(state, action)
            turns += 1
            history.append(state)
            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(
                    state, need_check=True)
        if final_move:
            # Apply the forced finishing move reported by senv.done().
            history.append(final_move)
            state = senv.step(state, final_move)
            history.append(state)
            turns += 1
            value = -value
        self.player.close()
        del search_tree
        del self.player
        gc.collect()
        if turns % 2 == 1:  # black moved last: flip perspective
            value = -value
        v = value
        # Short games are stored only ~30% of the time.
        if turns <= 10:
            if random() > 0.7:
                store = True
            else:
                store = False
        else:
            store = True
        if store:
            # data = [initial_state, [state_1, value], [state_2, -value], ...]
            data = [history[0]]
            for i in range(turns):
                k = i * 2
                data.append([history[k + 1], value])
                value = -value
            self.save_play_data(idx, data)
        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store

    def get_ucci_move(self, fen, time=3):
        """Ask the Eleeye engine for a move on `fen`.

        NOTE: the `time` parameter (seconds of engine thinking time) shadows
        the imported time() function inside this method.
        Returns a parsed move, or None when the engine reports `nobestmove`.
        """
        p = subprocess.Popen(self.config.resource.eleeye_path,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True)
        setfen = f'position fen {fen}\n'
        setrandom = f'setoption randomness {self.config.opts.random}\n'
        cmd = 'ucci\n' + setrandom + setfen + f'go time {time * 1000}\n'
        try:
            out, err = p.communicate(cmd, timeout=time + 0.5)
        except subprocess.TimeoutExpired:
            p.kill()
            try:
                out, err = p.communicate()
            except Exception as e:
                # Retry with a longer budget if the engine hung completely.
                logger.error(f"{e}, cmd = {cmd}")
                return self.get_ucci_move(fen, time + 1)
        lines = out.split('\n')
        # Engine output ends with "bestmove ..." on the penultimate line.
        if lines[-2] == 'nobestmove':
            return None
        move = lines[-2].split(' ')[1]
        if move == 'depth':
            # Fallback parse when the last info line interleaves with bestmove.
            move = lines[-1].split(' ')[6]
        return senv.parse_ucci_move(move)

    def save_play_data(self, idx, data):
        """Buffer game data; flush to a timestamped file every
        nb_game_in_file games."""
        self.buffer += data
        if not idx % self.config.play_data.nb_game_in_file == 0:
            return
        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        """Delete the oldest data files once max_file_num is exceeded."""
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        try:
            for i in range(len(files) - self.config.play_data.max_file_num):
                os.remove(files[i])
        except:
            # Best-effort cleanup: another worker may remove files concurrently.
            pass

    def build_policy(self, action, flip):
        """Return a one-hot policy vector (length = number of action labels)
        for `action`, flipped to the red perspective when `flip` is True."""
        labels_n = len(ActionLabelsRed)
        move_lookup = {
            move: i
            for move, i in zip(ActionLabelsRed, range(labels_n))
        }
        policy = np.zeros(labels_n)
        policy[move_lookup[action]] = 1
        if flip:
            policy = flip_policy(policy)
        return list(policy)
class SelfPlayWorker:
    """Self-play worker built around CChessEnv: two CChessPlayer instances
    (sharing one search tree and pipe set) play each other and their recorded
    moves are saved as training data."""

    def __init__(self, config: Config, pipes=None, pid=None):
        self.config = config
        self.red = None
        self.black = None
        self.cur_pipes = pipes  # shared pool of model-prediction pipes
        self.pid = pid  # worker index, not the OS pid
        self.buffer = []

    def start(self):
        """Loop forever playing games; the search tree persists between
        games (subject to the reset policy in start_game)."""
        logger.debug(
            f"Selfplay#Start Process index = {self.pid}, pid = {os.getpid()}")
        idx = 1
        self.buffer = []
        search_tree = defaultdict(VisitState)
        while True:
            start_time = time()
            env, search_tree = self.start_game(idx, search_tree)
            end_time = time()
            logger.debug(
                f"Process{self.pid} play game {idx} time={end_time - start_time} sec, "
                f"turn={env.num_halfmoves / 2}:{env.winner}")
            if env.num_halfmoves <= 10:
                # Dump the first 10 board rows of suspiciously short games.
                for i in range(10):
                    logger.debug(f"{env.board.screen[i]}")
            idx += 1

    def start_game(self, idx, search_tree):
        """Play one self-play game in a fresh CChessEnv.

        Returns (env, search_tree) so the caller can inspect the finished
        environment and reuse the (possibly reset) tree.
        """
        pipes = self.cur_pipes.pop()
        env = CChessEnv(self.config).reset()
        if not self.config.play.share_mtcs_info_in_self_play or \
                idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)
        # Both colors share the same tree and pipes (one model plays itself).
        self.red = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes)
        self.black = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes)
        history = []
        cc = 0  # repetition counter; NOTE(review): computed but never used below
        while not env.done:
            start_time = time()
            if env.red_to_move:
                action = self.red.action(env)
            else:
                action = self.black.action(env)
            end_time = time()
            logger.debug(
                f"Process{self.pid} Playing: {env.red_to_move}, action: {action}, time: {end_time - start_time}s"
            )
            env.step(action)
            history.append(action)
            # Detect a move repeated every two full moves by the same side.
            if len(history) > 6 and history[-1] == history[-5]:
                cc = cc + 1
            else:
                cc = 0
            if env.num_halfmoves / 2 >= self.config.play.max_game_length:
                env.winner = Winner.draw
        # Encode the result as red's score: 1 win, -1 loss, 0 draw.
        if env.winner == Winner.red:
            red_win = 1
        elif env.winner == Winner.black:
            red_win = -1
        else:
            red_win = 0
        if env.num_halfmoves <= 10:
            logger.debug(f"History moves: {history}")
        self.red.finish_game(red_win)
        self.black.finish_game(-red_win)
        self.cur_pipes.append(pipes)
        self.save_record_data(env, write=idx % self.config.play_data.nb_game_save_record == 0)
        self.save_play_data(idx)
        self.remove_play_data()
        return env, search_tree

    def save_play_data(self, idx):
        """Interleave red/black per-move records into one chronological list,
        buffer it, and flush to disk every nb_game_in_file games."""
        data = []
        for i in range(len(self.red.moves)):
            data.append(self.red.moves[i])
            if i < len(self.black.moves):
                data.append(self.black.moves[i])
        self.buffer += data
        if not idx % self.config.play_data.nb_game_in_file == 0:
            return
        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def save_record_data(self, env, write=False):
        """Save the human-readable game record (only when `write` is True)."""
        if not write:
            return
        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_record_dir, rc.play_record_filename_tmpl % game_id)
        env.save_records(path)

    def remove_play_data(self):
        """Delete the oldest data files once max_file_num is exceeded."""
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        try:
            for i in range(len(files) - self.config.play_data.max_file_num):
                os.remove(files[i])
        except:
            # Best-effort: concurrent workers may race on the same files.
            pass
class EvaluateWorker:
    """Plays evaluation games between the current best model ("bt") and the
    next-generation model ("ng"), alternating colors each game, and reports
    the next-generation model's total score."""

    def __init__(self, config: Config, pipes1=None, pipes2=None, pid=None):
        self.config = config
        self.player_bt = None
        self.player_ng = None
        self.pid = pid
        self.pipes_bt = pipes1  # prediction pipes for the best model
        self.pipes_ng = pipes2  # prediction pipes for the next-generation model

    def start(self):
        """Play config.eval.game_num games.

        Returns:
            The next-generation model's score (1 per win, 0.5 per draw).
        """
        logger.debug(f"Evaluate#Start Process index = {self.pid}, pid = {os.getpid()}")
        score1 = 0  # best model's score
        score2 = 0  # next-generation model's score
        for idx in range(self.config.eval.game_num):
            start_time = time()
            score, turns = self.start_game(idx)
            end_time = time()
            # `score` is from the best model's perspective:
            # > 0 best model won, < 0 next-generation won, 0 draw.
            if score < 0:
                score2 += 1
            elif score > 0:
                score1 += 1
            else:
                score2 += 0.5
                score1 += 0.5
            logger.debug(f"Process{self.pid} play game {idx} time={(end_time - start_time):.1f} sec, "
                         f"turn={turns / 2}, best model {score1} - {score2} next generation model")
        return score2  # return next generation model's score

    def start_game(self, idx):
        """Play one evaluation game.

        Colors alternate by game index: even idx -> best model is red,
        odd idx -> best model is black.

        Returns:
            (value, turns): `value` is the result from the best model's
            perspective (1 win, -1 loss, 0 draw); `turns` is the number of
            half-moves played.
        """
        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)
        self.player1 = CChessPlayer(self.config, search_tree=search_tree1, pipes=pipe1,
                                    debugging=False, enable_resign=True)
        self.player2 = CChessPlayer(self.config, search_tree=search_tree2, pipes=pipe2,
                                    debugging=False, enable_resign=True)
        # even: bst = red, ng = black; odd: bst = black, ng = red
        if idx % 2 == 0:
            red = self.player1
            black = self.player2
            logger.debug("best model is red, ng is black")
        else:
            red = self.player2
            black = self.player1
            logger.debug("best model is black, ng is red")
        state = senv.INIT_STATE
        value = 0  # best model's value
        turns = 0  # even == red; odd == black
        game_over = False
        while not game_over:
            start_time = time()
            if turns % 2 == 0:
                action, _ = red.action(state, turns)
            else:
                action, _ = black.action(state, turns)
            end_time = time()
            if action is None:
                # BUG FIX: was `turn % 2` — `turn` is undefined and raised a
                # NameError whenever a player resigned; the variable is `turns`.
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            state = senv.step(state, action)
            turns += 1
            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move = senv.done(state)
        self.player1.close()
        self.player2.close()
        if turns % 2 == 1:  # black moved last: flip (presumably to red's perspective)
            value = -value
        if idx % 2 == 1:  # best model played black: flip to best model's perspective
            value = -value
        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns
def start_game(self, idx, search_tree):
    """Play one game of AlphaZero vs the Eleeye UCCI engine.

    AlphaZero is red on even `idx`, black on odd `idx`.

    Returns:
        (v, turns, state, store): result `v` (after the black-turn flip,
        presumably from red's perspective), half-move count, final state
        string, and whether the game's data was stored.
    """
    pipes = self.cur_pipes.pop()
    if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
        search_tree = defaultdict(VisitState)
    # Resignation enabled only for a random fraction of games.
    if random() > self.config.play.enable_resign_rate:
        enable_resign = True
    else:
        enable_resign = False
    self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes,
                               enable_resign=enable_resign, debugging=False)
    state = senv.INIT_STATE
    history = [state]  # alternating [state, action, state, ...]
    value = 0
    turns = 0  # even == red; odd == black
    game_over = False
    is_alpha_red = True if idx % 2 == 0 else False
    final_move = None
    check = False
    while not game_over:
        if (is_alpha_red and turns % 2 == 0) or (not is_alpha_red and turns % 2 == 1):
            # AlphaZero's move; forbid moves repeating a prior position.
            no_act = None
            if not check and state in history[:-1]:
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        no_act.append(history[i + 1])
            action, _ = self.player.action(state, turns, no_act)
            if action is None:
                logger.debug(
                    f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
        else:
            # Engine's move via UCCI.
            fen = senv.state_to_fen(state, turns)
            action = self.get_ucci_move(fen)
            if action is None:
                logger.debug(
                    f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            if turns % 2 == 1:
                # Engine moves are absolute; internal moves are red-oriented.
                action = flip_move(action)
        history.append(action)
        state = senv.step(state, action)
        turns += 1
        history.append(state)
        if turns / 2 >= self.config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(
                state, need_check=True)
    if final_move:
        # Apply the forced finishing move reported by senv.done().
        history.append(final_move)
        state = senv.step(state, final_move)
        history.append(state)
        turns += 1
        value = -value
    self.player.close()
    del search_tree
    del self.player
    gc.collect()
    if turns % 2 == 1:  # black moved last: flip perspective
        value = -value
    v = value
    # Short games are stored only ~30% of the time.
    if turns <= 10:
        if random() > 0.7:
            store = True
        else:
            store = False
    else:
        store = True
    if store:
        # data = [initial_state, [state_1, value], [state_2, -value], ...]
        data = [history[0]]
        for i in range(turns):
            k = i * 2
            data.append([history[k + 1], value])
            value = -value
        self.save_play_data(idx, data)
    self.cur_pipes.append(pipes)
    self.remove_play_data()
    return v, turns, state, store
def start_game(self, idx):
    """Play one evaluation game between the base model (player1) and the
    model under evaluation (player2), with a randomized playout budget.

    Colors alternate by game index: even idx -> base model is red.

    Returns:
        (value, turns, data): `value` is the game result after the
        black-turn flip (presumably from red's perspective — the caller
        must combine it with idx parity to know which model won);
        `data` is [red_digest, black_digest, initial_state,
        [state_1, v], [state_2, -v], ...].
    """
    sleep(random())
    # Randomize the simulation budget (800-1200 playouts) per game.
    playouts = randint(8, 12) * 100
    self.config.play.simulation_num_per_move = playouts
    logger.info(f"Set playouts = {self.config.play.simulation_num_per_move}")
    pipe1 = self.pipes_bt.pop()
    pipe2 = self.pipes_ng.pop()
    search_tree1 = defaultdict(VisitState)
    search_tree2 = defaultdict(VisitState)
    self.player1 = CChessPlayer(self.config, search_tree=search_tree1, pipes=pipe1,
                                debugging=False, enable_resign=False, use_history=self.hist_base)
    self.player2 = CChessPlayer(self.config, search_tree=search_tree2, pipes=pipe2,
                                debugging=False, enable_resign=False, use_history=self.hist_ng)
    # even: bst = red, ng = black; odd: bst = black, ng = red
    if idx % 2 == 0:
        red = self.player1
        black = self.player2
        logger.info(f"进程id = {self.pid} 基准模型执红,待评测模型执黑")
    else:
        red = self.player2
        black = self.player1
        logger.info(f"进程id = {self.pid} 待评测模型执红,基准模型执黑")
    state = senv.INIT_STATE
    history = [state]  # alternating [state, action, state, ...]
    value = 0  # best model's value
    turns = 0  # even == red; odd == black
    game_over = False
    # BUG FIX: `final_move` was referenced after the loop without ever being
    # initialized; a resignation before the first senv.done() call raised
    # a NameError at `if final_move:` below.
    final_move = None
    no_eat_count = 0  # consecutive half-moves without a capture
    check = False
    increase_temp = False
    no_act = []  # moves forbidden by the repetition rules below
    while not game_over:
        start_time = time()
        if turns % 2 == 0:
            action, _ = red.action(state, turns, no_act=no_act, increase_temp=increase_temp)
        else:
            action, _ = black.action(state, turns, no_act=no_act, increase_temp=increase_temp)
        end_time = time()
        if self.config.opts.log_move:
            logger.debug(f"进程id = {self.pid}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}")
        if action is None:
            logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
            value = -1
            break
        history.append(action)
        state, no_eat = senv.new_step(state, action)
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)
        # Draw after 120 capture-free half-moves or an over-long game.
        if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state, need_check=True)
        no_act = []
        increase_temp = False
        if not game_over:
            if not senv.has_attack_chessman(state):
                # Neither side has attacking material: declare a draw.
                logger.info(f"双方无进攻子力,作和。state = {state}")
                game_over = True
                value = 0
        # Repetition handling: the current position occurred before.
        if not game_over and not check and state in history[:-1]:
            free_move = defaultdict(int)
            for i in range(len(history) - 1):
                if history[i] == state:
                    if senv.will_check_or_catch(state, history[i+1]):
                        # Repeating with check/chase is forbidden: ban that move.
                        no_act.append(history[i + 1])
                    elif not senv.be_catched(state, history[i+1]):
                        # Idle repetition: raise temperature to diversify.
                        increase_temp = True
                        free_move[state] += 1
                        if free_move[state] >= 3:
                            # Three idle repetitions: treat as a draw.
                            game_over = True
                            value = 0
                            logger.info("闲着循环三次,作和棋处理")
                            break
    if final_move:
        # Apply the forced finishing move reported by senv.done().
        history.append(final_move)
        state = senv.step(state, final_move)
        turns += 1
        value = - value
        history.append(state)
    # Tag the game data with the two model digests, ordered [red, black].
    data = []
    if idx % 2 == 0:
        data = [self.data['base']['digest'], self.data['unchecked']['digest']]
    else:
        data = [self.data['unchecked']['digest'], self.data['base']['digest']]
    self.player1.close()
    self.player2.close()
    if turns % 2 == 1:  # black moved last: flip (presumably to red's perspective)
        value = -value
    v = value
    data.append(history[0])
    for i in range(turns):
        k = i * 2
        data.append([history[k + 1], v])
        v = -v
    self.pipes_bt.append(pipe1)
    self.pipes_ng.append(pipe2)
    return value, turns, data
class SelfPlayWorker:
    """Self-play worker that records per-move policies alongside states and
    can upload finished data files to a central server (distributed mode)."""

    def __init__(self, config: Config, pipes=None, pid=None):
        self.config = config
        self.player = None
        self.cur_pipes = pipes  # shared pool of model-prediction pipes
        self.id = pid
        self.buffer = []  # accumulated play data, flushed every nb_game_in_file games
        self.pid = os.getpid()

    def start(self):
        """Loop forever playing games; the search tree is threaded through
        start_game so it can be shared between games."""
        logger.debug(f"Selfplay#Start Process index = {self.id}, pid = {self.pid}")
        idx = 1
        self.buffer = []
        search_tree = defaultdict(VisitState)
        while True:
            start_time = time()
            value, turns, state, search_tree, store = self.start_game(idx, search_tree)
            end_time = time()
            logger.debug(f"Process {self.pid}-{self.id} play game {idx} time={(end_time - start_time):.1f} sec, "
                         f"turn={turns / 2}, winner = {value:.2f} (1 = red, -1 = black, 0 draw)")
            if turns <= 10:
                senv.render(state)
            if store:
                # Only advance the game counter when the game was kept.
                idx += 1

    def start_game(self, idx, search_tree):
        """Play one self-play game, recording (state, policy, value) triples.

        Returns (v, turns, state, search_tree, store) where `v` is the final
        result from red's perspective (per the caller's log convention).
        """
        pipes = self.cur_pipes.pop()
        if not self.config.play.share_mtcs_info_in_self_play or \
                idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)
        # Resignation enabled only for a random fraction of games.
        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False
        self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes,
                                   enable_resign=enable_resign, debugging=False)
        state = senv.INIT_STATE
        history = [state]  # alternating [state, action, state, ...]
        policys = []
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        final_move = None
        while not game_over:
            # Forbid moves that would repeat a previously seen position.
            no_act = None
            if state in history[:-1]:
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        no_act.append(history[i + 1])
            start_time = time()
            action, policy = self.player.action(state, turns, no_act)
            end_time = time()
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            # logger.debug(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            # for move, action_state in self.player.search_results.items():
            #     if action_state[0] >= 20:
            #         logger.info(f"move: {move}, prob: {action_state[0]}, Q_value: {action_state[1]:.2f}, Prior: {action_state[2]:.3f}")
            # self.player.search_results = {}
            history.append(action)
            policys.append(policy)
            state = senv.step(state, action)
            turns += 1
            history.append(state)
            if turns / 2 >= self.config.play.max_game_length:
                # Over-long game: score the final position heuristically.
                game_over = True
                value = senv.evaluate(state)
            else:
                game_over, value, final_move = senv.done(state)
        if final_move:
            # Apply the forced finishing move reported by senv.done().
            policy = self.build_policy(final_move, False)
            history.append(final_move)
            policys.append(policy)
            state = senv.step(state, final_move)
            history.append(state)
        self.player.close()
        if turns % 2 == 1:  # black moved last: flip perspective
            value = -value
        v = value
        # Draws are stored only ~50% of the time.
        if v == 0:
            if random() > 0.5:
                store = True
            else:
                store = False
        else:
            store = True
        if store:
            # data = [[state_i, policy_i, value_i], ...] with alternating sign
            # so each entry is valued for the side to move.
            data = []
            for i in range(turns):
                k = i * 2
                data.append([history[k], policys[i], value])
                value = -value
            self.save_play_data(idx, data)
        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, search_tree, store

    def save_play_data(self, idx, data):
        """Buffer data; every nb_game_in_file games, flush to disk and, in
        distributed mode, upload the file on a daemon thread."""
        self.buffer += data
        if not idx % self.config.play_data.nb_game_in_file == 0:
            return
        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        filename = rc.play_data_filename_tmpl % game_id
        path = os.path.join(rc.play_data_dir, filename)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        if self.config.internet.distributed:
            upload_worker = Thread(target=self.upload_play_data, args=(path, filename), name="upload_worker")
            upload_worker.daemon = True
            upload_worker.start()
        self.buffer = []

    def upload_play_data(self, path, filename):
        """Upload one data file, tagged with the best-model digest and user."""
        digest = CChessModel.fetch_digest(self.config.resource.model_best_weight_path)
        data = {'digest': digest, 'username': self.config.internet.username}
        response = upload_file(self.config.internet.upload_url, path, filename, data, rm=False)
        # NOTE(review): success path subscripts response['status'] but the
        # failure path reads response.msg — verify upload_file's return type.
        if response is not None and response['status'] == 0:
            logger.info(f"Upload play data (unknown) finished.")
        else:
            logger.error(f'Upload play data (unknown) failed. {response.msg if response is not None else None}')

    def remove_play_data(self):
        """Delete the oldest data files once max_file_num is exceeded."""
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        try:
            for i in range(len(files) - self.config.play_data.max_file_num):
                os.remove(files[i])
        except:
            # Best-effort: concurrent workers may race on the same files.
            pass

    def build_policy(self, action, flip):
        """Return a one-hot policy vector (length = number of action labels)
        for `action`, flipped to the red perspective when `flip` is True."""
        labels_n = len(ActionLabelsRed)
        move_lookup = {move: i for move, i in zip(ActionLabelsRed, range(labels_n))}
        policy = np.zeros(labels_n)
        policy[move_lookup[action]] = 1
        if flip:
            policy = flip_policy(policy)
        return list(policy)
def self_play_buffer(config, cur, use_history=False) -> (tuple, list):
    """Play one interactive-console self-play game and return its data.

    Borrows a prediction pipe from `cur`, plays a full game printing each
    move to stdout, then returns the pipe.

    Returns:
        ((turns, v), data): half-move count and final result `v` (after the
        black-turn flip, presumably from red's perspective), plus
        data = [initial_state, [state_1, value], [state_2, -value], ...].
    """
    pipe = cur.pop()  # borrow
    # Resignation enabled only for a random fraction of games.
    if random() > config.play.enable_resign_rate:
        enable_resign = True
    else:
        enable_resign = False
    player = CChessPlayer(config, search_tree=defaultdict(VisitState), pipes=pipe,
                          enable_resign=enable_resign, debugging=False, use_history=use_history)
    state = senv.INIT_STATE
    history = [state]  # alternating [state, action, state, ...]
    # policys = []
    value = 0
    turns = 0
    game_over = False
    final_move = None
    no_eat_count = 0  # consecutive half-moves without a capture
    check = False
    no_act = None
    increase_temp = False
    while not game_over:
        start_time = time()
        action, policy = player.action(state, turns, no_act, increase_temp=increase_temp)
        end_time = time()
        if action is None:
            print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
            value = -1
            break
        print(f"博弈中: 回合{turns / 2 + 1} {'红方走棋' if turns % 2 == 0 else '黑方走棋'}, 着法: {action}, 用时: {(end_time - start_time):.1f}s")
        # policys.append(policy)
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            # Defensive: treat an illegal transition as an aborted draw.
            logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
            game_over = True
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)
        # Draw after 120 capture-free half-moves or an over-long game.
        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state, need_check=True)
        no_act = []
        increase_temp = False
        if not game_over:
            if not senv.has_attack_chessman(state):
                # Neither side has attacking material: declare a draw.
                logger.info(f"双方无进攻子力,作和。state = {state}")
                game_over = True
                value = 0
        # Repetition handling: the current position occurred before.
        if not game_over and not check and state in history[:-1]:
            free_move = defaultdict(int)
            for i in range(len(history) - 1):
                if history[i] == state:
                    if senv.will_check_or_catch(state, history[i+1]):
                        # Repeating with check/chase is forbidden: ban that move.
                        no_act.append(history[i + 1])
                    elif not senv.be_catched(state, history[i+1]):
                        # Idle repetition: raise temperature to diversify.
                        increase_temp = True
                        free_move[state] += 1
                        if free_move[state] >= 3:
                            # Three idle repetitions: treat as a draw.
                            game_over = True
                            value = 0
                            logger.info("闲着循环三次,作和棋处理")
                            break
    if final_move:
        # Apply the forced finishing move reported by senv.done().
        # policy = build_policy(final_move, False)
        history.append(final_move)
        # policys.append(policy)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)
    player.close()
    del player
    gc.collect()
    if turns % 2 == 1:  # black moved last: flip perspective
        value = -value
    v = value
    # data = [initial_state, [state_1, value], [state_2, -value], ...]
    data = [history[0]]
    for i in range(turns):
        k = i * 2
        data.append([history[k + 1], value])
        value = -value
    cur.append(pipe)
    return (turns, v), data
class SelfPlayWorker:
    """Distributed self-play worker (protocol version 2.4): plays games with
    optional position-history features, stores state/value data, and uploads
    finished files stamped with Beijing time."""

    def __init__(self, config: Config, pipes=None, pid=None, use_history=False):
        self.config = config
        self.player = None
        self.cur_pipes = pipes  # shared pool of model-prediction pipes
        self.id = pid
        self.buffer = []  # accumulated play data, flushed every nb_game_in_file games
        self.pid = os.getpid()  # refreshed in start()
        self.use_history = use_history

    def start(self):
        """Loop forever playing games, staggering process start-up."""
        self.pid = os.getpid()
        # Stagger worker start-up (up to ~ran*10 seconds) to spread load.
        ran = self.config.play.max_processes if self.config.play.max_processes > 5 else self.config.play.max_processes * 2
        sleep((self.pid % ran) * 10)
        # NOTE: logger.error is used throughout for ordinary progress logging.
        logger.error(
            f"Selfplay#Start Process index = {self.id}, pid = {self.pid}")
        idx = 1
        self.buffer = []
        search_tree = defaultdict(VisitState)
        while True:
            start_time = time()
            # A fresh tree every game; start_game may reset it again.
            search_tree = defaultdict(VisitState)
            value, turns, state, store = self.start_game(idx, search_tree)
            end_time = time()
            logger.error(
                f"Process {self.pid}-{self.id} play game {idx} time={(end_time - start_time):.1f} sec, "
                f"turn={turns / 2}, winner = {value:.2f} (1 = red, -1 = black, 0 draw)"
            )
            if turns <= 10:
                senv.render(state)
            if store:
                # Only advance the game counter when the game was kept.
                idx += 1
            sleep(random())

    def start_game(self, idx, search_tree):
        """Play one self-play game and optionally store its training data.

        Returns (v, turns, state, store) with `v` the final result from
        red's perspective (per the caller's log convention).
        """
        pipes = self.cur_pipes.pop()
        if not self.config.play.share_mtcs_info_in_self_play or \
                idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)
        # Resignation enabled only for a random fraction of games.
        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False
        self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes,
                                   enable_resign=enable_resign, debugging=False,
                                   use_history=self.use_history)
        state = senv.INIT_STATE
        history = [state]  # alternating [state, action, state, ...]
        # policys = []
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        final_move = None
        no_eat_count = 0  # consecutive half-moves without a capture
        check = False
        no_act = []  # moves forbidden by the repetition rules below
        increase_temp = False
        while not game_over:
            start_time = time()
            action, policy = self.player.action(state, turns, no_act, increase_temp=increase_temp)
            end_time = time()
            if action is None:
                logger.error(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            # if self.config.opts.log_move:
            #     logger.info(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            # logger.info(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            history.append(action)
            # policys.append(policy)
            try:
                state, no_eat = senv.new_step(state, action)
            except Exception as e:
                # Defensive: treat an illegal transition as an aborted draw.
                logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
                game_over = True
                value = 0
                break
            turns += 1
            if no_eat:
                no_eat_count += 1
            else:
                no_eat_count = 0
            history.append(state)
            # Draw after 120 capture-free half-moves or an over-long game.
            if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(
                    state, need_check=True)
                if not game_over:
                    if not senv.has_attack_chessman(state):
                        # Neither side has attacking material: declare a draw.
                        logger.error(f"双方无进攻子力,作和。state = {state}")
                        game_over = True
                        value = 0
            increase_temp = False
            no_act = []
            # Repetition handling: the current position occurred before.
            if not game_over and not check and state in history[:-1]:
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        if senv.will_check_or_catch(state, history[i + 1]):
                            # Repeating with check/chase is forbidden.
                            no_act.append(history[i + 1])
                        elif not senv.be_catched(state, history[i + 1]):
                            # Idle repetition: raise temperature to diversify.
                            increase_temp = True
                            free_move[state] += 1
                            if free_move[state] >= 3:
                                # Three idle repetitions: treat as a draw.
                                game_over = True
                                value = 0
                                logger.error("闲着循环三次,作和棋处理")
                                break
        if final_move:
            # Apply the forced finishing move reported by senv.done().
            # policy = self.build_policy(final_move, False)
            history.append(final_move)
            # policys.append(policy)
            state = senv.step(state, final_move)
            turns += 1
            value = -value
            history.append(state)
        self.player.close()
        del search_tree
        del self.player
        gc.collect()
        if turns % 2 == 1:  # black moved last: flip perspective
            value = -value
        v = value
        # Very short games are stored only ~10% of the time.
        if turns < 10:
            if random() > 0.9:
                store = True
            else:
                store = False
        else:
            store = True
        if store:
            # data = [initial_state, [state_1, value], [state_2, -value], ...]
            data = [history[0]]
            for i in range(turns):
                k = i * 2
                data.append([history[k + 1], value])
                value = -value
            self.save_play_data(idx, data)
        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store

    def save_play_data(self, idx, data):
        """Buffer data; every nb_game_in_file games, flush to a file named
        with Beijing time (UTC+8) and optionally upload it."""
        self.buffer += data
        if not idx % self.config.play_data.nb_game_in_file == 0:
            return
        rc = self.config.resource
        # File names use Beijing time (UTC+8) regardless of local timezone.
        utc_dt = datetime.utcnow().replace(tzinfo=timezone.utc)
        bj_dt = utc_dt.astimezone(timezone(timedelta(hours=8)))
        game_id = bj_dt.strftime("%Y%m%d-%H%M%S.%f")
        filename = rc.play_data_filename_tmpl % game_id
        path = os.path.join(rc.play_data_dir, filename)
        logger.error(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        if self.config.internet.distributed:
            upload_worker = Thread(target=self.upload_play_data, args=(path, filename), name="upload_worker")
            upload_worker.daemon = True
            upload_worker.start()
        self.buffer = []

    def upload_play_data(self, path, filename):
        """Upload one data file, tagged with the best-model digest, user
        name, and data-format version."""
        digest = CChessModel.fetch_digest(
            self.config.resource.model_best_weight_path)
        data = {
            'digest': digest,
            'username': self.config.internet.username,
            'version': '2.4'
        }
        response = upload_file(self.config.internet.upload_url, path, filename, data, rm=False)
        # NOTE(review): success path subscripts response['status'] but the
        # failure path reads response.msg — verify upload_file's return type.
        if response is not None and response['status'] == 0:
            logger.error(f"Upload play data (unknown) finished.")
        else:
            logger.error(
                f'Upload play data (unknown) failed. {response.msg if response is not None else None}'
            )

    def remove_play_data(self):
        """Delete the oldest data files once max_file_num is exceeded."""
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        try:
            for i in range(len(files) - self.config.play_data.max_file_num):
                os.remove(files[i])
        except:
            # Best-effort: concurrent workers may race on the same files.
            pass

    def build_policy(self, action, flip):
        """Return a one-hot policy vector (length = number of action labels)
        for `action`, flipped to the red perspective when `flip` is True."""
        labels_n = len(ActionLabelsRed)
        move_lookup = {
            move: i
            for move, i in zip(ActionLabelsRed, range(labels_n))
        }
        policy = np.zeros(labels_n)
        policy[move_lookup[action]] = 1
        if flip:
            policy = flip_policy(policy)
        return list(policy)
class SelfPlayWorker:
    """Self-play worker recording (state, policy, value) triples per move,
    with optional distributed upload of finished data files."""

    def __init__(self, config: Config, pipes=None, pid=None):
        self.config = config
        self.player = None
        self.cur_pipes = pipes  # shared pool of model-prediction pipes
        self.id = pid
        self.buffer = []  # accumulated play data, flushed every nb_game_in_file games
        self.pid = os.getpid()

    def start(self):
        """Loop forever playing games; the search tree is threaded through
        start_game so it can be shared between games."""
        logger.debug(
            f"Selfplay#Start Process index = {self.id}, pid = {self.pid}")
        idx = 1
        self.buffer = []
        search_tree = defaultdict(VisitState)
        while True:
            start_time = time()
            value, turns, state, search_tree, store = self.start_game(
                idx, search_tree)
            end_time = time()
            logger.debug(
                f"Process {self.pid}-{self.id} play game {idx} time={(end_time - start_time):.1f} sec, "
                f"turn={turns / 2}, winner = {value:.2f} (1 = red, -1 = black, 0 draw)"
            )
            if turns <= 10:
                senv.render(state)
            if store:
                # Only advance the game counter when the game was kept.
                idx += 1

    def start_game(self, idx, search_tree):
        """Play one self-play game, recording (state, policy, value) triples.

        Returns (v, turns, state, search_tree, store) where `v` is the final
        result from red's perspective (per the caller's log convention).
        """
        pipes = self.cur_pipes.pop()
        if not self.config.play.share_mtcs_info_in_self_play or \
                idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)
        # Resignation enabled only for a random fraction of games.
        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False
        self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes,
                                   enable_resign=enable_resign, debugging=False)
        state = senv.INIT_STATE
        history = [state]  # alternating [state, action, state, ...]
        policys = []
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        final_move = None
        while not game_over:
            # Forbid moves that would repeat a previously seen position.
            no_act = None
            if state in history[:-1]:
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        no_act.append(history[i + 1])
            start_time = time()
            action, policy = self.player.action(state, turns, no_act)
            end_time = time()
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            # logger.debug(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            # for move, action_state in self.player.search_results.items():
            #     if action_state[0] >= 20:
            #         logger.info(f"move: {move}, prob: {action_state[0]}, Q_value: {action_state[1]:.2f}, Prior: {action_state[2]:.3f}")
            # self.player.search_results = {}
            history.append(action)
            policys.append(policy)
            state = senv.step(state, action)
            turns += 1
            history.append(state)
            if turns / 2 >= self.config.play.max_game_length:
                # Over-long game: score the final position heuristically.
                game_over = True
                value = senv.evaluate(state)
            else:
                game_over, value, final_move = senv.done(state)
        if final_move:
            # Apply the forced finishing move reported by senv.done().
            policy = self.build_policy(final_move, False)
            history.append(final_move)
            policys.append(policy)
            state = senv.step(state, final_move)
            history.append(state)
        self.player.close()
        if turns % 2 == 1:  # black moved last: flip perspective
            value = -value
        v = value
        # Draws are stored only ~50% of the time.
        if v == 0:
            if random() > 0.5:
                store = True
            else:
                store = False
        else:
            store = True
        if store:
            # data = [[state_i, policy_i, value_i], ...] with alternating sign
            # so each entry is valued for the side to move.
            data = []
            for i in range(turns):
                k = i * 2
                data.append([history[k], policys[i], value])
                value = -value
            self.save_play_data(idx, data)
        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, search_tree, store

    def save_play_data(self, idx, data):
        """Buffer data; every nb_game_in_file games, flush to disk and, in
        distributed mode, upload the file on a daemon thread."""
        self.buffer += data
        if not idx % self.config.play_data.nb_game_in_file == 0:
            return
        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        filename = rc.play_data_filename_tmpl % game_id
        path = os.path.join(rc.play_data_dir, filename)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        if self.config.internet.distributed:
            upload_worker = Thread(target=self.upload_play_data,
                                   args=(path, filename),
                                   name="upload_worker")
            upload_worker.daemon = True
            upload_worker.start()
        self.buffer = []

    def upload_play_data(self, path, filename):
        """Upload one data file, tagged with the best-model digest and user."""
        digest = CChessModel.fetch_digest(
            self.config.resource.model_best_weight_path)
        data = {'digest': digest, 'username': self.config.internet.username}
        response = upload_file(self.config.internet.upload_url, path, filename, data, rm=False)
        # NOTE(review): success path subscripts response['status'] but the
        # failure path reads response.msg — verify upload_file's return type.
        if response is not None and response['status'] == 0:
            logger.info(f"Upload play data (unknown) finished.")
        else:
            logger.error(
                f'Upload play data (unknown) failed. {response.msg if response is not None else None}'
            )

    def remove_play_data(self):
        """Delete the oldest data files once max_file_num is exceeded."""
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        try:
            for i in range(len(files) - self.config.play_data.max_file_num):
                os.remove(files[i])
        except:
            # Best-effort: concurrent workers may race on the same files.
            pass

    def build_policy(self, action, flip):
        """Return a one-hot policy vector (length = number of action labels)
        for `action`, flipped to the red perspective when `flip` is True."""
        labels_n = len(ActionLabelsRed)
        move_lookup = {
            move: i
            for move, i in zip(ActionLabelsRed, range(labels_n))
        }
        policy = np.zeros(labels_n)
        policy[move_lookup[action]] = 1
        if flip:
            policy = flip_policy(policy)
        return list(policy)
class PlayWithHuman:
    """Pygame GUI for a human-vs-AI game: the human plays with the mouse
    while the AI searches in a background thread."""

    def __init__(self, config: Config):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.winstyle = 0          # pygame display flags
        self.chessmans = None      # sprite group holding the piece sprites
        self.human_move_first = True
        # Window geometry: board area is `width` px wide, the rest is the
        # right-hand widget panel showing records and search info.
        self.screen_width = 720
        self.height = 577
        self.width = 521
        self.chessman_w = 57
        self.chessman_h = 57
        self.disp_record_num = 15  # number of move-record lines shown
        self.rec_labels = [None] * self.disp_record_num
        self.nn_value = 0          # latest NN value estimate, shown in the panel
        self.mcts_moves = {}       # latest MCTS per-move stats, shown in the panel
        self.history = []          # alternating states and moves of this game
        if self.config.opts.bg_style == 'WOOD':
            # the wood board image uses a slightly larger cell size
            self.chessman_w += 1
            self.chessman_h += 1

    def load_model(self):
        """Load the best model weights, or build a fresh model when asked
        for a new one / when loading fails."""
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def init_screen(self):
        """Create the window, draw the board and widget backgrounds, and
        build the piece sprite group.

        Returns (screen, board_background, widget_background)."""
        bestdepth = pygame.display.mode_ok([self.screen_width, self.height],
                                           self.winstyle, 32)
        screen = pygame.display.set_mode([self.screen_width, self.height],
                                         self.winstyle, bestdepth)
        pygame.display.set_caption("中国象棋Zero")
        # create the background, tile the bgd image
        bgdtile = load_image(f'{self.config.opts.bg_style}.GIF')
        bgdtile = pygame.transform.scale(bgdtile, (self.width, self.height))
        board_background = pygame.Surface([self.width, self.height])
        board_background.blit(bgdtile, (0, 0))
        widget_background = pygame.Surface(
            [self.screen_width - self.width, self.height])
        white_rect = Rect(0, 0, self.screen_width - self.width, self.height)
        widget_background.fill((255, 255, 255), white_rect)
        # create text label for the move-record panel
        font_file = self.config.resource.font_path
        font = pygame.font.Font(font_file, 16)
        font_color = (0, 0, 0)
        font_background = (255, 255, 255)
        t = font.render("着法记录", True, font_color, font_background)
        t_rect = t.get_rect()
        t_rect.x = 10
        t_rect.y = 10
        widget_background.blit(t, t_rect)
        screen.blit(board_background, (0, 0))
        screen.blit(widget_background, (self.width, 0))
        pygame.display.flip()
        self.chessmans = pygame.sprite.Group()
        creat_sprite_group(self.chessmans, self.env.board.chessmans_hash,
                           self.chessman_w, self.chessman_h)
        return screen, board_background, widget_background

    def start(self, human_first=True):
        """Run the GUI game loop: handle mouse input for the human side while
        `ai_move` plays the other side from a daemon thread."""
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config,
                               search_tree=defaultdict(VisitState),
                               pipes=self.pipe,
                               enable_resign=True,
                               debugging=True)
        self.human_move_first = human_first
        pygame.init()
        screen, board_background, widget_background = self.init_screen()
        framerate = pygame.time.Clock()
        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)
        current_chessman = None  # currently selected piece sprite, if any
        if human_first:
            self.env.board.calc_chessmans_moving_list()
        # The AI searches and moves on its own daemon thread.
        # NOTE(review): self.history is appended to from both this UI thread and
        # the ai_worker thread without a lock — verify this is safe in practice.
        ai_worker = Thread(target=self.ai_move, name="ai_worker")
        ai_worker.daemon = True
        ai_worker.start()
        while not self.env.board.is_end():
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    # Save the game record before quitting.
                    self.env.board.print_record()
                    self.ai.close(wait=False)
                    game_id = datetime.now().strftime("%Y%m%d-%H%M%S")
                    path = os.path.join(
                        self.config.resource.play_record_dir,
                        self.config.resource.play_record_filename_tmpl % game_id)
                    self.env.board.save_record(path)
                    sys.exit()
                elif event.type == VIDEORESIZE:
                    pass
                elif event.type == MOUSEBUTTONDOWN:
                    # Only react to clicks while it is the human's turn.
                    if human_first == self.env.red_to_move:
                        pressed_array = pygame.mouse.get_pressed()
                        for index in range(len(pressed_array)):
                            if index == 0 and pressed_array[index]:  # left button
                                mouse_x, mouse_y = pygame.mouse.get_pos()
                                col_num, row_num = translate_hit_area(
                                    mouse_x, mouse_y, self.chessman_w, self.chessman_h)
                                chessman_sprite = select_sprite_from_group(
                                    self.chessmans, col_num, row_num)
                                if current_chessman is None and chessman_sprite != None:
                                    # First click: select one of our own pieces.
                                    if chessman_sprite.chessman.is_red == self.env.red_to_move:
                                        current_chessman = chessman_sprite
                                        chessman_sprite.is_selected = True
                                elif current_chessman != None and chessman_sprite != None:
                                    if chessman_sprite.chessman.is_red == self.env.red_to_move:
                                        # Clicked another of our pieces: change selection.
                                        current_chessman.is_selected = False
                                        current_chessman = chessman_sprite
                                        chessman_sprite.is_selected = True
                                    else:
                                        # Clicked an enemy piece: attempt a capture.
                                        move = str(current_chessman.chessman.col_num) + str(current_chessman.chessman.row_num) +\
                                               str(col_num) + str(row_num)
                                        success = current_chessman.move(
                                            col_num, row_num, self.chessman_w, self.chessman_h)
                                        self.history.append(move)
                                        if success:
                                            self.chessmans.remove(chessman_sprite)
                                            chessman_sprite.kill()
                                            current_chessman.is_selected = False
                                            current_chessman = None
                                            self.history.append(self.env.get_state())
                                elif current_chessman != None and chessman_sprite is None:
                                    # Clicked an empty square: attempt a plain move.
                                    move = str(current_chessman.chessman.col_num) + str(current_chessman.chessman.row_num) +\
                                           str(col_num) + str(row_num)
                                    success = current_chessman.move(
                                        col_num, row_num, self.chessman_w, self.chessman_h)
                                    self.history.append(move)
                                    if success:
                                        current_chessman.is_selected = False
                                        current_chessman = None
                                        self.history.append(self.env.get_state())
            self.draw_widget(screen, widget_background)
            framerate.tick(20)
            # clear/erase the last drawn sprites
            self.chessmans.clear(screen, board_background)
            # update all the sprites
            self.chessmans.update()
            self.chessmans.draw(screen)
            pygame.display.update()
        self.ai.close(wait=False)
        logger.info(f"Winner is {self.env.board.winner} !!!")
        self.env.board.print_record()
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S")
        path = os.path.join(
            self.config.resource.play_record_dir,
            self.config.resource.play_record_filename_tmpl % game_id)
        self.env.board.save_record(path)
        sleep(3)

    def ai_move(self):
        """AI thread body: whenever it is the AI's turn, run MCTS, apply the
        chosen move to the sprites, and publish NN/MCTS stats for the panel."""
        ai_move_first = not self.human_move_first
        self.history = [self.env.get_state()]
        no_act = None
        while not self.env.done:
            if ai_move_first == self.env.red_to_move:
                labels = ActionLabelsRed
                labels_n = len(ActionLabelsRed)
                self.ai.search_results = {}
                state = self.env.get_state()
                logger.info(f"state = {state}")
                _, _, _, check = senv.done(state, need_check=True)
                # Repetition handling: if this position occurred before (and we
                # are not in check), restrict or adjudicate repeated moves.
                if not check and state in self.history[:-1]:
                    no_act = []
                    free_move = defaultdict(int)
                    for i in range(len(self.history) - 1):
                        if self.history[i] == state:
                            # if the repeated follow-up gives check or chases a
                            # piece, forbid that move
                            if senv.will_check_or_catch(state, self.history[i + 1]):
                                no_act.append(self.history[i + 1])
                            # otherwise treat it as an idle move
                            else:
                                free_move[state] += 1
                                if free_move[state] >= 2:
                                    # adjudicate as a draw
                                    self.env.winner = Winner.draw
                                    self.env.board.winner = Winner.draw
                                    break
                if no_act:
                    logger.debug(f"no_act = {no_act}")
                action, policy = self.ai.action(state, self.env.num_halfmoves, no_act)
                if action is None:
                    logger.info("AI has resigned!")
                    return
                self.history.append(action)
                if not self.env.red_to_move:
                    action = flip_move(action)
                key = self.env.get_state()
                p, v = self.ai.debug[key]
                logger.info(f"check = {check}, NN value = {v:.3f}")
                self.nn_value = v
                logger.info("MCTS results:")
                self.mcts_moves = {}
                for move, action_state in self.ai.search_results.items():
                    move_cn = self.env.board.make_single_record(
                        int(move[0]), int(move[1]), int(move[2]), int(move[3]))
                    logger.info(
                        f"move: {move_cn}-{move}, visit count: {action_state[0]}, Q_value: {action_state[1]:.3f}, Prior: {action_state[2]:.3f}"
                    )
                    self.mcts_moves[move_cn] = action_state
                # Apply the move to the sprites (the move string encodes
                # from-col, from-row, to-col, to-row as single digits).
                x0, y0, x1, y1 = int(action[0]), int(action[1]), int(
                    action[2]), int(action[3])
                chessman_sprite = select_sprite_from_group(
                    self.chessmans, x0, y0)
                sprite_dest = select_sprite_from_group(self.chessmans, x1, y1)
                if sprite_dest:
                    # capture: remove the destination sprite
                    self.chessmans.remove(sprite_dest)
                    sprite_dest.kill()
                chessman_sprite.move(x1, y1, self.chessman_w, self.chessman_h)
                self.history.append(self.env.get_state())

    def draw_widget(self, screen, widget_background):
        """Redraw the right-hand info panel (records + evaluation)."""
        white_rect = Rect(0, 0, self.screen_width - self.width, self.height)
        widget_background.fill((255, 255, 255), white_rect)
        # separator line between the record area and the evaluation area
        pygame.draw.line(widget_background, (255, 0, 0), (10, 285),
                         (self.screen_width - self.width - 10, 285))
        screen.blit(widget_background, (self.width, 0))
        self.draw_records(screen, widget_background)
        self.draw_evaluation(screen, widget_background)

    def draw_records(self, screen, widget_background):
        """Render the last `disp_record_num` lines of the move record."""
        text = '着法记录'
        self.draw_label(screen, widget_background, text, 10, 16, 10)
        records = self.env.board.record.split('\n')
        font_file = self.config.resource.font_path
        font = pygame.font.Font(font_file, 12)
        i = 0
        for record in records[-self.disp_record_num:]:
            self.rec_labels[i] = font.render(record, True, (0, 0, 0),
                                             (255, 255, 255))
            t_rect = self.rec_labels[i].get_rect()
            # t_rect.centerx = (self.screen_width - self.width) / 2
            t_rect.y = 35 + i * 15
            t_rect.x = 10
            t_rect.width = self.screen_width - self.width
            widget_background.blit(self.rec_labels[i], t_rect)
            i += 1
        screen.blit(widget_background, (self.width, 0))

    def draw_evaluation(self, screen, widget_background):
        """Render the NN value estimate and per-move MCTS statistics."""
        title_label = 'CC-Zero信息'
        self.draw_label(screen, widget_background, title_label, 300, 16, 10)
        info_label = f'MCTS搜索次数:{self.config.play.simulation_num_per_move}'
        self.draw_label(screen, widget_background, info_label, 335, 14, 10)
        eval_label = f"当前局势评估: {self.nn_value:.3f}"
        self.draw_label(screen, widget_background, eval_label, 360, 14, 10)
        label = f"MCTS搜索结果:"
        self.draw_label(screen, widget_background, label, 395, 14, 10)
        label = f"着法 访问计数 动作价值 先验概率"
        self.draw_label(screen, widget_background, label, 415, 12, 10)
        i = 0
        # Copy first: self.mcts_moves is replaced by the AI thread while we draw.
        tmp = copy.deepcopy(self.mcts_moves)
        for mov, action_state in tmp.items():
            label = f"{mov}"
            self.draw_label(screen, widget_background, label, 435 + i * 20, 12, 10)
            label = f"{action_state[0]}"
            self.draw_label(screen, widget_background, label, 435 + i * 20, 12, 70)
            label = f"{action_state[1]:.2f}"
            self.draw_label(screen, widget_background, label, 435 + i * 20, 12, 100)
            label = f"{action_state[2]:.3f}"
            self.draw_label(screen, widget_background, label, 435 + i * 20, 12, 150)
            i += 1

    def draw_label(self, screen, widget_background, text, y, font_size, x=None):
        """Render `text` at vertical offset `y` in the widget panel; when `x`
        is None the text is horizontally centered."""
        font_file = self.config.resource.font_path
        font = pygame.font.Font(font_file, font_size)
        label = font.render(text, True, (0, 0, 0), (255, 255, 255))
        t_rect = label.get_rect()
        t_rect.y = y
        if x != None:
            t_rect.x = x
        else:
            t_rect.centerx = (self.screen_width - self.width) / 2
        widget_background.blit(label, t_rect)
        screen.blit(widget_background, (self.width, 0))
class PlayWithHuman:
    """Console (command-line) human-vs-AI game: the human enters coordinates
    via stdin while the AI plays the other side with MCTS."""

    def __init__(self, config: Config):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.chessmans = None
        self.human_move_first = True

    def load_model(self):
        """Create the TF session, then load the best weights (or build a
        fresh model when `--new` is set / loading fails)."""
        sess = set_session_config(per_process_gpu_memory_fraction=1,
                                  allow_growth=True,
                                  device_list=self.config.opts.device_list)
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()
        self.model.sess = sess

    def start(self, human_first=True):
        """Run the console game loop until the board reports game over.

        :param bool human_first: True when the human plays red (moves first).
        """
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config,
                               search_tree=defaultdict(VisitState),
                               pipes=self.pipe,
                               enable_resign=True,
                               debugging=False)
        self.human_move_first = human_first

        self.env.board.print_to_cl()

        while not self.env.board.is_end():
            if human_first == self.env.red_to_move:
                self._human_turn()
            else:
                if not self._ai_turn():
                    break  # AI resigned
        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()

    def _human_turn(self):
        """Prompt the human for a piece and a destination until a legal move
        is made, then reprint the board."""
        self.env.board.calc_chessmans_moving_list()
        is_correct_chessman = False
        is_correct_position = False
        chessman = None
        while not is_correct_chessman:
            title = "请输入棋子位置: "
            input_chessman_pos = input(title)
            x, y = int(input_chessman_pos[0]), int(input_chessman_pos[1])
            chessman = self.env.board.chessmans[x][y]
            # The chosen square must hold a piece belonging to the side to move.
            if chessman is not None and chessman.is_red == self.env.board.is_red_turn:
                is_correct_chessman = True
                print(f"当前棋子为{chessman.name_cn},可以落子的位置有:")
                for point in chessman.moving_list:
                    print(point.x, point.y)
            else:
                print("没有找到此名字的棋子或未轮到此方走子")
        while not is_correct_position:
            title = "请输入落子的位置: "
            input_chessman_pos = input(title)
            x, y = int(input_chessman_pos[0]), int(input_chessman_pos[1])
            is_correct_position = chessman.move(x, y)
            if is_correct_position:
                self.env.board.print_to_cl()
                self.env.board.clear_chessmans_moving_list()

    def _ai_turn(self):
        """Run one AI move. Returns False when the AI resigns, True otherwise.

        Bug fix vs. the original: the resignation check (`action is None`)
        now happens BEFORE `flip_move(action)` — previously a resignation on
        black's turn would pass None into flip_move and crash instead of
        printing the resignation message.
        """
        action, policy = self.ai.action(self.env.get_state(),
                                        self.env.num_halfmoves)
        if action is None:
            print("AI投降了!")
            return False
        # The search works on the red-normalized state; flip the move back to
        # board coordinates when it is actually black to move.
        if not self.env.red_to_move:
            action = flip_move(action)
        self.env.step(action)
        print(f"AI选择移动 {action}")
        self.env.board.print_to_cl()
        return True