def _play_evaluation_game(config, red, black):
    """Play one game between two players and return ``(history, turns, value)``.

    ``history`` alternates states and moves, starting with the initial state.
    ``turns`` is the number of half-moves played.  ``value`` is the raw result
    as produced by the loop (0 for the draw branches, -1 when the side to move
    resigns, otherwise whatever ``senv.done`` reported); the caller converts it
    to a fixed perspective.
    """
    state = senv.INIT_STATE
    history = [state]
    value = 0
    turns = 0
    game_over = False
    final_move = None
    no_eat_count = 0
    check = False

    while not game_over:
        no_act = None
        if not check and state in history[:-1]:
            # The position repeated: inspect what was played from it before.
            no_act = []
            idle_repeats = 0
            for past, following in zip(history, history[1:]):
                if past != state:
                    continue
                if senv.will_check_or_catch(state, following):
                    # The earlier continuation was a check or a chase:
                    # forbid playing it again.
                    no_act.append(following)
                else:
                    # Otherwise it counts as an idle repetition.
                    idle_repeats += 1
                    if idle_repeats >= 2:
                        # Repeated idle moves: score the game as a draw.
                        game_over = True
                        value = 0
                        logger.info("闲着循环三次,作和棋处理")
                        break
        if game_over:
            break

        start_time = time()
        mover = red if turns % 2 == 0 else black
        action, _ = mover.action(state, turns, no_act=no_act)
        end_time = time()
        if action is None:
            print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
            value = -1
            break
        print(
            f"博弈中: 回合{turns / 2 + 1} {'红方走棋' if turns % 2 == 0 else '黑方走棋'}, 着法: {action}, 用时: {(end_time - start_time):.1f}s"
        )
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            # The original message formatted an unbound ``policy`` name, which
            # raised NameError inside this handler; report the move instead.
            logger.error(f"{e}, no_act = {no_act}, action = {action}")
            value = 0
            break
        turns += 1
        no_eat_count = no_eat_count + 1 if no_eat else 0
        history.append(state)

        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state, need_check=True)
            if not game_over and not senv.has_attack_chessman(state):
                logger.info(f"双方无进攻子力,作和。state = {state}")
                game_over = True
                value = 0

    if final_move:
        # Play out the finishing move reported by ``senv.done`` so that it is
        # part of the recorded game.
        history.append(final_move)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)

    return history, turns, value


def self_play_buffer(config, pipes_bt, pipes_ng, idx, res_data) -> (tuple, list):
    """Play one evaluation game between the base and the unchecked model.

    Args:
        config: project configuration; ``config.play.simulation_num_per_move``
            is overwritten with a random value in 800..1200.
        pipes_bt: pool of pipes for the base model; one is borrowed and
            returned.
        pipes_ng: pool of pipes for the unchecked model; one is borrowed and
            returned.
        idx: game index; an even index gives red to the base model, an odd
            index gives red to the unchecked model.
        res_data: mapping holding ``['base']['digest']`` and
            ``['unchecked']['digest']``.

    Returns:
        ``((turns, v, idx), data)`` where ``v`` is the result after the
        odd-turn sign flip and ``data`` is
        ``[red_digest, black_digest, initial_state, [move, value], ...]`` with
        the value sign alternating per move.
    """
    sleep(random())
    playouts = randint(8, 12) * 100
    config.play.simulation_num_per_move = playouts
    logger.info(f"Set playouts = {config.play.simulation_num_per_move}")

    pipe1 = pipes_bt.pop()  # borrow
    pipe2 = pipes_ng.pop()
    try:
        player1 = CChessPlayer(config, search_tree=defaultdict(VisitState), pipes=pipe1,
                               enable_resign=False, debugging=False)
        player2 = CChessPlayer(config, search_tree=defaultdict(VisitState), pipes=pipe2,
                               enable_resign=False, debugging=False)
        try:
            # even: base model = red, unchecked = black; odd: the reverse
            if idx % 2 == 0:
                red, black = player1, player2
                print(f"基准模型执红,待评测模型执黑")
            else:
                red, black = player2, player1
                print(f"待评测模型执红,基准模型执黑")
            history, turns, value = _play_evaluation_game(config, red, black)
        finally:
            # Always shut the players down, even when the game raised.
            player1.close()
            player2.close()
            del player1, player2
            gc.collect()
    finally:
        # Always hand the borrowed pipes back to their pools.
        pipes_bt.append(pipe1)
        pipes_ng.append(pipe2)

    if idx % 2 == 0:
        data = [res_data['base']['digest'], res_data['unchecked']['digest']]
    else:
        data = [res_data['unchecked']['digest'], res_data['base']['digest']]

    if turns % 2 == 1:  # black turn
        value = -value

    v = value
    data.append(history[0])
    for i in range(turns):
        # Moves sit at the odd indices of ``history``.
        data.append([history[i * 2 + 1], value])
        value = -value

    return (turns, v, idx), data
class SelfPlayWorker:
    """Generates training games by playing the network against Eleeye.

    The network player and the external UCCI engine (started from
    ``config.resource.eleeye_path``) alternate colours by game index: the
    network plays red on even indices.
    """

    def __init__(self, config: Config, pipes=None, pid=None):
        self.config = config
        self.player = None
        self.cur_pipes = pipes      # pool from which one pipe set is borrowed per game
        self.id = pid               # worker index supplied by the caller
        self.buffer = []            # game data waiting to be written to a file
        self.pid = os.getpid()

    def start(self):
        """Play games forever, logging the outcome of each one."""
        self.pid = os.getpid()
        logger.debug(
            f"Selfplay#Start Process index = {self.id}, pid = {self.pid}")

        idx = 1
        self.buffer = []

        while True:
            search_tree = defaultdict(VisitState)
            start_time = time()
            value, turns, state, store = self.start_game(idx, search_tree)
            end_time = time()

            network_is_red = idx % 2 == 0
            if value not in (1, -1):
                winner = 'Draw'
            elif (network_is_red and value == 1) or (not network_is_red and value == -1):
                winner = 'AlphaHe'
            else:
                winner = 'Eleeye'

            logger.debug(
                f"Process {self.pid}-{self.id} play game {idx} time={(end_time - start_time):.1f} sec, "
                f"turn={turns / 2}, value = {value:.2f}, winner is {winner}")
            if turns <= 10 and store:
                senv.render(state)
            if store:
                # The index only advances for games that were kept.
                idx += 1

    def start_game(self, idx, search_tree):
        """Play one game and return ``(value, turns, final_state, stored)``.

        ``value`` is the result after the odd-turn sign flip; ``stored`` tells
        whether the game was passed to ``save_play_data``.
        """
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
                idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        enable_resign = random() > self.config.play.enable_resign_rate

        self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes,
                                   enable_resign=enable_resign, debugging=False)

        state = senv.INIT_STATE
        history = [state]
        value = 0
        turns = 0       # even == red; odd == black
        game_over = False
        is_alpha_red = idx % 2 == 0
        final_move = None
        check = False

        while not game_over:
            red_to_move = turns % 2 == 0
            if is_alpha_red == red_to_move:
                # Network's turn: forbid every move previously played from
                # this position unless the side to move is in check.
                no_act = None
                if not check and state in history[:-1]:
                    no_act = [following
                              for past, following in zip(history, history[1:])
                              if past == state]
                action, _ = self.player.action(state, turns, no_act)
                if action is None:
                    logger.debug(
                        f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
            else:
                # Engine's turn.
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    logger.debug(
                        f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
                if turns % 2 == 1:
                    action = flip_move(action)
            history.append(action)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(
                    state, need_check=True)

        if final_move:
            history.append(final_move)
            state = senv.step(state, final_move)
            history.append(state)
            turns += 1
            value = -value

        self.player.close()
        del search_tree
        del self.player
        gc.collect()
        if turns % 2 == 1:  # black turn
            value = -value

        v = value

        # Very short games are kept only some of the time.
        if turns <= 10:
            store = random() > 0.7
        else:
            store = True

        if store:
            data = [history[0]]
            for i in range(turns):
                # Moves sit at the odd indices of ``history``.
                data.append([history[i * 2 + 1], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store

    def get_ucci_move(self, fen, time=3):
        """Ask the Eleeye engine for a move in position ``fen``.

        ``time`` is the thinking time in seconds.  Returns the parsed move, or
        ``None`` when the engine answers ``nobestmove``.
        """
        engine = subprocess.Popen(self.config.resource.eleeye_path,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  universal_newlines=True)
        setfen = f'position fen {fen}\n'
        setrandom = f'setoption randomness {self.config.opts.random}\n'
        cmd = 'ucci\n' + setrandom + setfen + f'go time {time * 1000}\n'
        try:
            out, err = engine.communicate(cmd, timeout=time + 0.5)
        except subprocess.TimeoutExpired:
            engine.kill()
            try:
                out, err = engine.communicate()
            except Exception as e:
                logger.error(f"{e}, cmd = {cmd}")
                # Retry with one more second of thinking time.
                return self.get_ucci_move(fen, time + 1)
        lines = out.split('\n')
        if lines[-2] == 'nobestmove':
            return None
        move = lines[-2].split(' ')[1]
        if move == 'depth':
            move = lines[-1].split(' ')[6]
        return senv.parse_ucci_move(move)

    def save_play_data(self, idx, data):
        """Buffer ``data`` and flush the buffer to a file every N-th game."""
        self.buffer += data

        if idx % self.config.play_data.nb_game_in_file != 0:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        """Delete the first files of the listing once the file limit is exceeded.

        Removal is best effort: a file that cannot be removed is logged and
        skipped, and the remaining files are still processed.
        """
        files = get_game_data_filenames(self.config.resource)
        excess = len(files) - self.config.play_data.max_file_num
        if excess <= 0:
            return
        for path in files[:excess]:
            try:
                os.remove(path)
            except OSError as e:
                logger.debug(f"Could not remove play data file {path}: {e}")

    def build_policy(self, action, flip):
        """Return a one-hot policy list for ``action``, optionally flipped."""
        labels_n = len(ActionLabelsRed)
        move_lookup = {move: i for i, move in enumerate(ActionLabelsRed)}
        policy = np.zeros(labels_n)

        policy[move_lookup[action]] = 1

        if flip:
            policy = flip_policy(policy)
        return list(policy)
class SelfPlayWorker:
    """Generates training games by letting one network player play both sides."""

    def __init__(self, config: Config, pipes=None, pid=None):
        self.config = config
        self.player = None
        self.cur_pipes = pipes      # pool from which one pipe set is borrowed per game
        self.id = pid               # worker index supplied by the caller
        self.buffer = []            # game data waiting to be written to a file
        self.pid = os.getpid()

    def start(self):
        """Play games forever, logging the outcome of each one."""
        self.pid = os.getpid()
        logger.debug(f"Selfplay#Start Process index = {self.id}, pid = {self.pid}")

        idx = 1
        self.buffer = []

        while True:
            start_time = time()
            search_tree = defaultdict(VisitState)
            value, turns, state, store = self.start_game(idx, search_tree)
            end_time = time()
            logger.debug(f"Process {self.pid}-{self.id} play game {idx} time={(end_time - start_time):.1f} sec, "
                         f"turn={turns / 2}, winner = {value:.2f} (1 = red, -1 = black, 0 draw)")
            if turns <= 10:
                senv.render(state)
            if store:
                # The index only advances for games that were kept.
                idx += 1
            sleep(random())

    def start_game(self, idx, search_tree):
        """Play one game and return ``(value, turns, final_state, stored)``.

        ``value`` is the result after the odd-turn sign flip; ``stored`` tells
        whether the game was passed to ``save_play_data``.
        """
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
                idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        enable_resign = random() > self.config.play.enable_resign_rate

        self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes,
                                   enable_resign=enable_resign, debugging=False)

        state = senv.INIT_STATE
        history = [state]
        value = 0
        turns = 0       # even == red; odd == black
        game_over = False
        final_move = None
        no_eat_count = 0
        check = False

        while not game_over:
            no_act = None
            if not check and state in history[:-1]:
                # The position repeated: inspect what was played from it before.
                no_act = []
                idle_repeats = 0
                for past, following in zip(history, history[1:]):
                    if past != state:
                        continue
                    if senv.will_check_or_catch(state, following):
                        # The earlier continuation was a check or a chase:
                        # forbid playing it again.
                        no_act.append(following)
                    else:
                        # Otherwise it counts as an idle repetition.
                        idle_repeats += 1
                        if idle_repeats >= 2:
                            # Repeated idle moves: score the game as a draw.
                            game_over = True
                            value = 0
                            logger.info("闲着循环三次,作和棋处理")
                            break
            if game_over:
                break

            start_time = time()
            action, policy = self.player.action(state, turns, no_act)
            end_time = time()
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            if self.config.opts.log_move:
                logger.info(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            history.append(action)
            try:
                state, no_eat = senv.new_step(state, action)
            except Exception as e:
                logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
                game_over = True
                value = 0
                break
            turns += 1
            no_eat_count = no_eat_count + 1 if no_eat else 0
            history.append(state)

            if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(state, need_check=True)
                if not game_over and not senv.has_attack_chessman(state):
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0

        if final_move:
            # Play out the finishing move reported by ``senv.done`` so that it
            # is part of the recorded game.
            history.append(final_move)
            state = senv.step(state, final_move)
            turns += 1
            value = -value
            history.append(state)

        self.player.close()
        del search_tree
        del self.player
        gc.collect()
        if turns % 2 == 1:  # black turn
            value = -value

        v = value

        # Very short games are kept only some of the time.
        if turns < 10:
            store = random() > 0.9
        else:
            store = True

        if store:
            data = [history[0]]
            for i in range(turns):
                # Moves sit at the odd indices of ``history``.
                data.append([history[i * 2 + 1], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store

    def save_play_data(self, idx, data):
        """Buffer ``data`` and flush the buffer to a file every N-th game.

        The file name carries a UTC+8 timestamp.
        """
        self.buffer += data

        if idx % self.config.play_data.nb_game_in_file != 0:
            return

        rc = self.config.resource
        # Same instant as utcnow() converted to UTC+8, without the deprecated
        # naive-UTC detour.
        bj_dt = datetime.now(timezone(timedelta(hours=8)))
        game_id = bj_dt.strftime("%Y%m%d-%H%M%S.%f")
        filename = rc.play_data_filename_tmpl % game_id
        path = os.path.join(rc.play_data_dir, filename)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        """Delete the first files of the listing once the file limit is exceeded.

        Removal is best effort: a file that cannot be removed is logged and
        skipped, and the remaining files are still processed.
        """
        files = get_game_data_filenames(self.config.resource)
        excess = len(files) - self.config.play_data.max_file_num
        if excess <= 0:
            return
        for path in files[:excess]:
            try:
                os.remove(path)
            except OSError as e:
                logger.debug(f"Could not remove play data file {path}: {e}")

    def build_policy(self, action, flip):
        """Return a one-hot policy list for ``action``, optionally flipped."""
        labels_n = len(ActionLabelsRed)
        move_lookup = {move: i for i, move in enumerate(ActionLabelsRed)}
        policy = np.zeros(labels_n)

        policy[move_lookup[action]] = 1

        if flip:
            policy = flip_policy(policy)
        return list(policy)
class EvaluateWorker:
    """Plays base-vs-unchecked evaluation games and uploads the results."""

    def __init__(self, config: Config, pipes1=None, pipes2=None, pid=None, data=None,
                 hist_base=True, hist_ng=True):
        self.config = config
        self.player_bt = None
        self.player_ng = None
        self.pid = pid
        self.pipes_bt = pipes1      # pipe pool for the base model
        self.pipes_ng = pipes2      # pipe pool for the unchecked model
        self.data = data            # holds ['base']['digest'] and ['unchecked']['digest']
        self.hist_base = hist_base  # forwarded as use_history to the base player
        self.hist_ng = hist_ng      # forwarded as use_history to the unchecked player

    def start(self):
        """Evaluate until the server reports a different model pair.

        After each game the result is saved and uploaded, then the server is
        asked which models are under evaluation; the loop continues only while
        both digests still match ``self.data``.
        """
        logger.debug(
            f"Evaluate#Start Process index = {self.pid}, pid = {os.getpid()}")
        need_evaluate = True
        self.config.opts.evaluate = True

        while need_evaluate:
            idx = 0 if random() > 0.5 else 1
            start_time = time()
            value, turns, data = self.start_game(idx)
            end_time = time()

            # idx == 0: the base model played red; idx == 1: it played black.
            if (value == 1 and idx == 0) or (value == -1 and idx == 1):
                result = '基准模型胜'
            elif (value == 1 and idx == 1) or (value == -1 and idx == 0):
                result = '待评测模型胜'
            else:
                result = '双方连续60回合未吃子,和棋'

            if value == -1:     # loss
                score = 0
            elif value == 1:    # win
                score = 1
            else:
                score = 0.5

            if idx == 0:
                score = 1 - score

            logger.info(
                f"进程{self.pid}评测完毕 用时{(end_time - start_time):.1f}秒, "
                f"{turns / 2}回合, {result}, 得分:{score}, value = {value}, idx = {idx}"
            )

            response = self.save_play_data(idx, data, value, score)
            if response and int(response['status']) == 0:
                logger.info('评测结果上传成功!')
            else:
                logger.info(f"评测结果上传失败,服务器返回{response}")

            response = http_request(
                self.config.internet.get_evaluate_model_url)
            # Guard against an empty response the same way the upload
            # response is guarded above; a failed request ends the evaluation.
            same_models = bool(
                response
                and int(response['status']) == 0
                and response['data']['base']['digest'] == self.data['base']['digest']
                and response['data']['unchecked']['digest'] == self.data['unchecked']['digest']
            )
            if same_models:
                need_evaluate = True
                logger.info(f"进程{self.pid}继续评测")
            else:
                need_evaluate = False
                logger.info(f"进程{self.pid}终止评测")

    def start_game(self, idx):
        """Play one evaluation game and return ``(value, turns, data)``.

        An even ``idx`` gives red to the base model.  ``data`` is
        ``[red_digest, black_digest, initial_state, [move, value], ...]`` with
        the value sign alternating per move.
        """
        sleep(random())
        playouts = randint(8, 12) * 100
        self.config.play.simulation_num_per_move = playouts
        logger.info(
            f"Set playouts = {self.config.play.simulation_num_per_move}")

        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)

        self.player1 = CChessPlayer(self.config, search_tree=search_tree1, pipes=pipe1,
                                    debugging=False, enable_resign=False,
                                    use_history=self.hist_base)
        self.player2 = CChessPlayer(self.config, search_tree=search_tree2, pipes=pipe2,
                                    debugging=False, enable_resign=False,
                                    use_history=self.hist_ng)

        # even: base model = red, unchecked = black; odd: the reverse
        if idx % 2 == 0:
            red, black = self.player1, self.player2
            logger.info(f"进程id = {self.pid} 基准模型执红,待评测模型执黑")
        else:
            red, black = self.player2, self.player1
            logger.info(f"进程id = {self.pid} 待评测模型执红,基准模型执黑")

        state = senv.INIT_STATE
        history = [state]
        value = 0       # best model's value
        turns = 0       # even == red; odd == black
        game_over = False
        # Must exist even when the loop breaks before senv.done() is reached.
        final_move = None
        no_eat_count = 0
        check = False

        while not game_over:
            start_time = time()
            no_act = None
            if not check and state in history[:-1]:
                # The position repeated: inspect what was played from it before.
                no_act = []
                idle_repeats = 0
                for past, following in zip(history, history[1:]):
                    if past != state:
                        continue
                    if senv.will_check_or_catch(state, following):
                        # The earlier continuation was a check or a chase:
                        # forbid playing it again.
                        no_act.append(following)
                    else:
                        # Otherwise it counts as an idle repetition.
                        idle_repeats += 1
                        if idle_repeats >= 2:
                            # Repeated idle moves: score the game as a draw.
                            game_over = True
                            value = 0
                            logger.info("闲着循环三次,作和棋处理")
                            break
            if game_over:
                break

            mover = red if turns % 2 == 0 else black
            action, _ = mover.action(state, turns, no_act=no_act)
            end_time = time()
            if self.config.opts.log_move:
                logger.debug(
                    f"进程id = {self.pid}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}"
                )
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            history.append(action)
            state, no_eat = senv.new_step(state, action)
            turns += 1
            no_eat_count = no_eat_count + 1 if no_eat else 0
            history.append(state)

            if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(
                    state, need_check=True)
                if not game_over and not senv.has_attack_chessman(state):
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0

        if final_move:
            # Play out the finishing move reported by ``senv.done`` so that it
            # is part of the recorded game.
            history.append(final_move)
            state = senv.step(state, final_move)
            turns += 1
            value = -value
            history.append(state)

        if idx % 2 == 0:
            data = [self.data['base']['digest'], self.data['unchecked']['digest']]
        else:
            data = [self.data['unchecked']['digest'], self.data['base']['digest']]

        self.player1.close()
        self.player2.close()

        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        data.append(history[0])
        for i in range(turns):
            # Moves sit at the odd indices of ``history``.
            data.append([history[i * 2 + 1], v])
            v = -v

        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns, data

    def save_play_data(self, idx, data, value, score):
        """Write ``data`` to a timestamped file, upload it, return the response."""
        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        filename = rc.play_data_filename_tmpl % game_id
        path = os.path.join(rc.play_data_dir, filename)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, data)
        logger.info(f"Uploading play data {filename} ...")
        red, black = data[0], data[1]
        return self.upload_eval_data(path, filename, red, black, value, score)

    def upload_eval_data(self, path, filename, red, black, result, score):
        """Upload one game file together with its metadata; return the response."""
        file_hash = self.fetch_digest(path)
        data = {
            'digest': self.data['unchecked']['digest'],
            'red_digest': red,
            'black_digest': black,
            'result': result,
            'score': score,
            'hash': file_hash,
        }
        return upload_file(self.config.internet.upload_eval_url, path, filename, data, rm=False)

    def fetch_digest(self, file_path):
        """Return the SHA-256 hex digest of ``file_path``, or ``None`` if it is missing."""
        if not os.path.exists(file_path):
            return None
        m = hashlib.sha256()
        with open(file_path, "rb") as f:
            # Hash in chunks so large files are not loaded at once.
            for chunk in iter(lambda: f.read(65536), b""):
                m.update(chunk)
        return m.hexdigest()
def _play_self_play_game(config, player):
    """Let ``player`` play both sides of one game.

    Returns ``(history, turns, value)``: ``history`` alternates states and
    moves starting with the initial state, ``turns`` is the number of
    half-moves, and ``value`` is the raw result as produced by the loop (0 for
    the draw branches, -1 when the side to move resigns, otherwise whatever
    ``senv.done`` reported).
    """
    state = senv.INIT_STATE
    history = [state]
    value = 0
    turns = 0
    game_over = False
    final_move = None
    no_eat_count = 0
    check = False

    while not game_over:
        no_act = None
        if not check and state in history[:-1]:
            # The position repeated: inspect what was played from it before.
            no_act = []
            idle_repeats = 0
            for past, following in zip(history, history[1:]):
                if past != state:
                    continue
                if senv.will_check_or_catch(state, following):
                    # The earlier continuation was a check or a chase:
                    # forbid playing it again.
                    no_act.append(following)
                else:
                    # Otherwise it counts as an idle repetition.
                    idle_repeats += 1
                    if idle_repeats >= 2:
                        # Repeated idle moves: score the game as a draw.
                        game_over = True
                        value = 0
                        logger.info("闲着循环三次,作和棋处理")
                        break
        if game_over:
            break

        start_time = time()
        action, policy = player.action(state, turns, no_act)
        end_time = time()
        if action is None:
            print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
            value = -1
            break
        print(
            f"博弈中: 回合{turns / 2 + 1} {'红方走棋' if turns % 2 == 0 else '黑方走棋'}, 着法: {action}, 用时: {(end_time - start_time):.1f}s"
        )
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
            game_over = True
            value = 0
            break
        turns += 1
        no_eat_count = no_eat_count + 1 if no_eat else 0
        history.append(state)

        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state, need_check=True)
            if not game_over and not senv.has_attack_chessman(state):
                logger.info(f"双方无进攻子力,作和。state = {state}")
                game_over = True
                value = 0

    if final_move:
        # Play out the finishing move reported by ``senv.done`` so that it is
        # part of the recorded game.
        history.append(final_move)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)

    return history, turns, value


def self_play_buffer(config, cur) -> (tuple, list):
    """Play one self-play game and return its training record.

    Args:
        config: project configuration (``config.play`` is read).
        cur: pool of pipes; one is borrowed for the game and always returned,
            even if the game raises.

    Returns:
        ``((turns, v), data)`` where ``v`` is the result after the odd-turn
        sign flip and ``data`` is ``[initial_state, [move, value], ...]`` with
        the value sign alternating per move.
    """
    pipe = cur.pop()  # borrow
    try:
        enable_resign = random() > config.play.enable_resign_rate
        player = CChessPlayer(config, search_tree=defaultdict(VisitState), pipes=pipe,
                              enable_resign=enable_resign, debugging=False)
        try:
            history, turns, value = _play_self_play_game(config, player)
        finally:
            # Always shut the player down, even when the game raised.
            player.close()
            del player
            gc.collect()
    finally:
        # Always hand the borrowed pipe back to the pool.
        cur.append(pipe)

    if turns % 2 == 1:  # black turn
        value = -value

    v = value
    data = [history[0]]
    for i in range(turns):
        # Moves sit at the odd indices of ``history``.
        data.append([history[i * 2 + 1], value])
        value = -value

    return (turns, v), data
class PlayWithHuman:
    """Pygame front end for playing a game against the AI.

    The main thread runs the pygame event/draw loop (``start``); a daemon
    thread (``ai_move``) plays the AI's moves.  ``self.history`` alternates
    states and moves and is used by the AI thread to detect repetitions.
    """

    def __init__(self, config: Config):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.winstyle = 0
        self.chessmans = None           # sprite group, created in init_screen
        self.human_move_first = True
        self.screen_width = 720
        self.height = 577
        self.width = 521                # width of the board area
        self.chessman_w = 57
        self.chessman_h = 57
        self.disp_record_num = 15       # number of move records shown
        self.rec_labels = [None] * self.disp_record_num
        self.nn_value = 0
        self.mcts_moves = {}
        self.history = []
        if self.config.opts.bg_style == 'WOOD':
            self.chessman_w += 1
            self.chessman_h += 1

    def load_model(self):
        """Create the model; build a fresh one if requested or if loading fails."""
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def init_screen(self):
        """Create the window and return ``(screen, board_background, widget_background)``."""
        bestdepth = pygame.display.mode_ok([self.screen_width, self.height], self.winstyle, 32)
        screen = pygame.display.set_mode([self.screen_width, self.height], self.winstyle, bestdepth)
        pygame.display.set_caption("中国象棋-AlphaZero")

        # create the background, tile the bgd image
        bgdtile = self.load_image(f'{self.config.opts.bg_style}.GIF')
        bgdtile = pygame.transform.scale(bgdtile, (self.width, self.height))
        board_background = pygame.Surface([self.width, self.height])
        board_background.blit(bgdtile, (0, 0))
        widget_background = pygame.Surface(
            [self.screen_width - self.width, self.height])
        white_rect = Rect(0, 0, self.screen_width - self.width, self.height)
        widget_background.fill((255, 255, 255), white_rect)

        # create text label
        font_file = self.config.resource.font_path
        font = pygame.font.Font(font_file, 16)
        font_color = (0, 0, 0)
        font_background = (255, 255, 255)
        t = font.render("着法记录", True, font_color, font_background)
        t_rect = t.get_rect()
        t_rect.x = 10
        t_rect.y = 10
        widget_background.blit(t, t_rect)

        screen.blit(board_background, (0, 0))
        screen.blit(widget_background, (self.width, 0))
        pygame.display.flip()
        self.chessmans = pygame.sprite.Group()
        self.creat_sprite_group(self.chessmans, self.env.board.chessmans_hash,
                                self.chessman_w, self.chessman_h)
        return screen, board_background, widget_background

    def _save_record(self):
        """Save the board's move record to a timestamped file."""
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S")
        path = os.path.join(
            self.config.resource.play_record_dir,
            self.config.resource.play_record_filename_tmpl % game_id)
        self.env.board.save_record(path)

    def _handle_board_click(self, current_chessman):
        """Handle a left click and return the sprite selected afterwards.

        Clicking a piece of the side to move selects it.  With a piece already
        selected, clicking anywhere else attempts to move it there; a
        successful move is recorded in ``self.history`` followed by the new
        state, and the selection is cleared.
        """
        mouse_x, mouse_y = pygame.mouse.get_pos()
        col_num, row_num = self.translate_hit_area(
            mouse_x, mouse_y, self.chessman_w, self.chessman_h)
        target = self.select_sprite_from_group(self.chessmans, col_num, row_num)

        if target is not None and target.chessman.is_red == self.env.red_to_move:
            # Clicked one of the mover's own pieces: (re)select it.
            if current_chessman is not None:
                current_chessman.is_selected = False
            target.is_selected = True
            return target

        if current_chessman is None:
            # Nothing selected and no own piece clicked: ignore the click.
            return None

        # Build the move text before moving: the piece's coordinates are
        # read here as the source square.
        move = str(current_chessman.chessman.col_num) + str(
            current_chessman.chessman.row_num) + str(col_num) + str(row_num)
        success = current_chessman.move(
            col_num, row_num, self.chessman_w, self.chessman_h)
        if not success:
            # Rejected moves must not enter the history: ai_move relies on
            # states and moves alternating.
            return current_chessman

        self.history.append(move)
        if target is not None:
            # Capture: drop the captured piece's sprite.
            self.chessmans.remove(target)
            target.kill()
        current_chessman.is_selected = False
        self.history.append(self.env.get_state())
        return None

    def start(self, human_first=True):
        """Run the game: set up the model, the AI thread and the event loop."""
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState), pipes=self.pipe,
                               enable_resign=True, debugging=True)
        self.human_move_first = human_first

        pygame.init()
        screen, board_background, widget_background = self.init_screen()
        framerate = pygame.time.Clock()

        current_chessman = None
        if human_first:
            self.env.board.calc_chessmans_moving_list()

        ai_worker = Thread(target=self.ai_move, name="ai_worker")
        ai_worker.daemon = True
        ai_worker.start()

        while not self.env.board.is_end():
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    self.env.board.print_record()
                    self.ai.close(wait=False)
                    self._save_record()
                    sys.exit()
                elif event.type == VIDEORESIZE:
                    pass
                elif event.type == MOUSEBUTTONDOWN:
                    # Only react while it is the human's turn and the left
                    # button (index 0) is pressed.
                    if human_first == self.env.red_to_move:
                        pressed_array = pygame.mouse.get_pressed()
                        if pressed_array and pressed_array[0]:
                            current_chessman = self._handle_board_click(current_chessman)

            self.draw_widget(screen, widget_background)
            framerate.tick(20)
            # clear/erase the last drawn sprites
            self.chessmans.clear(screen, board_background)

            # update all the sprites
            self.chessmans.update()
            self.chessmans.draw(screen)
            pygame.display.update()

        self.ai.close(wait=False)
        logger.info(f"Winner is {self.env.board.winner} !!!")
        self.env.board.print_record()
        self._save_record()
        sleep(3)

    def ai_move(self):
        """AI thread body: play the AI's moves until the game is done."""
        ai_move_first = not self.human_move_first
        self.history = [self.env.get_state()]
        while not self.env.done:
            if ai_move_first != self.env.red_to_move:
                continue

            self.ai.search_results = {}
            state = self.env.get_state()
            logger.info(f"state = {state}")
            _, _, _, check = senv.done(state, need_check=True)
            # Forbidden moves apply to this position only; start from scratch
            # every turn so an earlier repetition does not leak into later ones.
            no_act = None
            if not check and state in self.history[:-1]:
                no_act = []
                idle_repeats = 0
                for past, following in zip(self.history, self.history[1:]):
                    if past != state:
                        continue
                    if senv.will_check_or_catch(state, following):
                        # The earlier continuation was a check or a chase:
                        # forbid playing it again.
                        no_act.append(following)
                    else:
                        # Otherwise it counts as an idle repetition.
                        idle_repeats += 1
                        if idle_repeats >= 2:
                            # Repeated idle moves: declare a draw.
                            # NOTE(review): the AI still plays a move below
                            # after the draw is declared - confirm intended.
                            self.env.winner = Winner.draw
                            self.env.board.winner = Winner.draw
                            break
                if no_act:
                    logger.debug(f"no_act = {no_act}")
            action, policy = self.ai.action(state, self.env.num_halfmoves, no_act)
            if action is None:
                logger.info("AI has resigned!")
                return
            self.history.append(action)
            if not self.env.red_to_move:
                action = flip_move(action)
            key = self.env.get_state()
            p, v = self.ai.debug[key]
            logger.info(f"check = {check}, NN value = {v:.3f}")
            self.nn_value = v
            logger.info("MCTS results:")
            self.mcts_moves = {}
            for move, action_state in self.ai.search_results.items():
                move_cn = self.env.board.make_single_record(
                    int(move[0]), int(move[1]), int(move[2]), int(move[3]))
                logger.info(
                    f"move: {move_cn}-{move}, visit count: {action_state[0]}, Q_value: {action_state[1]:.3f}, Prior: {action_state[2]:.3f}"
                )
                self.mcts_moves[move_cn] = action_state
            x0, y0, x1, y1 = (int(ch) for ch in action[:4])
            chessman_sprite = self.select_sprite_from_group(self.chessmans, x0, y0)
            sprite_dest = self.select_sprite_from_group(self.chessmans, x1, y1)
            if sprite_dest:
                self.chessmans.remove(sprite_dest)
                sprite_dest.kill()
            chessman_sprite.move(x1, y1, self.chessman_w, self.chessman_h)
            self.history.append(self.env.get_state())

    def draw_widget(self, screen, widget_background):
        """Redraw the side panel: separator line, move records and AI info."""
        white_rect = Rect(0, 0, self.screen_width - self.width, self.height)
        widget_background.fill((255, 255, 255), white_rect)
        pygame.draw.line(widget_background, (255, 0, 0), (10, 285),
                         (self.screen_width - self.width - 10, 285))
        screen.blit(widget_background, (self.width, 0))
        self.draw_records(screen, widget_background)
        self.draw_evaluation(screen, widget_background)

    def draw_records(self, screen, widget_background):
        """Draw the last ``disp_record_num`` lines of the move record."""
        text = '着法记录'
        self.draw_label(screen, widget_background, text, 10, 16, 10)
        records = self.env.board.record.split('\n')
        font_file = self.config.resource.font_path
        font = pygame.font.Font(font_file, 12)
        for i, record in enumerate(records[-self.disp_record_num:]):
            self.rec_labels[i] = font.render(record, True, (0, 0, 0), (255, 255, 255))
            t_rect = self.rec_labels[i].get_rect()
            t_rect.y = 35 + i * 15
            t_rect.x = 10
            t_rect.width = self.screen_width - self.width
            widget_background.blit(self.rec_labels[i], t_rect)
        screen.blit(widget_background, (self.width, 0))

    def draw_evaluation(self, screen, widget_background):
        """Draw the AI information: playouts, NN value and the MCTS move table."""
        title_label = 'AlphaZero信息'
        self.draw_label(screen, widget_background, title_label, 300, 16, 10)
        info_label = f'MCTS搜索次数:{self.config.play.simulation_num_per_move}'
        self.draw_label(screen, widget_background, info_label, 335, 14, 10)
        eval_label = f"当前局势评估: {self.nn_value:.3f}"
        self.draw_label(screen, widget_background, eval_label, 360, 14, 10)
        label = f"MCTS搜索结果:"
        self.draw_label(screen, widget_background, label, 395, 14, 10)
        label = f"着法 访问计数 动作价值 先验概率"
        self.draw_label(screen, widget_background, label, 415, 12, 10)
        # Work on a copy: the AI thread replaces/fills mcts_moves concurrently.
        tmp = copy.deepcopy(self.mcts_moves)
        for i, (mov, action_state) in enumerate(tmp.items()):
            y = 435 + i * 20
            self.draw_label(screen, widget_background, f"{mov}", y, 12, 10)
            self.draw_label(screen, widget_background, f"{action_state[0]}", y, 12, 70)
            self.draw_label(screen, widget_background, f"{action_state[1]:.2f}", y, 12, 100)
            self.draw_label(screen, widget_background, f"{action_state[2]:.3f}", y, 12, 150)

    def draw_label(self, screen, widget_background, text, y, font_size, x=None):
        """Render ``text`` on the side panel at ``y``; centre it when ``x`` is None."""
        font_file = self.config.resource.font_path
        font = pygame.font.Font(font_file, font_size)
        label = font.render(text, True, (0, 0, 0), (255, 255, 255))
        t_rect = label.get_rect()
        t_rect.y = y
        if x is not None:
            t_rect.x = x
        else:
            t_rect.centerx = (self.screen_width - self.width) / 2
        widget_background.blit(label, t_rect)
        screen.blit(widget_background, (self.width, 0))

    def load_image(self, file, sub_dir=None):
        '''loads an image, prepares it for play'''
        if sub_dir:
            file = os.path.join(self.config.resource.image_path, sub_dir, file)
        else:
            file = os.path.join(self.config.resource.image_path, file)
        try:
            surface = pygame.image.load(file)
        except pygame.error:
            raise SystemExit('Could not load image "%s" %s' % (file, pygame.get_error()))
        return surface.convert()

    def load_images(self, *files):
        """Load several images from the ``PIECE_STYLE`` sub-directory."""
        return [self.load_image(file, PIECE_STYLE) for file in files]

    def creat_sprite_group(self, sprite_group, chessmans_hash, w, h):
        """Add one sprite per chessman in ``chessmans_hash`` to ``sprite_group``."""
        # Piece class -> letter used in the image file names; any other piece
        # uses 'P'.  The order matches the original isinstance chain.
        piece_codes = (
            (Rook, "R"),
            (Cannon, "C"),
            (Knight, "N"),
            (King, "K"),
            (Elephant, "B"),
            (Mandarin, "A"),
        )
        for chess in chessmans_hash.values():
            side = "R" if chess.is_red else "B"
            code = "P"
            for piece_cls, piece_code in piece_codes:
                if isinstance(chess, piece_cls):
                    code = piece_code
                    break
            # Two images per piece: "<name>.GIF" and "<name>S.GIF".
            images = self.load_images(f"{side}{code}.GIF", f"{side}{code}S.GIF")
            chessman_sprite = Chessman_Sprite(images, chess, w, h)
            sprite_group.add(chessman_sprite)

    def select_sprite_from_group(self, sprite_group, col_num, row_num):
        """Return the sprite standing on ``(col_num, row_num)``, or ``None``."""
        for sprite in sprite_group:
            if sprite.chessman.col_num == col_num and sprite.chessman.row_num == row_num:
                return sprite
        return None

    def translate_hit_area(self, screen_x, screen_y, w=80, h=80):
        """Convert pixel coordinates to ``(col, row)``; row 9 is the top row."""
        return screen_x // w, 9 - screen_y // h
class EvaluateWorker:
    """Plays a fixed number of best-model vs next-generation-model games."""

    def __init__(self, config: Config, pipes1=None, pipes2=None, pid=None):
        self.config = config
        self.player_bt = None
        self.player_ng = None
        self.pid = pid
        self.pipes_bt = pipes1      # pipe pool for the best model
        self.pipes_ng = pipes2      # pipe pool for the next-generation model

    def start(self):
        """Play ``config.eval.game_num`` games.

        Returns:
            The next-generation model's total score (1 per win, 0.5 per draw).
        """
        logger.debug(
            f"Evaluate#Start Process index = {self.pid}, pid = {os.getpid()}")
        score1 = 0      # best model
        score2 = 0      # next-generation model

        for idx in range(self.config.eval.game_num):
            start_time = time()
            score, turns = self.start_game(idx)
            end_time = time()

            if score < 0:
                score2 += 1
            elif score > 0:
                score1 += 1
            else:
                score2 += 0.5
                score1 += 0.5

            logger.debug(
                f"Process{self.pid} play game {idx} time={(end_time - start_time):.1f} sec, "
                f"turn={turns / 2}, best model {score1} - {score2} next generation model"
            )
        return score2  # return next generation model's score

    def start_game(self, idx):
        """Play one game and return ``(value, turns)``.

        An even ``idx`` gives red to the best model.  ``value`` is returned
        after the two sign flips at the end (odd turn count, odd ``idx``);
        ``start`` counts a positive value as a win for the best model.
        """
        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)

        self.player1 = CChessPlayer(self.config, search_tree=search_tree1, pipes=pipe1,
                                    debugging=False, enable_resign=True)
        self.player2 = CChessPlayer(self.config, search_tree=search_tree2, pipes=pipe2,
                                    debugging=False, enable_resign=True)

        # even: bst = red, ng = black; odd: bst = black, ng = red
        if idx % 2 == 0:
            red, black = self.player1, self.player2
            logger.debug(f"best model is red, ng is black")
        else:
            red, black = self.player2, self.player1
            logger.debug(f"best model is black, ng is red")

        state = senv.INIT_STATE
        value = 0       # best model's value
        turns = 0       # even == red; odd == black
        game_over = False

        while not game_over:
            mover = red if turns % 2 == 0 else black
            action, _ = mover.action(state, turns)
            if action is None:
                # The original referenced an undefined name ``turn`` here,
                # which raised NameError whenever a player resigned.
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break

            state = senv.step(state, action)
            turns += 1

            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, _ = senv.done(state)

        self.player1.close()
        self.player2.close()

        if turns % 2 == 1:  # black turn
            value = -value

        if idx % 2 == 1:
            # The best model played black in this game.
            value = -value

        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns