def test_loss_cut_ab(seed=random.random()):
    """Play cut_loss_alpha_beta_action against alpha_beta_action for 10
    seeded game pairs (sides swapped) and print the running score.

    NOTE(review): the default ``seed`` is evaluated once at import time,
    so repeated calls without an argument reuse the same seed.
    """
    create_ev_table(ev_table)  # build the evaluation table used by the searches
    print("seed", seed)
    winning_rate = 0.0  # 1 point per win, 0.5 per draw, for the measured agent
    drow_count = 0      # (sic) number of draws
    for i in range(10):
        random.seed(seed * i)
        state = State()
        # Imperfect-information state estimated from the true state.
        est_ii_state = EstimatedState()
        est_ii_state.create_est_ii_state_from_state(state)
        while True:
            # Game over
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player won
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break
            # Choose an action
            if state.is_first_player():
                action, est_ii_state = cut_loss_alpha_beta_action(
                    est_ii_state, 5)
                est_ii_state.my_real_next(state, action)
            else:
                action = alpha_beta_action(state)
                est_ii_state.enemy_real_next(action)
            print(state)
            state = state.next(action)
        # Rematch with sides swapped under the same seed.
        # NOTE(review): est_ii_state is NOT re-initialized for this second
        # game yet is still updated below — confirm this is intended.
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player won
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break
            # Choose an action
            if state.is_first_player():
                action = alpha_beta_action(state)
                est_ii_state.enemy_real_next(action)
            else:
                action = perfect_alpha_beta_action(state, 5)
                est_ii_state.my_real_next(state, action)
            state = state.next(action)
        print(winning_rate, (i + 1) * 2, drow_count)
def exp_reduction_effect(
        seed=random.random(), reduction_func=IDDFS_alpha_beta_action):
    """Measure the score of a time-limited ``reduction_func`` against a
    time-limited plain alpha-beta over 50 seeded game pairs (sides swapped).

    NOTE(review): the default ``seed`` is evaluated once at import time.
    """
    # Build the evaluation table.
    create_ev_table(ev_table)
    print("seed", seed)
    reduction_ab_action = time_limit_alpha_beta(reduction_func)  # agent being measured
    simple_ab_action = time_limit_alpha_beta(alpha_beta_action)  # opponent
    winning_rate = 0.0  # 1 per win, 0.5 per draw, for the measured agent
    drow_count = 0      # (sic) number of draws
    for i in range(50):
        random.seed(seed * i)
        state = State()
        while True:
            # Game over
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player (measured agent) won
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break
            # Choose an action
            if state.is_first_player():
                action = reduction_ab_action(state)
            else:
                action = simple_ab_action(state)
            state = state.next(action)
        # Rematch with sides swapped under the same seed.
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player (measured agent) won
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break
            # Choose an action
            if state.is_first_player():
                action = simple_ab_action(state)
            else:
                action = reduction_ab_action(state)
            state = state.next(action)
        print(winning_rate, (i + 1) * 2, drow_count)
def exp_search_depth_effect(seed=random.random(), deep_depth=5,
                            shallow_depth=3, search_func=alpha_beta_action):
    """Pit ``search_func`` at ``deep_depth`` against itself at
    ``shallow_depth`` over 50 seeded game pairs (sides swapped) and print
    the deep searcher's running score.

    NOTE(review): the default ``seed`` is evaluated once at import time.
    """
    # Build the evaluation table.
    create_ev_table(ev_table)
    print("seed", seed)
    winning_rate = 0.0  # 1 per win, 0.5 per draw, for the deep searcher
    drow_count = 0      # (sic) number of draws
    for i in range(50):
        random.seed(seed * i)
        state = State()
        while True:
            # Game over
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player (deep search) won
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break
            # Choose an action
            if state.is_first_player():
                action = search_func(state, deep_depth)     # deep search
            else:
                action = search_func(state, shallow_depth)  # shallow search
            state = state.next(action)
        # Rematch with sides swapped under the same seed.
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player (deep search) won
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break
            # Choose an action
            if state.is_first_player():
                action = search_func(state, shallow_depth)  # shallow search
            else:
                action = search_func(state, deep_depth)     # deep search
            state = state.next(action)
        print(winning_rate, (i + 1) * 2, drow_count)
def exp_effect_of_search_depth(func_id=2, seed=random.random()):
    """For each search depth in 2..6, play 100 seeded games of
    mcts_action (first player) vs alpha_beta_action (second player) and
    print the alpha-beta side's score per depth.

    NOTE(review): a later zero-argument function of the same name exists
    in this file; if both live in one module, the later definition
    shadows this one. The default ``seed`` is evaluated at import time.
    """
    # Build the evaluation table with the selected evaluation function.
    create_ev_table(ev_table, select_func(func_id))
    print("seed", seed)
    gamma = 100000  # effectively disables the threshold cut
    depths = [2, 3, 4, 5, 6]
    for depth in depths:
        winning_rate = 0.0
        drows_count = 0  # (sic) number of draws
        for i in range(100):
            random.seed(seed * i)
            state = State()
            while True:
                # Game over
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1  # second player (alpha-beta) won
                    else:  # draw
                        drows_count += 1
                        winning_rate += 0.5
                    break
                # Choose an action
                if state.is_first_player():
                    action = mcts_action(state)
                else:
                    action = alpha_beta_action(state, gamma, depth)
                state = state.next(action)
        print("勝率", winning_rate, "drows_count=", drows_count)
def exp_value_changing(depth=5, func_id=3, gamma=1.0, seed=None):
    """Record board evaluation values over 100 games of
    move_ordering_alpha_beta_action (first player) vs random_action.

    For every first-player turn, the current board is converted to an
    ``AccessableState`` and its evaluation is recorded along with the
    board; one list per game is appended to the aggregate records, which
    are printed at the end.

    Fixes vs the original:
    - ``record_values.apped`` / ``record_boards.apped`` raised
      AttributeError (typo for ``append``), so the function could never
      finish a game's bookkeeping.
    - ``seed=random.random()`` was evaluated once at import time; a
      ``None`` sentinel now draws a fresh seed per call while remaining
      backward compatible for callers that pass a seed.

    NOTE(review): ``func_id`` and ``gamma`` are accepted but unused here,
    kept for signature compatibility with the sibling experiments.
    """
    if seed is None:
        seed = random.random()
    record_values = []  # per-game lists of evaluation values
    record_boards = []  # per-game lists of boards matching each value
    for i in range(100):
        random.seed(seed * (i + 1))
        state = State()
        ii_state = AccessableState()
        values = []
        boards = []
        while True:
            if state.is_done():
                break
            if state.is_first_player():
                action = move_ordering_alpha_beta_action(state, 1, depth, i)
                # Evaluate and record the current board.
                ii_state.create_ii_state_from_state(state)
                values.append(evaluate_board_state(ii_state))
                boards.append([state.pieces, state.enemy_pieces])
            else:
                action = random_action(state)
            state = state.next(action)
        record_values.append(values)
        record_boards.append(boards)
    # TODO: write to csv
    print(record_values)
    print(record_boards)
def exp_fair_compete(depth=5, func_id=3, seed=random.random()): gamma = 100000 # スレッショルドカットを実施しない restricts = [True, False] print(seed) for restrict in restricts: create_ev_table(ev_table, select_func(func_id)) winning_rate = 0.0 drows_count = 0 for i in range(100): random.seed(seed * i) state = State() while True: # ゲーム終了時 if state.is_done(): if state.is_lose(): if state.depth % 2 == 0: winning_rate += 1 else: winning_rate += 0.5 drows_count += 1 break # 行動の取得 if state.is_first_player(): action = alpha_beta_action(state, gamma, depth, not restrict) else: action = alpha_beta_action(state, gamma, depth, restrict) state = state.next(action) print("制限", restrict, "のエージェントが後手の際の勝率") print(winning_rate, "drows_count=", drows_count)
def exp_effect_of_action_restrict_for_compete(depth=5, func_id=2, rdm=random.random()): gamma = 100000 # スレッショルドカットを実施しない restricts = [True, False] for restrict in restricts: create_ev_table(ev_table, select_func(func_id)) winning_rate = 0.0 drows_count = 0 for i in range(100): random.seed(rdm * i) state = State() while True: # ゲーム終了時 if state.is_done(): if state.is_lose(): if state.depth % 2 == 0: winning_rate += 1 else: winning_rate += 0.5 drows_count += 1 break # 行動の取得 if state.is_first_player(): action = ii_mcts_action(state) else: action = alpha_beta_action(state, gamma, depth, restrict) state = state.next(action) print("restrict", restrict) print(winning_rate, "drows_count=", drows_count)
def exp_effect_of_action_restrict_for_time(depth=5, func_id=2): gamma = 100000 # スレッショルドカットを実施しない create_ev_table(ev_table, select_func(func_id)) # 評価関数は固定 state = State() restrict_time = 0.0 no_restrict_time = 0.0 while True: # ゲーム終了時 if state.is_done(): break # 行動の取得 if state.is_first_player(): action = random_action(state) # ランダム行動 else: # 行動数の削減あり start = time.time() for _ in range(50): action = alpha_beta_action(state, gamma, depth, True) restrict_time += time.time() - start # 行動数の削減なし start = time.time() for _ in range(50): action = alpha_beta_action(state, gamma, depth, False) no_restrict_time += time.time() - start action = random_action(state) # お互いにランダム行動をさせる state = state.next(action) print("restrict:", restrict_time, "no_restrict:", no_restrict_time)
def play(model):
    """Run one self-play game and return its training history.

    Each record is [[pieces, enemy_pieces], policy vector, value]; the
    value column is back-filled once the game ends.
    """
    history = []
    state = State()
    while not state.is_done():
        # Probability distribution over legal moves from PV-MCTS.
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)
        # Expand the sparse legal-move scores into a dense policy vector.
        policies = [0] * DN_OUTPUT_SIZE
        for action, policy in zip(state.legal_actions(), scores):
            policies[action] = policy
        history.append([[state.pieces, state.enemy_pieces], policies, None])
        # Sample the move to play from the search distribution.
        action = np.random.choice(state.legal_actions(), p=scores)
        state = state.next(action)
    # Back-fill the game outcome, flipping sign every ply.
    value = first_player_value(state)
    for record in history:
        record[2] = value
        value = -value
    return history
def exp_gamma_time(depth=5, func_id=2, seed=random.random()):
    """Time alpha_beta_action (100 calls per measurement) at 30 gamma
    values (0.0, 0.1, ..., 2.9) on every second-player turn of one random
    game, accumulating per-gamma totals in ``keep_gamma_time``.

    NOTE(review): the default ``seed`` is evaluated once at import time.
    """
    print("seed", seed)
    random.seed(seed)
    state = State()
    create_ev_table(ev_table, select_func(func_id))
    keep_gamma_time = [0] * 30  # one time accumulator per gamma step
    # Loop until the game ends.
    while True:
        # Game over
        if state.is_done():
            break
        # Choose an action
        if state.is_first_player():
            action = random_action(state)
        else:
            gamma = 0.0
            for index, _ in enumerate(keep_gamma_time):
                start = time.time()
                for _ in range(100):
                    # action = alpha_beta_action(state, gamma)
                    action = alpha_beta_action(state, gamma, depth, False)
                keep_gamma_time[index] += time.time() - start
                gamma += 0.1
            # Move randomly so a variety of positions is measured.
            action = random_action(state)
            print(keep_gamma_time)
        # Advance to the next state.
        state = state.next(action)
def exp_effect_of_search_depth(): gamma = 100000 # スレッショルドカットを実施しない rdm = random.random() for func_id in range(8): create_ev_table(ev_table, select_func(func_id)) winning_rate = 0.0 drows_count = 0 for i in range(100): random.seed(rdm * i) state = State() while True: # ゲーム終了時 if state.is_done(): if state.is_lose(): if state.depth % 2 == 0: winning_rate += 1 else: winning_rate += 0.5 drows_count += 1 break # 行動の取得 if state.is_first_player(): action = ii_mcts_action(state) else: action = alpha_beta_action(state, gamma, 5) state = state.next(action) print(winning_rate, "id=", func_id, "drows_count=", drows_count)
def play(model):
    """Self-play one game; return [state_array, policy, value] records."""
    history = []
    state = State()
    while not state.is_done():
        # Search distribution over the legal moves.
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)
        # Scatter the scores into a dense policy vector.
        policies = [0] * DN_OUTPUT_SIZE
        for move, prob in zip(state.legal_actions(), scores):
            policies[move] = prob
        history.append([state.pieces_array(), policies, None])
        # Sample and apply the next move.
        state = state.next(np.random.choice(state.legal_actions(), p=scores))
    # Propagate the final value back through the records, flipping sign.
    value = first_player_value(state)
    for record in history:
        record[2] = value
        value = -value
    return history
def play(model):
    """Execute one self-play game and collect training data.

    Returns a list of [state_array, policy, value] records, one per ply.
    """
    history = []
    state = State()
    while not state.is_done():
        # Move probability distribution from the PV-MCTS search.
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)
        # Build a dense policy vector from the legal-move scores.
        policies = [0] * DN_OUTPUT_SIZE
        legal = state.legal_actions()
        for act, prob in zip(legal, scores):
            policies[act] = prob
        history.append([state.pieces_array(), policies, None])
        # Sample the actual move and advance the game.
        chosen = np.random.choice(legal, p=scores)
        state = state.next(chosen)
    # Assign the outcome to every record, alternating sign per ply.
    value = first_player_value(state)
    for record in history:
        record[2] = value
        value = -value
    return history
def vs_mcts(ev_func, seed, buttle_num):
    """Play ``buttle_num`` seeded game pairs (sides swapped) of depth-5
    alpha-beta using ``ev_func`` against mcts_action; print progress and
    return the alpha-beta side's total score (1 per win, 0.5 per draw).
    """
    winning_rate = 0.0
    drow_count = 0  # (sic) number of draws
    for i in range(buttle_num):
        random.seed(seed * i)
        state = State()
        while True:
            # Game over
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player (alpha-beta) won
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break
            # Choose an action
            if state.is_first_player():
                action = alpha_beta_action(state, ev_func, 5)
            else:
                action = mcts_action(state)
            state = state.next(action)
        # Rematch with sides swapped under the same seed.
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player (alpha-beta) won
                else:
                    winning_rate += 0.5
                    drow_count += 1
                break
            # Choose an action
            if state.is_first_player():
                action = mcts_action(state)
            else:
                action = alpha_beta_action(state, ev_func, 5)
            state = state.next(action)
        print(winning_rate, drow_count)
    return winning_rate
def play(model):
    """Self-play one game and return extended training records.

    Each record is [[pieces, enemy_pieces], policies, value, values,
    action, rp], where ``rp`` lists this record plus the following two
    (padded with the last position past the end of the game).
    """
    history = []
    state = State()
    while True:
        # Game over
        if state.is_done():
            break
        # Search distribution and value estimates for the current state.
        scores, values = pv_mcts_scores(model, state, SP_TEMPERATURE)
        # Expand the sparse scores into a dense policy vector.
        policies = [0] * DN_OUTPUT_SIZE
        for action, policy in zip(state.legal_actions(), scores):
            policies[action] = policy
        # Sample the move to play.
        action = np.random.choice(state.legal_actions(), p=scores)
        # state, policy, value, search result, chosen move, future positions
        history.append([[state.pieces, state.enemy_pieces], policies,
                        None, values, action, None])
        # Advance to the next state.
        state = state.next(action)
    # Back-fill the game outcome, flipping sign every ply.
    value = first_player_value(state)
    for i in range(len(history)):
        history[i][2] = value
        value = -value
    # Keep the final position's info for padding short lookaheads.
    last_state = history[-1][0]
    last_policy = [0] * DN_OUTPUT_SIZE
    v0 = history[0][2]
    v1 = history[1][2]  # NOTE(review): IndexError if only one ply was
    # recorded — confirm that cannot happen.
    for i in range(len(history)):
        rp = []
        for inc in range(3):
            index = i + inc
            if index < len(history):
                # NOTE(review): appends the record object itself, so
                # rp[0] aliases history[i], which will itself contain rp.
                rp.append(history[i + inc])
            else:
                # Pad past game end with the last state, an alternating
                # value, and a random action.
                v = v0 if ((i + inc) % 2) == 0 else v1
                a = randint(9)  # presumably numpy.random.randint — confirm
                rp.append([last_state, last_policy, v, v, a, None])
        history[i][5] = rp
    return history
def play(model, using_saved_state=False, saving_ontheway_state=False): ''' 1ゲームの実行 ''' # 学習データ history = [] # 状態の生成 if using_saved_state: state = load_state() if not state: state = State() else: state = State() starttime = time.time() print('') while True: # ゲーム終了時 if state.is_done(): endtime = time.time() print("first player is ", "lose" if state.is_lose() else "win") print("first player num:", state.piece_count(state.pieces)) print('elapsed time', endtime - starttime) print(state) break # 合法手の確率分布の取得 scores = pv_mcts_scores(model, state, SP_TEMPERATURE) # 学習データに状態と方策を追加 policies = [0] * DN_OUTPUT_SIZE for action, policy in zip(state.legal_actions(), scores): policies[action] = policy history.append([[state.pieces, state.enemy_pieces], policies, None]) # 行動の取得 if len(history) % 10 == 0: print("state len: ", len(history)) print(state) if saving_ontheway_state and len(history) == 25: save_state(state) action = np.random.choice(state.legal_actions(), p=scores) # 次の状態の取得 state = state.next(action) # 学習データに価値を追加 value = first_player_value(state) for i in range(len(history)): history[i][2] = value value = -value return history
def play(next_actions):
    """Play one game between the two given policies; return the first
    player's point.

    Note: this variant advances via ``state.next_state``.
    """
    state = State()
    while not state.is_done():
        # Pick the policy for the side to move (index 0: first player).
        if state.is_first_player():
            chooser = next_actions[0]
        else:
            chooser = next_actions[1]
        state = state.next_state(chooser(state))
    return first_player_point(state)
def play(next_actions):
    """Play out one game between the two given policies and return the
    first player's point."""
    state = State()
    while not state.is_done():
        # Index 0 holds the first player's policy, index 1 the second's.
        if state.is_first_player():
            next_action = next_actions[0]
        else:
            next_action = next_actions[1]
        action = next_action(state)
        state = state.next(action)
    return first_player_point(state)
def play(next_actions_num):
    """Play one game between the two given policies and return the first
    player's point.

    Bug fixed: the original discarded the result of
    ``state.next(action_num)``, so the state never advanced and the loop
    could not terminate; the sibling implementations in this file all
    rebind ``state``.
    """
    state = State()
    while True:
        if state.is_done():
            break
        # Pick the policy for the side to move.
        next_action_num = next_actions_num[0] if state.is_first_player(
        ) else next_actions_num[1]
        action_num = next_action_num(state)
        # Rebind: State.next returns the successor state.
        state = state.next(action_num)
    return first_player_point(state)
def play(next_actions):
    """Simulate a complete game and return the first player's point."""
    # Current game position.
    state = State()
    # Advance until a terminal position is reached.
    while not state.is_done():
        # The pair holds (first player's policy, second player's policy).
        policy = next_actions[0] if state.is_first_player() else next_actions[1]
        state = state.next(policy(state))
    # Score from the first player's perspective.
    return first_player_point(state)
def play(next_actions):
    """Play one full game and return the first player's point."""
    state = State()  # initial game state
    # Keep playing until the game ends.
    while True:
        if state.is_done():
            break
        # Select the acting player's policy function.
        if state.is_first_player():
            act = next_actions[0]
        else:
            act = next_actions[1]
        # Apply the chosen action.
        state = state.next(act(state))
    # Score from the first player's perspective.
    return first_player_point(state)
def play(next_actions) -> float:
    """Run a single game and return the first player's point."""
    state = State()
    while not state.is_done():
        # next_actions = (first player's policy, second player's policy)
        idx = 0 if state.is_first_player() else 1
        action = next_actions[idx](state)
        state = state.next(action)
    return first_player_point(state)
def exp_move_ordering_time(depth=5, func_id=3, gamma=1.0, seed=random.random()):
    """Time move_ordering_alpha_beta_action at each move-ordering depth
    0..depth over the positions of one random game.

    Fix: the original accumulated the per-depth timings in ``timer`` but
    neither printed nor returned them, so the measurement was discarded.
    The totals are now printed and returned (callers that ignored the
    previous ``None`` return are unaffected).

    NOTE(review): ``func_id`` and ``gamma`` are accepted but unused,
    kept for signature compatibility; the default ``seed`` is evaluated
    once at import time, matching the sibling experiments.
    """
    print("seed", seed)
    timer = [0.0] * (depth + 1)  # seconds accumulated per ordering depth
    random.seed(seed)
    state = State()
    while True:
        if state.is_done():
            break
        for i in range(depth + 1):
            start = time.time()
            move_ordering_alpha_beta_action(state, 1, depth, i)
            timer[i] += time.time() - start
        # Advance the game randomly so a variety of positions is timed.
        action = random_action(state)
        state = state.next(action)
    print(timer)
    return timer
def play(model):
    """Self-play one game and return [[pieces, enemy_pieces], policies,
    value] training records.

    Fixes vs the original:
    - The result of ``state.next(action_num)`` was discarded, so the
      game never advanced (the sibling implementations all rebind
      ``state``); it is now reassigned.
    - ``action_list.txt`` was unpickled on every move; the list does not
      change mid-game, so it is loaded once before the loop.
    """
    # Load the fixed action list once; its length sizes the policy vector.
    with open('action_list.txt', 'rb') as f:
        action_list = pickle.load(f)
    history = []
    state = State()
    while True:
        if state.is_done():
            break
        # Search distribution over the legal moves.
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)
        # Scatter the legal-move scores into a dense policy vector.
        policies = np.zeros(len(action_list))
        legal_actions = state.legal_actions()
        for i in range(len(legal_actions)):
            policies[legal_actions[i]] = scores[i]
        history.append([[state.pieces, state.enemy_pieces], policies, None])
        # Sample the move and advance the game (fixed: rebind the state).
        action_num = np.random.choice(legal_actions, p=scores)
        state = state.next(action_num)
    # Back-fill the outcome, flipping sign every ply.
    value = first_player_value(state)
    for i in range(len(history)):
        history[i][2] = value
        value = -value
    return history
def exp_gamma_winning_rate(depth=5, func_id=2, seed=random.random()):
    """For 30 gamma values (0.0, 0.1, ..., 2.9), play 100 seeded games of
    mcts_action (first player) vs alpha-beta (second player) and record
    the alpha-beta side's score per gamma in ``keep_gamma_winning_rate``.

    NOTE(review): the default ``seed`` is evaluated once at import time.
    """
    # Build the evaluation table.
    create_ev_table(ev_table, select_func(func_id))
    keep_gamma_winning_rate = [0] * 30  # score per gamma step
    print("seed", seed)
    gamma = 0.0
    for index, _ in enumerate(keep_gamma_winning_rate):
        winning_rate = 0.0
        for i in range(100):
            random.seed(seed * i)
            state = State()
            while True:
                # Game over
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1  # second player won
                        # elif state.depth % 2 == 1:
                        #     pass  # first player won
                    else:  # draw
                        winning_rate += 0.5
                    break
                # Choose an action
                if state.is_first_player():
                    # action = random_action(state)
                    action = mcts_action(state)
                else:
                    # action = alpha_beta_action(state, gamma)
                    action = alpha_beta_action(state, gamma, depth, False)
                state = state.next(action)
        keep_gamma_winning_rate[index] = winning_rate
        print(keep_gamma_winning_rate)
        gamma += 0.1
    print(keep_gamma_winning_rate)
def play(model):
    """Run one self-play game with PV-MCTS and return training records.

    Each record is [[my pieces, enemy pieces], policy vector, value];
    the value column is filled in after the game ends.
    """
    history = []
    state = State()
    while True:
        if state.is_done():
            break
        # Per-move search: yields one probability per legal action
        # (temperature adds exploration noise).
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)
        # Dense policy over all DN_OUTPUT_SIZE actions; only legal
        # actions receive non-zero probability.
        policies = [0] * DN_OUTPUT_SIZE
        for move, prob in zip(state.legal_actions(), scores):
            policies[move] = prob
        # Record ([my stones, opponent stones], policy, value placeholder).
        history.append([[state.pieces, state.enemy_pieces], policies, None])
        # Sample the actual move from the search distribution and play it.
        move = np.random.choice(state.legal_actions(), p=scores)
        state = state.next(move)
    # Assign the outcome to every record, flipping sign each ply.
    value = first_player_value(state)
    for idx in range(len(history)):
        history[idx][2] = value
        value = -value
    return history
def keisoku():
    """Run 100 games where both sides act through
    check_unnecessary_action, accumulating per-action counts in
    ``counter`` and printing them after each game.

    NOTE(review): ``counter`` is re-created every game, so each printed
    array covers a single game only; ``keep_info`` is created but never
    used — confirm whether it can be removed.
    """
    buttle_num = 0  # (sic) number of finished games
    for _ in range(100):
        # Fresh state and evaluation tables per game.
        state = State()
        create_ev_table(ev_table)
        create_red_ev_table(red_ev_table)  # original note: "big experiment"
        keep_info = KeepInfo()  # unused
        counter = [0] * 148  # per-action occurrence counts
        # Loop until the game ends.
        while True:
            # Game over
            if state.is_done():
                buttle_num += 1
                break
            # Choose an action. NOTE(review): both branches are
            # identical; kept exactly as in the original.
            if state.is_first_player():
                action, counter = check_unnecessary_action(state, counter)
            else:
                action, counter = check_unnecessary_action(state, counter)
            state = state.next(action)
        print(buttle_num, "戦目")
        print(counter)
class GameUI(tk.Frame):
    """Tkinter UI for tic-tac-toe: the human plays first (circles)
    against a mini-max AI on a 240x240 canvas of 80-pixel cells."""

    def __init__(self, master=None, model=None):
        # ``model`` is accepted but unused here; the AI is mini_max_action.
        tk.Frame.__init__(self, master)
        self.master.title("三目並べ")
        self.state = State()                 # current game state
        self.next_action = mini_max_action   # AI move-selection function
        self.c = tk.Canvas(self, width=240, height=240, highlightthickness=0)
        self.c.bind("<Button-1>", self.turn_of_human)
        self.c.pack()
        self.on_draw()

    def turn_of_human(self, event):
        # Click handler. If the game is over, restart it.
        if self.state.is_done():
            self.state = State()
            self.on_draw()
            return
        # Ignore clicks when it is not the human's (first player's) turn.
        if not self.state.is_first_player():
            return
        # Convert pixel coordinates to a 3x3 cell index.
        x = int(event.x / 80)
        y = int(event.y / 80)
        if x < 0 or 2 < x or y < 0 or 2 < y:
            return
        action = x + y * 3
        # Reject illegal moves.
        if not (action in self.state.legal_actions()):
            return
        self.state = self.state.next(action)
        self.on_draw()
        # Let the UI repaint, then hand the turn to the AI.
        self.master.after(1, self.turn_of_ai)

    def turn_of_ai(self):
        # AI move; no-op if the game already ended.
        if self.state.is_done():
            return
        action = self.next_action(self.state)
        self.state = self.state.next(action)
        self.on_draw()

    def draw_piece(self, index, first_player):
        # Cell origin plus 10px padding.
        x = (index % 3) * 80 + 10
        y = int(index / 3) * 80 + 10
        if first_player:
            # First player: white circle.
            self.c.create_oval(x, y, x + 60, y + 60, width=2.0, outline="#FFFFFF")
        else:
            # Second player: gray cross.
            self.c.create_line(x, y, x + 60, y + 60, width=2.0, fill="#5D5D5D")
            self.c.create_line(x + 60, y, x, y + 60, width=2.0, fill="#5D5D5D")

    def on_draw(self):
        # Redraw the board grid and all pieces from scratch.
        self.c.delete("all")
        self.c.create_rectangle(0, 0, 240, 240, width=0.0, fill="#00A0FF")
        self.c.create_line(80, 0, 80, 240, width=2.0, fill="#0077BB")
        self.c.create_line(160, 0, 160, 240, width=2.0, fill="#0077BB")
        self.c.create_line(0, 80, 240, 80, width=2.0, fill="#0077BB")
        self.c.create_line(0, 160, 240, 160, width=2.0, fill="#0077BB")
        for i in range(9):
            if self.state.pieces[i] == 1:
                self.draw_piece(i, self.state.is_first_player())
            if self.state.enemy_pieces[i] == 1:
                self.draw_piece(i, not self.state.is_first_player())
def boltzman(xs, temperature):
    """Apply a Boltzmann-like temperature transform to ``xs`` and return
    the normalized distribution.

    Each value is raised to ``1 / temperature`` and the result is
    normalized to sum to 1.

    Fix: the original recomputed ``sum(xs)`` inside the normalizing list
    comprehension for every element (accidental O(n^2)); the total is
    now computed once. Results are unchanged.
    """
    xs = [x ** (1 / temperature) for x in xs]
    total = sum(xs)
    return [x / total for x in xs]


# Smoke test: load the newest model and let PV-MCTS play a game.
if __name__ == '__main__':
    # Load the most recent model checkpoint.
    path = sorted(Path('model').glob('*.h5'))[-1]
    model = load_model(str(path))
    # Fresh game state.
    state = State()
    # Move-selection function backed by PV-MCTS.
    next_action = pv_mcts_action(model, 1.0)
    # Play until the game ends, printing each position.
    while True:
        if state.is_done():
            break
        action = next_action(state)
        state = state.next(action)
        print(state)
class GameUI(tk.Frame):
    """Tkinter UI for mini-shogi ("animal shogi"-style 3x4 board): the
    human plays first against a PV-MCTS agent on a 240x400 canvas."""

    # Initialization
    def __init__(self, master=None, model=None):
        tk.Frame.__init__(self, master)
        self.master.title('간이 장기')
        # Game state.
        self.state = State()
        self.select = -1  # selection (-1: none, 0-11: square, 12-14: captured piece)
        # The eight move directions as (dx, dy).
        self.dxy = ((0, -1), (1, -1), (1, 0), (1, 1),
                    (0, 1), (-1, 1), (-1, 0), (-1, -1))
        # PV-MCTS move selection (temperature 0 = greedy).
        self.next_action = pv_mcts_action(model, 0.0)
        # Piece images: (mine, opponent's, captured-mine, captured-opponent's).
        self.images = [(None, None, None, None)]
        for i in range(1, 5):
            image = Image.open('piece{}.png'.format(i))
            self.images.append((
                ImageTk.PhotoImage(image),
                ImageTk.PhotoImage(image.rotate(180)),
                ImageTk.PhotoImage(image.resize((40, 40))),
                ImageTk.PhotoImage(image.resize((40, 40)).rotate(180))))
        # Canvas setup.
        self.c = tk.Canvas(self, width=240, height=400, highlightthickness=0)
        self.c.bind('<Button-1>', self.turn_of_human)
        self.c.pack()
        # Initial paint.
        self.on_draw()

    # Human's turn (click handler)
    def turn_of_human(self, event):
        # If the game is over, restart it.
        if self.state.is_done():
            self.state = State()
            self.on_draw()
            return
        # Ignore clicks when it is not the first player's turn.
        if not self.state.is_first_player():
            return
        # Collect the kinds of captured pieces held in hand.
        captures = []
        for i in range(3):
            if self.state.pieces[12 + i] >= 2:
                captures.append(1 + i)
            if self.state.pieces[12 + i] >= 1:
                captures.append(1 + i)
        # Compute selection / move target (0-11: board squares,
        # 12-13: captured pieces).
        p = int(event.x / 80) + int((event.y - 40) / 80) * 3
        if 40 <= event.y and event.y <= 360:
            select = p
        elif event.x < len(captures) * 40 and event.y > 360:
            select = 12 + int(event.x / 40)
        else:
            return
        # First click selects a piece.
        if self.select < 0:
            self.select = select
            self.on_draw()
            return
        # Second click: convert selection + destination into an action.
        action = -1
        if select < 12:
            # Moving a piece on the board.
            if self.select < 12:
                action = self.state.position_to_action(
                    p, self.position_to_direction(self.select, p))
            # Dropping a captured piece.
            else:
                action = self.state.position_to_action(
                    p, 8 - 1 + captures[self.select - 12])
        # Reject illegal moves and clear the selection.
        if not (action in self.state.legal_actions()):
            self.select = -1
            self.on_draw()
            return
        # Apply the move.
        self.state = self.state.next(action)
        self.select = -1
        self.on_draw()
        # Hand the turn to the AI.
        self.master.after(1, self.turn_of_ai)

    # AI's turn
    def turn_of_ai(self):
        # No-op if the game already ended.
        if self.state.is_done():
            return
        action = self.next_action(self.state)
        self.state = self.state.next(action)
        self.on_draw()

    # Convert a destination square into a move direction index.
    def position_to_direction(self, position_src, position_dst):
        dx = position_dst % 3 - position_src % 3
        dy = int(position_dst / 3) - int(position_src / 3)
        for i in range(8):
            if self.dxy[i][0] == dx and self.dxy[i][1] == dy:
                return i
        return 0

    # Draw one piece on the board.
    def draw_piece(self, index, first_player, piece_type):
        x = (index % 3) * 80
        y = int(index / 3) * 80 + 40
        index = 0 if first_player else 1
        self.c.create_image(x, y,
                            image=self.images[piece_type][index], anchor=tk.NW)

    # Draw the captured pieces held in hand.
    def draw_capture(self, first_player, pieces):
        # Image variant, start x, x step, and y row differ per side.
        index, x, dx, y = (2, 0, 40, 360) if first_player else (3, 200, -40, 0)
        captures = []
        for i in range(3):
            if pieces[12 + i] >= 2:
                captures.append(1 + i)
            if pieces[12 + i] >= 1:
                captures.append(1 + i)
        for i in range(len(captures)):
            self.c.create_image(x + dx * i, y,
                                image=self.images[captures[i]][index],
                                anchor=tk.NW)

    # Draw the red selection cursor as four border lines.
    def draw_cursor(self, x, y, size):
        self.c.create_line(x + 1, y + 1, x + size - 1, y + 1,
                           width=4.0, fill='#FF0000')
        self.c.create_line(x + 1, y + size - 1, x + size - 1, y + size - 1,
                           width=4.0, fill='#FF0000')
        self.c.create_line(x + 1, y + 1, x + 1, y + size - 1,
                           width=4.0, fill='#FF0000')
        self.c.create_line(x + size - 1, y + 1, x + size - 1, y + size - 1,
                           width=4.0, fill='#FF0000')

    # Repaint the whole screen.
    def on_draw(self):
        # Board background and grid lines.
        self.c.delete('all')
        self.c.create_rectangle(0, 0, 240, 400, width=0.0, fill='#EDAA56')
        for i in range(1, 3):
            self.c.create_line(i * 80 + 1, 40, i * 80, 360,
                               width=2.0, fill='#000000')
        for i in range(5):
            self.c.create_line(0, 40 + i * 80, 240, 40 + i * 80,
                               width=2.0, fill='#000000')
        # Pieces (board is mirrored when viewed from the second player).
        for p in range(12):
            p0, p1 = (p, 11 - p) if self.state.is_first_player() else (11 - p, p)
            if self.state.pieces[p0] != 0:
                self.draw_piece(p, self.state.is_first_player(),
                                self.state.pieces[p0])
            if self.state.enemy_pieces[p1] != 0:
                self.draw_piece(p, not self.state.is_first_player(),
                                self.state.enemy_pieces[p1])
        # Captured pieces for both sides.
        self.draw_capture(self.state.is_first_player(), self.state.pieces)
        self.draw_capture(not self.state.is_first_player(),
                          self.state.enemy_pieces)
        # Selection cursor.
        if 0 <= self.select and self.select < 12:
            self.draw_cursor(int(self.select % 3) * 80,
                             int(self.select / 3) * 80 + 40, 80)
        elif 12 <= self.select:
            self.draw_cursor((self.select - 12) * 40, 360, 40)