def exp_value_changing(depth=5, func_id=3, gamma=1.0, seed=random.random()):
    """Play 100 games and record how the board evaluation changes over time.

    In each game the first player moves with
    ``move_ordering_alpha_beta_action`` and the second player moves randomly.
    Before every first-player move the current board is evaluated and both
    the evaluation and the board are recorded.  The per-game records are
    printed at the end.

    Args:
        depth: Search depth passed to ``move_ordering_alpha_beta_action``.
        func_id: Not used by this function.
        gamma: Not used by this function.
        seed: Base seed; game ``i`` is seeded with ``seed * (i + 1)``.
            NOTE(review): the default is evaluated once when the function is
            defined, so every call without an explicit seed shares one value.
    """
    record_values = []  # evaluation values, one list per game
    record_boards = []  # boards matching record_values entry by entry
    for i in range(100):
        random.seed(seed * (i + 1))
        state = State()
        ii_state = AccessableState()
        values = []
        boards = []
        while not state.is_done():
            if state.is_first_player():
                action = move_ordering_alpha_beta_action(state, 1, depth, i)
                # Evaluate the current board and record it.
                ii_state.create_ii_state_from_state(state)
                values.append(evaluate_board_state(ii_state))
                boards.append([state.pieces, state.enemy_pieces])
            else:
                action = random_action(state)
            state = state.next(action)
        # Fixed: the original called the non-existent list method ``apped``.
        record_values.append(values)
        record_boards.append(boards)
    # TODO: write the records to a CSV file
    print(record_values)
    print(record_boards)
def turn_of_human(self, event):
    """Handle a mouse click as the human (first player) move.

    The canvas is divided into 45-pixel cells and an action is encoded as
    ``x + y * 15``.  Clicks outside the board, illegal moves and clicks made
    while it is not the first player's turn are ignored.  A click after the
    game has finished resets the game.

    Args:
        event: Click event providing pixel coordinates ``x`` and ``y``.
    """
    # Finished game: start a new one, redraw and stop.
    if self.state.is_done():
        self.state = State()
        self.on_draw()
        return
    # Ignore clicks while it is not the first player's turn.
    if not self.state.is_first_player():
        return
    # Convert the click position to cell coordinates.
    x = int(event.x / 45)
    y = int(event.y / 45)
    # A row holds 15 cells (stride 15), so valid columns are 0..14.  The
    # original accepted x == 15, which aliased to column 0 of the next row.
    # NOTE(review): the row limit is left as in the original; out-of-range
    # rows are rejected by the legal-action check below.
    if x < 0 or 14 < x or y < 0 or 15 < y:
        return
    # Encode the clicked cell as an action.
    action = x + y * 15
    print(action)
    # Reject moves that are not legal in the current state.
    if action not in self.state.legal_actions():
        return
    # Apply the move and redraw.
    self.state = self.state.next(action)
    self.on_draw()
    # Give the UI 1 ms to redraw before handing the turn to the AI.
    self.master.after(1, self.turn_of_ai)
def onGameRevealed(self, players, spies):
    """Set up the leader rotation and a fresh game state.

    Args:
        players: Players taking part; leadership cycles through them in order.
        spies: Not used by this method.
    """
    rotation = itertools.cycle(players)
    self.leadership = rotation

    game = State()
    game.players = players
    # The first player in the rotation starts as leader.
    game.leader = next(rotation)
    self.state = game
def exp_gamma_time(depth=5, func_id=2, seed=random.random()):
    """Measure alpha-beta search time for 30 gamma values over one game.

    Both players actually move randomly.  On each second-player turn,
    ``alpha_beta_action`` is timed for gamma = 0.0, 0.1, ... (30 values,
    100 calls each) and the elapsed time is accumulated per gamma value.

    Args:
        depth: Search depth passed to ``alpha_beta_action``.
        func_id: Selects the evaluation function used to build the table.
        seed: Seed for ``random``.
            NOTE(review): the default is evaluated once at definition time.
    """
    print("seed", seed)
    random.seed(seed)
    state = State()
    create_ev_table(ev_table, select_func(func_id))
    keep_gamma_time = [0] * 30

    # Loop until the game ends.
    while not state.is_done():
        if state.is_first_player():
            action = random_action(state)
        else:
            gamma = 0.0
            for index in range(len(keep_gamma_time)):
                # perf_counter is a monotonic, high-resolution clock meant for
                # benchmarking; time.time() can jump when the system clock is
                # adjusted.
                start = time.perf_counter()
                for _ in range(100):
                    action = alpha_beta_action(state, gamma, depth, False)
                keep_gamma_time[index] += time.perf_counter() - start
                gamma += 0.1
            # Play a random move so the measured positions vary.
            action = random_action(state)
            print(keep_gamma_time)
        # Advance to the next state.
        state = state.next(action)
def turn_of_human(self, event):
    """Handle a mouse click as the human (first player) move.

    The horizontal click position is mapped to one of seven 40-pixel-wide
    columns, and the column index is the action.

    Args:
        event: Click event providing the pixel coordinate ``x``.
    """
    board = self.state

    # Finished game: start over and redraw.
    if board.is_done():
        self.state = State()
        self.on_draw()
        return

    # Only the first player is driven by clicks.
    if not board.is_first_player():
        return

    column = int(event.x / 40)
    if not (0 <= column <= 6):  # outside the board
        return

    # Ignore moves that are not legal.
    if column not in board.legal_actions():
        return

    self.state = board.next(column)
    self.on_draw()

    # Hand over to the AI after a 1 ms delay.
    self.master.after(1, self.turn_of_ai)
def exp_effect_of_action_restrict_for_time(depth=5, func_id=2):
    """Compare alpha-beta search time with and without action restriction.

    Both players actually move randomly for one game.  On each second-player
    turn, ``alpha_beta_action`` is timed for 50 calls with its last argument
    ``True`` (restricted) and 50 calls with ``False`` (unrestricted).  The two
    accumulated times are printed when the game ends.

    Args:
        depth: Search depth passed to ``alpha_beta_action``.
        func_id: Selects the evaluation function used to build the table.
    """
    gamma = 100000  # large enough that no threshold cut is applied
    create_ev_table(ev_table, select_func(func_id))  # evaluation function is fixed
    state = State()
    restrict_time = 0.0
    no_restrict_time = 0.0

    while not state.is_done():
        if state.is_first_player():
            action = random_action(state)
        else:
            # perf_counter is the monotonic, high-resolution clock meant for
            # benchmarking; time.time() can jump with system clock changes.
            # With action restriction.
            start = time.perf_counter()
            for _ in range(50):
                action = alpha_beta_action(state, gamma, depth, True)
            restrict_time += time.perf_counter() - start
            # Without action restriction.
            start = time.perf_counter()
            for _ in range(50):
                action = alpha_beta_action(state, gamma, depth, False)
            no_restrict_time += time.perf_counter() - start
            # Both sides play randomly; the searched action is discarded.
            action = random_action(state)
        state = state.next(action)
    print("restrict:", restrict_time, "no_restrict:", no_restrict_time)
def turn_of_human(self, event):
    """Handle a mouse click as the human (first player) move on a 6x6 board.

    Cells are 40 pixels square and an action is encoded as ``x + y * 6``.
    Action 36 is the pass move and is played automatically when it is the
    only legal action.

    Args:
        event: Click event providing pixel coordinates ``x`` and ``y``.
    """
    current = self.state

    # Finished game: start over and redraw.
    if current.is_done():
        self.state = State()
        self.on_draw()
        return

    # Only the first player is driven by clicks.
    if not current.is_first_player():
        return

    col = int(event.x / 40)
    row = int(event.y / 40)
    if not (0 <= col <= 5 and 0 <= row <= 5):  # outside the board
        return
    action = col + row * 6

    legal = current.legal_actions()
    if legal == [36]:
        action = 36  # forced pass
    elif action not in legal:
        return

    self.state = current.next(action)
    self.on_draw()

    # Hand over to the AI after a 1 ms delay.
    self.master.after(1, self.turn_of_ai)
def __init__(self, master=None, model=None):
    """Build the game window: state, AI move selector, piece images, canvas.

    Args:
        master: Parent tkinter widget.
        model: Model handed to ``pv_mcts_action``.
    """
    tk.Frame.__init__(self, master)
    self.master.title('簡易将棋')

    self.state = State()
    self.select = -1
    # The eight direction offsets as (dx, dy).
    self.dxy = ((0, -1), (1, -1), (1, 0), (1, 1),
                (0, 1), (-1, 1), (-1, 0), (-1, -1))
    self.next_action = pv_mcts_action(model, 0.0)

    # Index 0 is a placeholder; pieces 1..4 each get four variants:
    # full size, full size rotated, 40x40, 40x40 rotated.
    self.images = [(None, None, None, None)]
    for piece_no in range(1, 5):
        full = Image.open('piece{}.png'.format(piece_no))
        small = full.resize((40, 40))
        variants = (
            ImageTk.PhotoImage(full),
            ImageTk.PhotoImage(full.rotate(180)),
            ImageTk.PhotoImage(small),
            ImageTk.PhotoImage(small.rotate(180)),
        )
        self.images.append(variants)

    self.c = canvas = tk.Canvas(self, width=240, height=400,
                                highlightthickness=0)
    canvas.bind('<Button-1>', self.turn_of_human)
    canvas.pack()

    self.on_draw()
def __init__(self, master=None, model=None, ai_is_first=True):
    """Build the reversi window and optionally let the AI open the game.

    Args:
        master: Parent tkinter widget.
        model: Model handed to ``pv_mcts_action``.
        ai_is_first: When true, the AI plays a move during construction.
    """
    self.ai_is_first = ai_is_first
    tk.Frame.__init__(self, master)
    self.master.title('リバーシ')

    # Game state; prev_state starts out empty.
    self.state = State()
    self.prev_state = None

    # Move selector backed by PV MCTS.
    self.next_action = pv_mcts_action(model, 0.0)

    # The canvas is one 40-pixel column wider than the board.
    board_px = BOARD_SIZE * 40
    self.c = tk.Canvas(self, width=board_px + 40, height=board_px,
                       highlightthickness=0)

    # Let the AI move before click handling is wired up.
    if self.ai_is_first:
        self.turn_of_ai()

    self.c.bind('<Button-1>', self.turn_of_human)
    self.c.pack()

    self.on_draw()
def turn_of_human(self, event):
    """Handle a mouse click as the human (first player) move.

    The horizontal click position is mapped to one of seven 40-pixel-wide
    columns, and the column index is the action.

    Args:
        event: Click event providing the pixel coordinate ``x``.
    """
    board = self.state

    # Finished game: start over and redraw.
    if board.is_done():
        self.state = State()
        self.on_draw()
        return

    # Only the first player is driven by clicks.
    if not board.is_first_player():
        return

    column = int(event.x / 40)
    if not (0 <= column <= 6):  # outside the board
        return

    # Ignore moves that are not legal.
    if column not in board.legal_actions():
        return

    self.state = board.next(column)
    self.on_draw()

    # Hand over to the AI after a 1 ms delay.
    self.master.after(1, self.turn_of_ai)
def turn_of_human(self, touch):
    """Handle a touch as the human (first player) move on a 3x3 board.

    Cells are 160 units square and an action is encoded as ``x + y * 3``.
    The game state lives in the module-level ``state`` variable.

    Args:
        touch: Touch event whose ``pos`` holds the (x, y) position.
    """
    global state

    # Finished game: start over and reset the widget.
    if state.is_done():
        state = State()
        self.reset()
        return

    # Only the first player is driven by touches.
    if not state.is_first_player():
        return

    col = int(touch.pos[0] / 160)
    row = int(touch.pos[1] / 160)
    if not (0 <= col <= 2 and 0 <= row <= 2):  # outside the board
        return
    action = col + row * 3

    # Ignore moves that are not legal.
    if action not in state.legal_actions():
        return

    state = state.next(action)

    # Draw the human's mark, then let the AI answer immediately.
    self.draw_piece(action)
    self.turn_of_ai()
def __init__(self, master=None, model=None):
    """Build the game window: state, AI move selector, piece images, canvas.

    Args:
        master: Parent tkinter widget.
        model: Model handed to ``pv_mcts_action``.
    """
    tk.Frame.__init__(self, master)
    self.master.title('간이 장기')

    # Game state and current selection
    # (-1: none, 0-11: board square, 12-14: captured piece).
    self.state = State()
    self.select = -1

    # The eight direction offsets as (dx, dy).
    self.dxy = ((0, -1), (1, -1), (1, 0), (1, 1),
                (0, 1), (-1, 1), (-1, 0), (-1, -1))

    # Move selector backed by PV MCTS.
    self.next_action = pv_mcts_action(model, 0.0)

    # Index 0 is a placeholder; pieces 1..4 each get four variants:
    # full size, full size rotated, 40x40, 40x40 rotated.
    self.images = [(None, None, None, None)]
    for piece_no in range(1, 5):
        full = Image.open('piece{}.png'.format(piece_no))
        small = full.resize((40, 40))
        variants = (
            ImageTk.PhotoImage(full),
            ImageTk.PhotoImage(full.rotate(180)),
            ImageTk.PhotoImage(small),
            ImageTk.PhotoImage(small.rotate(180)),
        )
        self.images.append(variants)

    self.c = canvas = tk.Canvas(self, width=240, height=400,
                                highlightthickness=0)
    canvas.bind('<Button-1>', self.turn_of_human)
    canvas.pack()

    # Initial draw.
    self.on_draw()
def turn_of_human(self, event):
    """Handle a mouse click as the human (first player) move on a 6x6 board.

    Cells are 40 pixels square and an action is encoded as ``x + y * 6``.
    Action 36 is the pass move and is played automatically when it is the
    only legal action.

    Args:
        event: Click event providing pixel coordinates ``x`` and ``y``.
    """
    current = self.state

    # Finished game: start over and redraw.
    if current.is_done():
        self.state = State()
        self.on_draw()
        return

    # Only the first player is driven by clicks.
    if not current.is_first_player():
        return

    col = int(event.x / 40)
    row = int(event.y / 40)
    if not (0 <= col <= 5 and 0 <= row <= 5):  # outside the board
        return
    action = col + row * 6

    legal = current.legal_actions()
    if legal == [36]:
        action = 36  # forced pass
    elif action not in legal:
        return

    self.state = current.next(action)
    self.on_draw()

    # Hand over to the AI after a 1 ms delay.
    self.master.after(1, self.turn_of_ai)
def _rollout(state: State):
    """Return the result of a random rollout starting from ``state``.

    Random moves are played on a private copy of the board until
    ``State.is_player_won`` reports a win for the side that just moved, or
    until no valid action remains.  ``state`` itself is not modified.

    Args:
        state: Starting position; ``board``, ``player`` and
            ``allowed_actions`` are read from it.

    Returns:
        0 when no valid action remains (draw); otherwise -1 if the side to
        move when the loop ends is ``state.player``, else 1.
    """
    board = state.board.copy()
    player = state.player
    valid_actions = set(state.allowed_actions)
    print(state)
    print(valid_actions)

    # Random descent until the game ends.
    while not State.is_player_won(board, -player):
        action = random.choice(list(valid_actions))
        print(action)
        board[action] = player
        # Fixed: the original assigned ``state.board = board`` here just to
        # print the position, which rebound the caller's state to the scratch
        # board and let the rollout corrupt it.  Print the scratch board.
        print(board)
        player = -player

        # The played cell is gone; its empty neighbours become playable.
        valid_actions.remove(action)
        valid_actions.update(x for x in NEIGHBORS[action] if board[x] == 0)
        print(valid_actions)
        # NOTE(review): this draw check runs before the win check for the
        # move just played — confirm that is intended.
        if not valid_actions:
            return 0
        print()

    return -1 if player == state.player else 1
def __init__(self, master=None, model=None):
    """Build the game window: state, AI move selector, piece images, canvas.

    Args:
        master: Parent tkinter widget.
        model: Model handed to ``pv_mcts_action``.
    """
    tk.Frame.__init__(self, master)
    self.master.title('簡易将棋')

    # Game state and current selection
    # (-1: none, 0-11: board square, 12-14: piece in hand).
    self.state = State()
    self.select = -1

    # The eight direction offsets as (dx, dy).
    self.dxy = ((0, -1), (1, -1), (1, 0), (1, 1),
                (0, 1), (-1, 1), (-1, 0), (-1, -1))

    # Move selector backed by PV MCTS.
    self.next_action = pv_mcts_action(model, 0.0)

    # Index 0 is a placeholder; pieces 1..4 each get four variants:
    # full size, full size rotated, 40x40, 40x40 rotated.
    self.images = [(None, None, None, None)]
    for piece_no in range(1, 5):
        full = Image.open('piece{}.png'.format(piece_no))
        small = full.resize((40, 40))
        variants = (
            ImageTk.PhotoImage(full),
            ImageTk.PhotoImage(full.rotate(180)),
            ImageTk.PhotoImage(small),
            ImageTk.PhotoImage(small.rotate(180)),
        )
        self.images.append(variants)

    self.c = canvas = tk.Canvas(self, width=240, height=400,
                                highlightthickness=0)
    canvas.bind('<Button-1>', self.turn_of_human)
    canvas.pack()

    # Initial draw.
    self.on_draw()
def turn_of_human(self, event):
    """Handle a click: select a piece first, then a destination.

    The first click stores the selection (0-11: board square, 12 and up:
    captured piece slot).  The second click is turned into an action, which
    is played if legal; otherwise the selection is cleared.

    Args:
        event: Click event providing pixel coordinates ``x`` and ``y``.
    """
    # Finished game: start over and redraw.
    if self.state.is_done():
        self.state = State()
        self.on_draw()
        return

    # Only the first player is driven by clicks.
    if not self.state.is_first_player():
        return

    # List the captured piece kinds, at most two entries per kind.
    captures = []
    for kind in range(3):
        held = self.state.pieces[12 + kind]
        if held >= 2:
            captures.extend((1 + kind, 1 + kind))
        elif held >= 1:
            captures.append(1 + kind)

    # Board squares are 80 px and start 40 px from the top; the captured
    # piece row sits below y = 360 in 40 px slots.
    p = int(event.x / 80) + int((event.y - 40) / 80) * 3
    if 40 <= event.y <= 360:
        select = p
    elif event.y > 360 and event.x < len(captures) * 40:
        select = 12 + int(event.x / 40)
    else:
        return

    # First click: remember the selection only.
    if self.select < 0:
        self.select = select
        self.on_draw()
        return

    # Second click: only a board square can be a destination.
    action = -1
    if select < 12:
        if self.select < 12:
            # Moving a piece already on the board.
            direction = self.position_to_direction(self.select, p)
        else:
            # Dropping a captured piece.
            direction = 8 - 1 + captures[self.select - 12]
        action = self.state.position_to_action(p, direction)

    # Illegal move: clear the selection.
    if action not in self.state.legal_actions():
        self.select = -1
        self.on_draw()
        return

    self.state = self.state.next(action)
    self.select = -1
    self.on_draw()

    # Hand over to the AI after a 1 ms delay.
    self.master.after(1, self.turn_of_ai)
def turn_of_human(self, event):
    """Handle a click: select a piece first, then a destination.

    The first click stores the selection (0-80: board square, 81 and up:
    piece-in-hand slot).  The second click is turned into an action, which is
    played if legal; otherwise the selection is cleared.

    Args:
        event: Click event providing pixel coordinates ``x`` and ``y``.
    """
    # Finished game: start over and redraw.
    if self.state.is_done():
        self.state = State()
        self.on_draw()
        return

    # Only the first player is driven by clicks.
    if not self.state.is_first_player():
        return

    # List the piece kinds in hand, at most two entries per kind.
    captures = []
    for kind in range(8):
        held = self.state.pieces[81 + kind]
        if held >= 2:
            captures.extend((1 + kind, 1 + kind))
        elif held >= 1:
            captures.append(1 + kind)

    # Board squares are 80 px and start 40 px from the top; the
    # piece-in-hand row sits below y = 760 in 40 px slots.
    p = int(event.x / 80) + int((event.y - 40) / 80) * 9
    if 40 <= event.y <= 760:
        select = p
    elif event.y > 760 and event.x < len(captures) * 40:
        select = 81 + int(event.x / 40)
    else:
        return

    # First click: remember the selection only.
    if self.select < 0:
        self.select = select
        self.on_draw()
        return

    # Second click: only a board square can be a destination.
    action = -1
    if select < 81:
        if self.select < 81:
            # Moving a piece already on the board.
            direction = self.position_to_direction(self.select, p)
        else:
            # Dropping a piece from hand.
            direction = 74 - 1 + captures[self.select - 81]
        action = self.state.position_to_action(p, direction)

    # Illegal move: clear the selection.
    if action not in self.state.legal_actions():
        self.select = -1
        self.on_draw()
        return

    self.state = self.state.next(action)
    self.select = -1
    self.on_draw()

    # Hand over to the AI after a 1 ms delay.
    self.master.after(1, self.turn_of_ai)
def play(model, using_saved_state=False, saving_ontheway_state=False):
    """Run one self-play game and return its training records.

    Args:
        model: Model handed to ``pv_mcts_scores``.
        using_saved_state: Start from ``load_state()`` when it returns a
            truthy state; otherwise start from a fresh ``State()``.
        saving_ontheway_state: Call ``save_state`` once the history reaches
            25 entries.

    Returns:
        A list of ``[[pieces, enemy_pieces], policies, value]`` entries, one
        per move, with the value sign alternating from entry to entry.
    """
    history = []

    # Choose the starting position.
    state = load_state() if using_saved_state else None
    if not state:
        state = State()

    starttime = time.time()
    print('')

    while not state.is_done():
        # Probability distribution over the legal moves.
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)

        # Spread the probabilities over the full policy vector and record.
        policies = [0] * DN_OUTPUT_SIZE
        for action, policy in zip(state.legal_actions(), scores):
            policies[action] = policy
        history.append([[state.pieces, state.enemy_pieces], policies, None])

        # Progress report every ten moves.
        if len(history) % 10 == 0:
            print("state len: ", len(history))
            print(state)

        if saving_ontheway_state and len(history) == 25:
            save_state(state)

        # Sample the move and advance.
        action = np.random.choice(state.legal_actions(), p=scores)
        state = state.next(action)

    # Game over: report the result.
    endtime = time.time()
    print("first player is ", "lose" if state.is_lose() else "win")
    print("first player num:", state.piece_count(state.pieces))
    print('elapsed time', endtime - starttime)
    print(state)

    # Fill in the value, flipping its sign for each successive entry.
    value = first_player_value(state)
    for record in history:
        record[2] = value
        value = -value
    return history
def play(next_actions):
    """Play one game to the end and return the first player's points.

    Args:
        next_actions: Pair of move selectors; index 0 plays for the first
            player and index 1 for the second.  Each is called with the
            current state and returns an action.

    Returns:
        The result of ``first_player_point`` on the final state.
    """
    state = State()
    while not state.is_done():
        if state.is_first_player():
            chooser = next_actions[0]
        else:
            chooser = next_actions[1]
        state = state.next_state(chooser(state))
    return first_player_point(state)
def turn_of_human(self, event):
    """Handle a mouse click as the human move, including undo.

    Cells are 40 pixels square.  A click to the right of the board restores
    ``prev_state`` when one is stored.  ``ALL_PIECES_NUM`` is the pass action
    and is played automatically when it is the only legal action.

    Args:
        event: Click event providing pixel coordinates ``x`` and ``y``.
    """
    # Finished game: report the result and start over.
    if self.state.is_done():
        print("first player is ", "lose" if self.state.is_lose() else "win")
        self.state = State()
        self.prev_state = None
        self.on_draw()
        return

    # The human plays whichever side the AI does not.
    first_to_move = self.state.is_first_player()
    is_human_turn = (not first_to_move) if self.ai_is_first else first_to_move
    if not is_human_turn:
        return

    x = int(event.x / 40)
    y = int(event.y / 40)
    is_back = x > BOARD_SIZE - 1
    print("x y", x, y)

    # Undo: go back to the state saved before the last human move.
    if is_back and self.prev_state:
        print("check modoru")
        print("")
        self.state = self.prev_state
        self.prev_state = None
        self.on_draw()
        return

    last = BOARD_SIZE - 1
    if not (0 <= x <= last and 0 <= y <= last):  # outside the board
        print("範囲外")
        return
    action = x + y * BOARD_SIZE
    print("human", action, get_coodicate(action))

    legal_actions = self.state.legal_actions()
    if legal_actions == [ALL_PIECES_NUM]:
        action = ALL_PIECES_NUM  # forced pass
    if action != ALL_PIECES_NUM and action not in legal_actions:
        return

    # Remember the current state for undo, then play the move.
    self.prev_state = self.state
    self.state = self.state.next(action)
    print("check2")
    self.on_draw()

    # Hand over to the AI after a 1 ms delay.
    self.master.after(1, self.turn_of_ai)
def sample_episode(pi):
    """Generate one episode by following policy ``pi`` from a fresh deal.

    Args:
        pi: Mapping from ``get_state()`` values to actions.

    Returns:
        ``(history, r)``: the list of ``[state, action]`` pairs visited and
        the reward returned by the last ``step`` call.
    """
    trajectory = []
    current = State(deal=True)
    while True:
        if current.terminal():
            break
        action = pi[current.get_state()]
        # Rewards are not stored per step: a reward is only given on entering
        # the terminal state, so the last one is returned separately.
        trajectory.append([current.get_state(), action])
        current, r = step(current, action)
    return trajectory, r
def __init__(self, master=None, model=None):
    """Build the tic-tac-toe window with a minimax opponent.

    Args:
        master: Parent tkinter widget.
        model: Not used by this constructor.
    """
    tk.Frame.__init__(self, master)
    self.master.title("三目並べ")

    self.state = State()
    # The AI picks its moves with minimax.
    self.next_action = mini_max_action

    self.c = canvas = tk.Canvas(self, width=240, height=240,
                                highlightthickness=0)
    canvas.bind("<Button-1>", self.turn_of_human)
    canvas.pack()

    self.on_draw()
def play(next_actions):
    """Play one game to the end and return the first player's points.

    Args:
        next_actions: Pair of move selectors; index 0 plays for the first
            player and index 1 for the second.  Each is called with the
            current state and returns an action.

    Returns:
        The result of ``first_player_point`` on the final state.
    """
    state = State()
    while not state.is_done():
        chooser = next_actions[0 if state.is_first_player() else 1]
        state = state.next(chooser(state))
    return first_player_point(state)
def get_e_greedy_action(Q: dict, N: dict, state: State):
    """Pick an action epsilon-greedily, with epsilon = 100 / (100 + N[s]).

    Args:
        Q: Action values keyed by ``(state.get_state(), action)``.
        N: Visit counts keyed by ``state.get_state()``.
        state: Current state.

    Returns:
        A random member of ``ACTIONS`` with probability epsilon; otherwise
        the first action with the highest Q value above -1e9 (``None`` if no
        action exceeds it).
    """
    epsilon = 100 / (100 + N[state.get_state()])

    # Explore.
    if np.random.uniform() <= epsilon:
        return random.choice(ACTIONS)

    # Exploit: strict '>' keeps the first action among ties.
    best_action, best_q = None, -1e9
    for candidate in ACTIONS:
        q_value = Q[state.get_state(), candidate]
        if q_value > best_q:
            best_action, best_q = candidate, q_value
    return best_action
def test_line_sprite(self):
    """A group built from one LineSprite holds exactly one sprite in "all"."""
    line = LineSprite(RED, (0, 0), (100, 100))
    entity_group = EntityGroup([line])
    # The state is constructed as in the original test; it is not inspected.
    _state = State(entity_group)
    sprite_count = len(entity_group.dict["all"].sprites())
    self.assertEqual(1, sprite_count)
def main():
    """Build and compile the DQN, then run 1000 training iterations."""
    print("8dqn")

    # Create the network.
    print("making model")
    model = make_dqn()

    # Compile it with RMSprop and a mean-squared-error loss.
    print("creating graph")
    lr = 0.00025
    optimizer = RMSprop(lr=lr, rho=0.95, epsilon=0.01)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mse', 'accuracy'])

    # Start with an empty replay memory and a fresh state.
    memory = []
    state = State()

    # Train.
    iterations = 1000
    for step_no in range(iterations):
        q_iteration(model, memory, step_no)
def process_REVEAL(self, reveal, role, players, spies=None):
    """Create the bot for this channel and tell it who is playing.

    Args:
        reveal: Not used by this method.
        role: Text such as ``"ROLE Spy"``; the second word decides whether
            the bot is a spy.
        players: Text such as ``"PLAYERS 1-Deceiver, 2-Random, 3-Hippie;"``.
        spies: Optional text such as ``"SPIES 1-Deceiver."``.
    """
    # The bot's index is the part of the channel name after the last '-'.
    index = self.channel.rsplit('-', 1)[-1]
    is_spy = role.split(' ')[1] == 'Spy'
    bot = self.constructor(State(), int(index), is_spy)

    # One logger is created lazily and shared by every bot.
    if self.logger is None:
        self.logger = ResistanceLogger(self.protocol)
    bot.log.addHandler(self.logger)
    bot.log.setLevel(logging.DEBUG)
    bot.recipient = self.sender
    self.bots[self.channel] = bot

    # Drop the leading keyword and the trailing comma of each name.
    participants = [self.makePlayer(name.rstrip(','))
                    for name in players.split(' ')[1:]]
    bot.game.players = participants

    if spies:
        saboteurs = {self.makePlayer(name.rstrip(','))
                     for name in spies.split(' ')[1:]}
    else:
        saboteurs = set()
    bot.game.spies = saboteurs

    bot.onGameRevealed(participants, saboteurs)
def __init__(self, state: State, parent=None):
    """Create a search-tree node for ``state``.

    Args:
        state: Position this node represents; a deep copy is stored.
        parent: Parent node, or ``None`` for the root.
    """
    self.parent = parent
    self.children = {}

    # The node keeps its own copy of the position.
    self.state = deepcopy(state)
    # Taken from the state that was passed in, not from the copy.
    self.untried_actions = state.get_available_actions()

    self.Q = 0  # final payoff value of the node
    self.N = 0  # number of times the node has been visited
def create_state(self):
    """Build a ``State`` from the tracked pieces, guessing unknown colours.

    Own pieces are placed with their known colours.  Enemy pieces whose
    colour is unknown (-1) are assigned 1 or 2 at random, so that together
    with the already known colours there are four of each.

    Returns:
        ``State(my_pieces, enemy_pieces, self.depth)`` where both piece lists
        have 36 entries.
    """
    # Own piece colours are all known.
    my_pieces = [0] * 36
    for i, _ in enumerate(my_pieces):
        if i in self.my_pieces_coo:
            my_pieces[i] = self.my_pieces_color[self.my_pieces_coo.index(i)]

    # Enemy colours may be unknown: count how many of each remain unassigned.
    # NOTE(review): judging by the names, 1 is blue and 2 is red.
    enemy_pieces = [0] * 36
    blue_num = 4 - self.enemy_pieces_color.count(1)
    red_num = 4 - self.enemy_pieces_color.count(2)

    # Fixed: work on a copy.  The original aliased self.enemy_pieces_color,
    # so the random guesses permanently overwrote the -1 markers and later
    # calls could no longer re-sample the unknown colours.
    rdm_en_pieces_color = list(self.enemy_pieces_color)
    rdm_list = [1] * blue_num + [2] * red_num
    random.shuffle(rdm_list)
    for i, color in enumerate(rdm_en_pieces_color):
        if color == -1:
            rdm_en_pieces_color[i] = rdm_list.pop()

    for i, _ in enumerate(enemy_pieces):
        if i in self.enemy_pieces_coo:
            enemy_pieces[i] = rdm_en_pieces_color[
                self.enemy_pieces_coo.index(i)]

    state = State(my_pieces, enemy_pieces, self.depth)
    return state
def search(state, alpha, beta):
    """Depth-limited alpha-beta search that fills in ``state.score``.

    NOTE(review): this function reads ``self``, ``start_time`` and ``plies``
    from an enclosing scope that is not part of this definition.

    Args:
        state: Node to expand; its ``score`` and ``children`` are updated.
        alpha: Best score found so far for black (lower bound).
        beta: Best score found so far for the other side (upper bound).

    Returns:
        -1 when the time budget ``self.max_time`` is exhausted; otherwise
        ``None`` (implicitly).
    """
    # Abort as soon as the time budget is used up.
    if (time() - start_time) >= self.max_time:
        return -1
    # The side to move is the opponent of the player stored on the node.
    player = State.opponent(state.player)
    moves = state.board.legal_moves(player)
    # Leaf: depth limit reached or no legal move.
    # presumably self.evaluate stores the result in state.score — TODO confirm
    if state.depth == plies or len(moves) == 0:
        self.evaluate(state)
    else:
        for move in moves:
            # Play the move on a copy so the parent's board stays intact.
            next_board = deepcopy(state.board)
            next_board.make_move(move[0], move[1], player)
            next_state = GameState(next_board, parent=state, player=player, depth=state.depth+1, move=move)
            ret = search(next_state, alpha, beta)
            # Propagate a timeout up the recursion.
            if ret == -1:
                return ret
            if state.player is State.black:
                alpha = max(alpha, next_state.score)
                state.score = alpha
            else:
                beta = min(beta, next_state.score)
                state.score = beta
            # Cut-off: the child that triggers it is not stored in children.
            if alpha >= beta:
                break
            state.children[move] = next_state
def __init__(self, master=None, model=None):
    """Build the reversi window: state, AI move selector and canvas.

    Args:
        master: Parent tkinter widget.
        model: Model handed to ``pv_mcts_action``.
    """
    tk.Frame.__init__(self, master)
    self.master.title('リバーシ')

    # Fresh game state.
    self.state = State()

    # Move selector backed by PV MCTS.
    self.next_action = pv_mcts_action(model, 0.0)

    # 240x240 canvas that forwards left clicks to the human-turn handler.
    self.c = canvas = tk.Canvas(self, width=240, height=240,
                                highlightthickness=0)
    canvas.bind('<Button-1>', self.turn_of_human)
    canvas.pack()

    # Initial draw.
    self.on_draw()
def main(): print "Welcome to Chess v 2.0" state = State() white = Player("White", state.config.whitePieces, state.config.blackPieces) black = Player("Black", state.config.blackPieces, state.config.whitePieces) while not state.gameOver(white, black): if white.turn: state.whiteDisplay() white.turn = False black.turn = True print "Possible moves:" actions = white.getLegalActions(state) for action in actions: print action.piece.pos, action.toString() moveString = raw_input("White's Turn: ") player = white elif black.turn: state.blackDisplay() black.turn = False white.turn = True print "Possible moves:" actions = black.getLegalActions(state) for action in actions: print action.piece.pos, action.toString() moveString = raw_input("Black's Turn: ") player = black moveString.strip() if moveString == "q": break action = state.generateActionFromString(moveString) nextState = player.makeMove(state, action) print "--------------------------------------" print "" del state state = nextState print "Game Over"
def __repr__(self):
    """Return a short description with the player's name and the score."""
    owner = State.player_name(self.player)
    return "<GameState: {} {}>".format(owner, self.score)
def move(self, board):
    """Choose a move by iterative-deepening alpha-beta search.

    The search depth ``plies`` grows by one per loop iteration.  The best
    move of the last fully completed depth is returned once the time budget
    ``self.max_time`` runs out or the depth exceeds the remaining plies.

    Args:
        board: Current board; ``total_count()`` and ``legal_moves()`` are
            used, and copies are made for the search.

    Returns:
        The best move found, as a pair indexed ``[0]`` and ``[1]``, or
        ``None`` if no depth completed in time or there is no legal move.
    """
    start_time = time()
    best_move = None
    plies = 0
    # No point searching deeper than the plies left on a 64-square board.
    max_plies = 64 - board.total_count()
    while True:
        plies += 1
        if plies > max_plies or (time() - start_time) >= self.max_time:
            return best_move

        def search(state, alpha, beta):
            # Depth-limited alpha-beta; returns -1 on timeout, else None.
            if (time() - start_time) >= self.max_time:
                return -1
            # The side to move is the opponent of the node's stored player.
            player = State.opponent(state.player)
            moves = state.board.legal_moves(player)
            # Leaf: depth limit reached or no legal move.
            if state.depth == plies or len(moves) == 0:
                self.evaluate(state)
            else:
                for move in moves:
                    # Play the move on a copy of the board.
                    next_board = deepcopy(state.board)
                    next_board.make_move(move[0], move[1], player)
                    next_state = GameState(next_board, parent=state, player=player, depth=state.depth+1, move=move)
                    ret = search(next_state, alpha, beta)
                    # Propagate a timeout up the recursion.
                    if ret == -1:
                        return ret
                    if state.player is State.black:
                        alpha = max(alpha, next_state.score)
                        state.score = alpha
                    else:
                        beta = min(beta, next_state.score)
                        state.score = beta
                    # Cut-off: the triggering child is not stored.
                    if alpha >= beta:
                        break
                    state.children[move] = next_state

        # The root stores the opponent so that self.color moves first.
        initial_player = State.opponent(self.color)
        current_state = GameState(board, depth=0, player=initial_player)
        ret = search(current_state, float("-inf"), float("inf"))
        # Timed out mid-search: fall back to the previous depth's result.
        if ret == -1:
            return best_move
        scores = [(s.score, m) for m, s in current_state.children.items()]
        if len(scores) == 0:
            # No children were recorded: pass.
            return None
        # Black maximises the score, the other colour minimises it.
        if self.color is State.black:
            best_score, best_move = max(scores)
        else:
            best_score, best_move = min(scores)
        # Report the move as column letter plus 1-based row number.
        s = "abcdefgh"[best_move[1]] + str(best_move[0] + 1)
        print("Searched {} plies and got {} ({})".format(plies, s, best_score))
    return best_move