def validate_move(
    board_list: List[List[Piece]],
    from_row: int,
    from_col: int,
    to_row: int,
    to_col: int,
) -> bool:
    """Validate move given by current and desired piece coordinates.

    Args:
        board_list: Information about positions of pieces.
        from_row: Row of board of original piece location.
        from_col: Column of board of original piece location.
        to_row: Row of board of desired piece location.
        to_col: Column of board of desired piece location.

    Returns:
        True if given move is valid, otherwise False.
    """
    # not among available moves
    if (to_row, to_col) not in Rules.generate_all_moves(from_row, from_col):
        return False
    # can't move piece from outside of board
    if from_row < 0 or from_col < 0 or from_row > 7 or from_col > 7:
        return False
    # can't move out of board
    if to_row < 0 or to_col < 0 or to_row > 7 or to_col > 7:
        return False
    # target square must be empty
    if board_list[to_row][to_col] is not None:
        return False
    # can't move empty square
    p = board_list[from_row][from_col]
    if p is None:
        return False
    # can't move in opposite direction, except king
    if p.direction == Constants().UP and from_row < to_row and not p.king:
        return False
    if p.direction == Constants().DOWN and from_row > to_row and not p.king:
        return False
    # can't jump over own piece or over an empty square
    between_row, between_col = Rules.get_between_position(
        from_row, from_col, to_row, to_col)
    if between_row is not None and between_col is not None:
        pp = board_list[between_row][between_col]
        if pp is None or pp.ptype == p.ptype:
            return False
    return True
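# A minimal usage sketch for validate_move, assuming DarkPiece and Rules are
# available as used above. The import path below is a hypothetical assumption;
# adjust the call to Rules.validate_move(...) if the function lives on the
# Rules class rather than at module level.
from checkers.game import DarkPiece  # hypothetical import path

board = [[None] * 8 for _ in range(8)]   # empty 8x8 board
board[5][5] = DarkPiece()                # dark men move toward higher rows

validate_move(board, 5, 5, 6, 6)         # forward diagonal step -> should be True
validate_move(board, 5, 5, 4, 4)         # backwards move by a non-king -> should be False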
def __init__(self, name: str, ptype: int, load_model: bool = True, epsilon: float = 0.0):
    self.board_enc = BoardEncoding()
    if ptype == Constants().DARK:
        self.board_enc.dark = 0.5
        self.board_enc.light = -0.5
        self.board_enc.dark_king = 1.
        self.board_enc.light_king = -1.
    elif ptype == Constants().LIGHT:
        self.board_enc.dark = -0.5
        self.board_enc.light = 0.5
        self.board_enc.dark_king = -1.
        self.board_enc.light_king = 1.
    else:
        raise ValueError(f"Unknown piece type: {ptype}")
    super().__init__(name, ptype)
    self.render = False
    self.load_model = load_model

    # Sizes of state and action
    # self.state_size = (8, 8, 1)
    self.action_size = 4

    # DQN hyperparameters
    self.discount_factor = 0.99
    self.learning_rate = 0.001
    self.epsilon = epsilon
    self.epsilon_decay = 0.9999
    self.epsilon_min = 0.00
    self.batch_size = 64
    self.train_start = 3000

    # Replay memory, maximum size 4000
    self.memory = deque(maxlen=4000)

    # Create the model and the target model
    self.model = self.build_model()
    self.target_model = self.build_model()

    # Initialize the target model with the model's weights
    self.update_target_model()

    if self.load_model:
        self.model.load_weights("./save_model/checker_dqn.h5")
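# build_model() and update_target_model() are referenced above but not shown.
# The sketch below is one plausible shape for them, assuming tf.keras, the 8x8
# encoded board, and the action_size / learning_rate set in __init__; it is an
# illustration, not the project's actual implementation. (Older standalone
# Keras uses Adam(lr=...) instead of learning_rate=.)
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam


# Intended to live as methods on the same agent class as __init__ above.
def build_model(self):
    # Small fully connected Q-network over the flattened 8x8 board encoding,
    # one linear output per action.
    model = Sequential([
        Flatten(input_shape=(8, 8)),
        Dense(128, activation="relu"),
        Dense(128, activation="relu"),
        Dense(self.action_size, activation="linear"),
    ])
    model.compile(loss="mse", optimizer=Adam(learning_rate=self.learning_rate))
    return model


def update_target_model(self):
    # Copy the online network's weights into the target network.
    self.target_model.set_weights(self.model.get_weights())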
def test_become_king(self, empty_board):
    # Dark piece
    empty_board.board_list[5][5] = DarkPiece()
    empty_board.move(Constants().DARK, 5, 5, 6, 6)
    assert empty_board.board_list[6][6].is_king() is False
    empty_board.move(Constants().DARK, 6, 6, 7, 7)
    assert empty_board.board_list[7][7].is_king() is True

    # Light piece
    empty_board.board_list[2][2] = LightPiece()
    empty_board.move(Constants().LIGHT, 2, 2, 1, 1)
    assert empty_board.board_list[1][1].is_king() is False
    empty_board.move(Constants().LIGHT, 1, 1, 0, 0)
    assert empty_board.board_list[0][0].is_king() is True
def get_opponent_type(ptype: int, ) -> int: """Get a type of opponent agent. Note: In checkers there is only one pair of agents competing with each other. Args: ptype: Type of piece. Returns: opponent_type: Type of opponent agent. """ if ptype == Constants().DARK: opponent_type = Constants().LIGHT else: opponent_type = Constants().DARK return opponent_type
def __init__(
    self,
    ptype: int,
):
    """Initialize human agent.

    Args:
        ptype: type of piece that agent is responsible for.
    """
    if ptype == Constants().DARK:
        name = "HumanDark"
    elif ptype == Constants().LIGHT:
        name = "HumanLight"
    else:
        raise ValueError(f"Unknown piece type: {ptype}")
    super().__init__(name, ptype)
def __init__(self):
    self._constants = Constants()
    self._encoding = {}
    self.empty = 0
    self.dark = 10
    self.dark_king = 11
    self.light = 20
    self.light_king = 21
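# BoardEncoding is configured with plain attribute assignments here, yet
# board_list2numpy indexes it like a mapping (encoding[constants.EMPTY]).
# One way such a class could be wired is with property setters backed by the
# internal _encoding dict, as in the sketch below; this is an illustrative
# guess, not the project's actual implementation. Only two piece types are
# shown; the remaining ones (dark_king, light, light_king) would follow the
# same pattern.
class BoardEncodingSketch:
    def __init__(self):
        self._constants = Constants()
        self._encoding = {}
        self.empty = 0
        self.dark = 10

    @property
    def empty(self):
        return self._encoding[self._constants.EMPTY]

    @empty.setter
    def empty(self, value):
        # Attribute writes land in the dict, keyed by the piece-type constant.
        self._encoding[self._constants.EMPTY] = value

    @property
    def dark(self):
        return self._encoding[self._constants.DARK]

    @dark.setter
    def dark(self, value):
        self._encoding[self._constants.DARK] = value

    def __getitem__(self, key):
        # Enables encoding[constants.EMPTY]-style lookups in board_list2numpy.
        return self._encoding[key]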
def board_list2numpy(
    board_list: List[List],
) -> np.ndarray:
    board_size = len(board_list)
    board_numpy = Constants().EMPTY * np.ones((board_size, board_size))

    for row in range(board_size):
        for col in range(board_size):
            if board_list[row][col] is not None:
                board_numpy[row][col] = board_list[row][col].ptype

    return board_numpy
def main():
    max_episodes = 10000

    with tf.Session() as sess:
        # saver = tf.train.Saver()

        ####### Agent Setting #######
        MasterAgent = RandomAgentLight("Teacher Agent")
        MyAgent = DqnAgent(sess, "doublejtoh Agent", Constants().LIGHT)
        tf.global_variables_initializer().run()

        current_agent = MasterAgent
        next_agent = MyAgent

        for episode in range(max_episodes):
            step = 0
            done = False
            obs = env.reset()
            env.render()

            while not done:
                state = board_list2numpy(obs)
                if current_agent._name == 'doublejtoh Agent':
                    from_row, from_col, to_row, to_col = current_agent.act(state, episode)
                    # print(from_row, from_col, to_row, to_col)
                else:
                    from_row, from_col, to_row, to_col = current_agent.act(obs)
                    # print("Teacher Agent: ", from_row, from_col, to_row, to_col)

                obs, reward, done, info = env.step(current_agent, from_row, from_col, to_row, to_col)
                # print(info)
                if done:
                    print(f"{current_agent} agent wins.")

                action = (to_row, to_col)
                next_state = board_list2numpy(obs)
                if current_agent._name == 'doublejtoh Agent':
                    # here, obs means new state. save to memory.
                    current_agent.consume(state, action, reward, done, episode, next_state)

                ### change turn ###
                temporary_agent = current_agent
                current_agent = next_agent
                next_agent = temporary_agent

                env.render()
                step += 1

            print("Episode ", episode, "step: ", step)
            # if current_agent._name == 'doublejtoh Agent':
            #     current_agent.consume_after_episode(episode)

            # replay train.
            MyAgent.consume_after_episode(episode)
def board_list2numpy(
    board_list: List[List],
    encoding: BoardEncoding = BoardEncoding(),
) -> np.ndarray:
    """Convert the state of the game (`board_list`) into a 2D NumPy array using `encoding`.

    Args:
        board_list: (List[List[Piece]]) State of the game.
        encoding: (BoardEncoding) Optional argument. If not given, the default
            encoding will be used.

    Returns:
        board_numpy: (np.ndarray)
    """
    board_size = len(board_list)
    constants = Constants()
    board_numpy = encoding[constants.EMPTY] * np.ones((board_size, board_size))

    for row in range(board_size):
        for col in range(board_size):
            if board_list[row][col] is not None:
                ptype = board_list[row][col].ptype
                king = board_list[row][col].king

                if ptype == constants.LIGHT:
                    if king:
                        piece_type = constants.LIGHT_KING
                    else:
                        piece_type = constants.LIGHT
                else:  # DARK
                    if king:
                        piece_type = constants.DARK_KING
                    else:
                        piece_type = constants.DARK

                board_numpy[row][col] = encoding[piece_type]

    return board_numpy
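# A short usage sketch: customizing the encoding the same way the DQN agent's
# __init__ does above, then converting a board. The import path and the
# DarkPiece placement are illustrative assumptions.
from checkers.game import BoardEncoding, Constants, DarkPiece  # hypothetical import path

enc = BoardEncoding()
enc.dark = 0.5
enc.dark_king = 1.0
enc.light = -0.5
enc.light_king = -1.0

board = [[None] * 8 for _ in range(8)]
board[5][5] = DarkPiece()

arr = board_list2numpy(board, enc)    # 8x8 float array
assert arr[5][5] == 0.5               # dark man encoded as 0.5
assert arr[0][0] == enc[Constants().EMPTY]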
def test_move(self, empty_board):
    # Dark piece
    empty_board.board_list[5][5] = DarkPiece()
    with pytest.raises(ValueError):
        empty_board.move(Constants().DARK, 5, 5, 4, 4)

    # move any direction
    empty_board.board_list[5][5].make_king()
    empty_board.move(Constants().DARK, 5, 5, 4, 4)
    empty_board.move(Constants().DARK, 4, 4, 5, 5)

    # Light piece
    empty_board.board_list[5][5] = LightPiece()
    with pytest.raises(ValueError):
        empty_board.move(Constants().LIGHT, 5, 5, 6, 6)

    # move any direction
    empty_board.board_list[5][5].make_king()
    empty_board.move(Constants().LIGHT, 5, 5, 6, 6)
    empty_board.move(Constants().LIGHT, 6, 6, 5, 5)
def test_move(self, empty_board):
    # Dark piece
    init_board = empty_board
    empty_board.board_list[5][5] = DarkPiece()
    empty_board.move(Constants().DARK, 5, 5, 4, 4)
    assert init_board == empty_board

    # move any direction
    empty_board.board_list[5][5].make_king()
    empty_board.move(Constants().DARK, 5, 5, 4, 4)
    empty_board.move(Constants().DARK, 4, 4, 5, 5)

    # Light piece
    init_board = empty_board
    empty_board.board_list[5][5] = LightPiece()
    empty_board.move(Constants().LIGHT, 5, 5, 6, 6)
    assert init_board == empty_board

    # move any direction
    empty_board.board_list[5][5].make_king()
    empty_board.move(Constants().LIGHT, 5, 5, 6, 6)
    empty_board.move(Constants().LIGHT, 6, 6, 5, 5)
def __init__(
    self,
    name: str,
):
    super().__init__(name, Constants().LIGHT)
def light():
    return Constants().LIGHT
def dark():
    return Constants().DARK
import argparse

import gym

from agent import DQNChecker

if __name__ == "__main__":
    args = argparse.ArgumentParser()
    # Boolean flags use store_true: argparse's type=bool treats any non-empty
    # string (including "False") as True.
    args.add_argument('--render', action='store_true')
    args.add_argument('--episodes', type=int, default=3000)
    args.add_argument('--train', action='store_true')
    args.add_argument('--threshold', type=int, default=3000)
    config = args.parse_args()

    env = gym.make("Checkers")

    if config.train:
        a1 = DQNChecker("Agent_1", Constants().DARK, True, 0.1)
        a2 = DQNChecker("Agent_2", Constants().LIGHT, True, 0.1)
    else:
        a1 = DQNChecker("Agent_1", Constants().DARK, True, 0.0)
        a2 = DQNChecker("Agent_2", Constants().LIGHT, True, 0.0)

    history = {}
    history[a1] = {'scores': [], 'episodes': []}
    history[a2] = {'scores': [], 'episodes': []}

    agent_tag = {}
    agent_tag[a1] = 'Agent_1'
    agent_tag[a2] = 'Agent_2'

    for e in range(config.episodes):
        done = False
        score = 0
def __init__(self):
    super().__init__(Constants().DARK)
def __init__(self):
    super().__init__(Constants().LIGHT)