def training(self, acting_model: keras.Model, target_model: keras.Model, models_memory: SimpleMemory, batch_size: int, gamma: float):
    """Run one DQN training step on a minibatch sampled from replay memory.

    Samples ``batch_size`` records from ``models_memory`` (only once at least
    ``START_TRAINING_AT`` rows have been stored), bootstraps each stored reward
    with the target network's value estimate of the greedily-chosen next state
    (r + gamma * V(s')), and fits the acting model on the resulting targets.

    Args:
        acting_model: network being trained (also used to pick the next move).
        target_model: frozen network providing the bootstrap value estimate.
        models_memory: replay buffer of (state, reward, fen) records.
        batch_size: number of records to sample per step.
        gamma: discount factor applied to the bootstrapped future value.
    """
    batch = models_memory.get_batch(batch_size, min_rows=self.START_TRAINING_AT)
    if batch is None:
        # Not enough experience gathered yet — skip this training step.
        return

    # Unzip the sampled records into parallel column lists.
    states, rewards, fens = map(list, zip(*[(rec.state, rec.reward, rec.fen) for rec in batch]))

    targets = []
    for reward, fen in zip(rewards, fens):
        board = cb.ChessBoard(starting_fen=fen)
        reward = reward[0]
        if board.game_over():
            # Terminal position: no future value to bootstrap.
            targets.append(reward)
            continue
        moves, next_states, next_fens = board.get_moves()
        _, chosen_state, _ = self.choose_action(
            acting_model, moves, np.array(next_states), next_fens)
        # Value of the chosen successor state per the frozen target network;
        # assumes the state flattens to 384 features — TODO confirm encoding.
        future_value = target_model.predict(
            np.array(chosen_state.reshape((1, 384))))[0]
        targets.append(reward + gamma * future_value)

    acting_model.train_on_batch(np.array(states), np.array(targets))
def training(self, acting_model: keras.Model, target_model: keras.Model, models_memory: SimpleMemory, batch_size: int, gamma: float):
    """Run one DQN training step, discounting the opponent's simulated reply.

    Samples a minibatch from ``models_memory`` (only once at least
    ``START_TRAINING_AT`` rows are stored). For each record, the opponent's
    best reply (per the target network) is played on a board rebuilt from the
    stored FEN; the stored reward is then adjusted by the opponent's outcome
    and, when the reply was not decisive, by the target network's estimate of
    our best follow-up move. Finally the acting model is fit on the batch.

    Args:
        acting_model: network being trained via ``train_on_batch``.
        target_model: frozen network used to pick/evaluate simulated moves.
        models_memory: replay buffer of (state, reward, fen) records.
        batch_size: number of records to sample per step.
        gamma: discount factor for future/opponent reward terms.
    """
    training_batch = models_memory.get_batch(
        batch_size, min_rows=self.START_TRAINING_AT)
    if training_batch is not None:
        samples = [[record.state, record.reward, record.fen]
                   for record in training_batch]
        # Transpose the list of records into parallel column lists.
        states, prizes, fens = list(map(list, zip(*samples)))
        reinforced_prizes = []
        for p, f in zip(prizes, fens):
            # Rebuild the position the record was taken from.
            training_board = cb.ChessBoard(starting_fen=f)
            # Stored reward is a one-element sequence; unwrap the scalar.
            p = p[0]
            if not training_board.game_over():
                # predict opponent's move
                # flip=True presumably generates moves from the opponent's
                # perspective — TODO confirm against ChessBoard.get_moves.
                opponents_next_moves, opponents_next_states, opponents_next_fens = \
                    training_board.get_moves(flip=True)
                opponents_move, _, _, _ = self.choose_action(
                    target_model, opponents_next_moves, np.array(opponents_next_states), opponents_next_fens)
                # Play the predicted reply so get_results() scores it.
                training_board.make_move(opponents_move, flipped=True)
                opponents_prize = training_board.get_results()
                if opponents_prize > cb.ATTACK:
                    # Opponent's reply was decisive (worse than a capture):
                    # penalize the stored reward by its discounted value.
                    reinforced_p = p - gamma * opponents_prize
                else:
                    # get expected next move's reward
                    possible_moves, possible_states, possible_fens = training_board.get_moves()
                    _, _, _, estimated_next_prize = self.choose_action(
                        target_model, possible_moves, np.array(possible_states), possible_fens)
                    # choose_action may return a scalar int or an array-like;
                    # normalize to a scalar before the arithmetic below.
                    estimated_next_prize = \
                        estimated_next_prize if isinstance(estimated_next_prize, int) else estimated_next_prize[0]
                    reinforced_p = p + gamma * (estimated_next_prize - opponents_prize)
            else:
                # Terminal position: keep the stored reward as-is.
                reinforced_p = p
            reinforced_prizes.append(reinforced_p)
        states = np.array(states)
        reinforced_prizes = np.array(reinforced_prizes)
        acting_model.train_on_batch(states, reinforced_prizes)
_, chosen_state, _ = choose_action(acting_model, next_moves, np.array(next_states), next_fens) estimated_next_prize = target_model.predict( np.array(chosen_state.reshape((1, 384))))[0] reinforced_p = p + gamma * estimated_next_prize else: reinforced_p = p reinforced_prizes.append(reinforced_p) states = np.array(states) reinforced_prizes = np.array(reinforced_prizes) acting_model.train_on_batch(states, reinforced_prizes) if LOAD: model_trainer = load_trainer(LOAD_FROM, NAME, action, training) else: memory = SimpleMemory(MEMORY_SIZE) model_trainer = DQNTrainer(model, memory, action, training) board = cb.ChessBoard() for i in range(START_AT_STEP, TRAINING_STEPS): print("Step {} of {}".format(i + 1, TRAINING_STEPS)) model_trainer.take_action(board, get_epsilon(i)) model_trainer.train(batch_size=BATCH, gamma=GAMMA, theta=THETA) if i % 1000 == 0: model_trainer.save("tmp", "{}_{}".format(NAME, i)) model_trainer.save("final", "{}_{}k".format(NAME, int(TRAINING_STEPS / 1000)))
def setUp(self):
    # Fresh board (standard starting position, per ChessBoard's default)
    # before every test case.
    self.board = cb.ChessBoard()
def test_checkmate_detection(self):
    """Rg1-h1 mates the h5 king (h-file covered by the rook, g-file by Kf5)."""
    board = cb.ChessBoard("8/8/8/5K1k/8/8/8/6R1 w k - 0 1")
    mating_move = chess.Move(chess.G1, chess.H1)
    board.make_move(mating_move)
    self.assertEqual(cb.CHECKMATE, board.get_results())
def test_reward_attack(self):
    """Capturing the d5 pawn with e4xd5 yields the ATTACK reward."""
    capture = chess.Move(chess.E4, chess.D5)
    board = cb.ChessBoard("rnbqkbnr/ppp1pppp/8/3p4/4P3/8/PPPP1PPP/RNBQKBNR w KQkq - 0 1")
    board.make_move(capture)
    self.assertEqual(cb.ATTACK, board.get_results())
def test_attack_detection(self):
    """A capturing move sets the board's internal _attacked flag."""
    board = cb.ChessBoard("rnbqkbnr/ppp1pppp/8/3p4/4P3/8/PPPP1PPP/RNBQKBNR w KQkq - 0 1")
    board.make_move(chess.Move(chess.E4, chess.D5))
    self.assertTrue(board._attacked)