def training(self, acting_model: keras.Model, target_model: keras.Model,
                 models_memory: SimpleMemory, batch_size: int, gamma: float):
        # sample a batch of (state, reward, fen) records from replay memory
        training_batch = models_memory.get_batch(
            batch_size, min_rows=self.START_TRAINING_AT)
        if training_batch is not None:
            samples = [[record.state, record.reward, record.fen]
                       for record in training_batch]
            states, prizes, fens = list(map(list, zip(*samples)))
            reinforced_prizes = []
            for p, f in zip(prizes, fens):
                training_board = cb.ChessBoard(starting_fen=f)
                p = p[0]
                if not training_board.game_over():
                    # let the acting model pick the follow-up move, then bootstrap
                    # the reward with the target network's estimate for that state
                    next_moves, next_states, next_fens = training_board.get_moves()
                    _, chosen_state, _ = self.choose_action(
                        acting_model, next_moves, np.array(next_states),
                        next_fens)
                    estimated_next_prize = target_model.predict(
                        chosen_state.reshape((1, 384)))[0]
                    reinforced_p = p + gamma * estimated_next_prize
                else:
                    reinforced_p = p
                reinforced_prizes.append(reinforced_p)

            # fit the acting model on the discounted (reinforced) rewards
            states = np.array(states)
            reinforced_prizes = np.array(reinforced_prizes)
            acting_model.train_on_batch(states, reinforced_prizes)
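The training method above pulls its batches from a SimpleMemory object whose implementation is not included here. The sketch below is a minimal replay buffer written to match how the training code uses it: get_batch(batch_size, min_rows=...) returns None until enough experience has accumulated, and each record exposes state, reward, and fen attributes. The add() method, the namedtuple layout, and uniform random sampling are assumptions, not the original class.

import random
from collections import deque, namedtuple

MemoryRecord = namedtuple("MemoryRecord", ["state", "reward", "fen"])


class SimpleMemory:
    """Bounded replay buffer (illustrative sketch, not the original class)."""

    def __init__(self, max_size):
        self._records = deque(maxlen=max_size)

    def add(self, state, reward, fen):
        # method name assumed; the snippets only show reads via get_batch()
        self._records.append(MemoryRecord(state, reward, fen))

    def get_batch(self, batch_size, min_rows=0):
        # return nothing until enough experience is stored, matching the
        # `if training_batch is not None` guard in the training methods
        if len(self._records) < max(min_rows, batch_size):
            return None
        return random.sample(list(self._records), batch_size)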
Example #2
    def training(self, acting_model: keras.Model, target_model: keras.Model,
                 models_memory: SimpleMemory, batch_size: int, gamma: float):
        training_batch = models_memory.get_batch(
            batch_size, min_rows=self.START_TRAINING_AT)
        if training_batch is not None:
            samples = [[record.state, record.reward, record.fen]
                       for record in training_batch]
            states, prizes, fens = list(map(list, zip(*samples)))
            reinforced_prizes = []
            for p, f in zip(prizes, fens):
                training_board = cb.ChessBoard(starting_fen=f)
                p = p[0]
                if not training_board.game_over():
                    # predict opponent's move
                    opponents_next_moves, opponents_next_states, opponents_next_fens = \
                        training_board.get_moves(flip=True)
                    opponents_move, _, _, _ = self.choose_action(
                        target_model, opponents_next_moves,
                        np.array(opponents_next_states), opponents_next_fens)
                    training_board.make_move(opponents_move, flipped=True)
                    opponents_prize = training_board.get_results()
                    if opponents_prize > cb.ATTACK:
                        # the opponent's reply earned more than a plain attack
                        # reward, so subtract its discounted value from ours
                        reinforced_p = p - gamma * opponents_prize
                    else:
                        # get expected next move's reward
                        possible_moves, possible_states, possible_fens = \
                            training_board.get_moves()
                        _, _, _, estimated_next_prize = self.choose_action(
                            target_model, possible_moves,
                            np.array(possible_states), possible_fens)
                        estimated_next_prize = \
                            estimated_next_prize if isinstance(estimated_next_prize, int) else estimated_next_prize[0]

                        reinforced_p = p + gamma * (estimated_next_prize -
                                                    opponents_prize)
                else:
                    reinforced_p = p
                reinforced_prizes.append(reinforced_p)

            states = np.array(states)
            reinforced_prizes = np.array(reinforced_prizes)
            acting_model.train_on_batch(states, reinforced_prizes)
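Both training snippets lean on a choose_action helper that is not reproduced here (the first unpacks three return values, the second four, so the exact signature evidently differs between versions). Below is a minimal epsilon-greedy sketch matching the four-value form used above; the epsilon parameter, the 384-feature flattening, the empty-move fallback, and greedy argmax selection are assumptions rather than the original implementation.

import random
import numpy as np


def choose_action(model, moves, states, fens, epsilon=0.0):
    # no legal moves: return a plain-int reward of 0 (a guess at why the
    # caller checks isinstance(estimated_next_prize, int))
    if len(moves) == 0:
        return None, None, None, 0
    # score every candidate successor state with the value network
    values = model.predict(states.reshape((len(moves), 384)))
    if random.random() < epsilon:
        idx = random.randrange(len(moves))  # explore: random legal move
    else:
        idx = int(np.argmax(values))        # exploit: highest-valued state
    return moves[idx], states[idx], fens[idx], values[idx]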
Example #3
                _, chosen_state, _ = choose_action(acting_model, next_moves,
                                                   np.array(next_states),
                                                   next_fens)
                estimated_next_prize = target_model.predict(
                    chosen_state.reshape((1, 384)))[0]
                reinforced_p = p + gamma * estimated_next_prize
            else:
                reinforced_p = p
            reinforced_prizes.append(reinforced_p)

        states = np.array(states)
        reinforced_prizes = np.array(reinforced_prizes)
        acting_model.train_on_batch(states, reinforced_prizes)


if LOAD:
    model_trainer = load_trainer(LOAD_FROM, NAME, action, training)
else:
    memory = SimpleMemory(MEMORY_SIZE)
    model_trainer = DQNTrainer(model, memory, action, training)

board = cb.ChessBoard()
for i in range(START_AT_STEP, TRAINING_STEPS):
    print("Step {} of {}".format(i + 1, TRAINING_STEPS))
    model_trainer.take_action(board, get_epsilon(i))
    model_trainer.train(batch_size=BATCH, gamma=GAMMA, theta=THETA)
    if i % 1000 == 0:
        model_trainer.save("tmp", "{}_{}".format(NAME, i))

model_trainer.save("final", "{}_{}k".format(NAME, int(TRAINING_STEPS / 1000)))
class ChessBoardTest(unittest.TestCase):  # class name assumed; the original header is not part of this snippet

    def setUp(self):
        self.board = cb.ChessBoard()

    def test_checkmate_detection(self):
        fen_code = "8/8/8/5K1k/8/8/8/6R1 w k - 0 1"
        board = cb.ChessBoard(fen_code)
        move = chess.Move(chess.G1, chess.H1)
        board.make_move(move)
        self.assertEqual(board.get_results(), cb.CHECKMATE)

    def test_reward_attack(self):
        fen_code = "rnbqkbnr/ppp1pppp/8/3p4/4P3/8/PPPP1PPP/RNBQKBNR w KQkq - 0 1"
        board = cb.ChessBoard(fen_code)
        move = chess.Move(chess.E4, chess.D5)
        board.make_move(move)
        self.assertEqual(board.get_results(), cb.ATTACK)

    def test_attack_detection(self):
        fen_code = "rnbqkbnr/ppp1pppp/8/3p4/4P3/8/PPPP1PPP/RNBQKBNR w KQkq - 0 1"
        board = cb.ChessBoard(fen_code)
        move = chess.Move(chess.E4, chess.D5)
        board.make_move(move)
        self.assertTrue(board._attacked)
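Assuming the test methods live in a unittest.TestCase subclass as reconstructed above, they can be run with the standard runner:

import unittest  # needed at the top of the test module

if __name__ == "__main__":
    unittest.main()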