Example #1
 def test_fight2(self):
     player_blue = NNPlayer(Color.BLUE,
                            n_simulations=100,
                            janggi_net=JanggiNetwork(),
                            temperature_start=0.01,
                            temperature_threshold=30,
                            temperature_end=0.01)
     player_red = NNPlayer(Color.RED,
                           n_simulations=100,
                           janggi_net=JanggiNetwork(),
                           temperature_start=0.01,
                           temperature_threshold=30,
                           temperature_end=0.01)
     fight(player_blue, player_red, 100)
Example #2
 def test_complete(self):
     board = Board()
     janggi_nn = JanggiNetwork()
     features_in = board.get_features(Color.BLUE, 1)
     features_in = features_in.view(1, -1, BOARD_HEIGHT, BOARD_WIDTH)
     policy, value = janggi_nn(features_in)
     self.assertEqual(list(policy.shape), [1, 58, 10, 9])
     self.assertEqual(list(value.shape), [1, 1])
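The two assertions above pin down the JanggiNetwork output contract: a policy tensor of 58 move planes over the 10x9 board and a single scalar value per position. The stand-in below is only a sketch of that contract, not the real architecture; the 16 input channels and the layer sizes are assumptions.
import torch
import torch.nn as nn

BOARD_HEIGHT, BOARD_WIDTH = 10, 9  # Janggi board dimensions


class TinyPolicyValueNet(nn.Module):
    """Stand-in network reproducing the output shapes checked above."""

    def __init__(self, in_channels=16):
        super().__init__()
        self.trunk = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=3, padding=1), nn.ReLU())
        # Policy head: 58 move planes over the 10x9 board -> (N, 58, 10, 9).
        self.policy_head = nn.Conv2d(32, 58, kernel_size=1)
        # Value head: one scalar per position in [-1, 1] -> (N, 1).
        self.value_head = nn.Sequential(
            nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(32, 1), nn.Tanh())

    def forward(self, x):
        x = self.trunk(x)
        return self.policy_head(x), self.value_head(x)


features_in = torch.zeros(1, 16, BOARD_HEIGHT, BOARD_WIDTH)
policy, value = TinyPolicyValueNet()(features_in)
assert list(policy.shape) == [1, 58, 10, 9]
assert list(value.shape) == [1, 1]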
Example #3
 def test_single_action_nn(self):
     n_simulations = 800
     player_blue = NNPlayer(Color.BLUE,
                            n_simulations=n_simulations,
                            janggi_net=JanggiNetwork(),
                            temperature_start=0.01,
                            temperature_threshold=30,
                            temperature_end=0.01)
     player_red = NNPlayer(Color.RED,
                           n_simulations=n_simulations,
                           janggi_net=JanggiNetwork(),
                           temperature_start=0.01,
                           temperature_threshold=30,
                           temperature_end=0.01)
     board = get_random_board()
     game = Game(player_blue, player_red, board)
     game.get_next_action()
Example #4
 def __init__(self,
              predictor,
              n_simulations=800,
              iter_max=200,
              n_simulation_opponent=800,
              dir_base="model"):
     print("Setting trainer")
     self.predictor = predictor.to(DEVICE)
     self.n_simulations = n_simulations
     self.iter_max = iter_max
     self.n_simulations_opponent = n_simulation_opponent
     self.model_saver = ModelSaver(dir_base)
     self.optimizer = torch.optim.SGD(self.predictor.parameters(),
                                      lr=LEARNING_RATE,
                                      momentum=0.9,
                                      weight_decay=0.0001)
     if not TRAIN_NEW_MODEL:
         self.model_saver.load_latest_model(self.predictor, self.optimizer)
     self.old_model = JanggiNetwork(20)
     self.old_model.to(DEVICE)
Example #5
 def test_complete2(self):
     board = Board()
     janggi_nn = JanggiNetwork()
     features_in1 = board.get_features(Color.BLUE, 1)
     features_in1 = features_in1.view(1, -1, BOARD_HEIGHT, BOARD_WIDTH)
     policy1, value1 = janggi_nn(features_in1)
     features_in2 = board.get_features(Color.RED, 1)
     features_in2 = features_in2.view(1, -1, BOARD_HEIGHT, BOARD_WIDTH)
     policy2, value2 = janggi_nn(features_in2)
     self.assertNotEqual(features_in1.tolist(), features_in2.tolist())
     self.assertNotEqual(value1, value2)
Example #6
def get_player(player_name, color, model_saver):
    if player_name == "random_mcts":
        return RandomMCTSPlayer(color,
                                n_simulations=800,
                                temperature_start=0.01,
                                temperature_threshold=30,
                                temperature_end=0.01)
    else:
        predictor = JanggiNetwork()
        model_saver.load_index_model(predictor, None, player_name)
        return NNPlayer(color,
                        n_simulations=400,
                        janggi_net=predictor,
                        temperature_start=0.01,
                        temperature_threshold=30,
                        temperature_end=0.01)
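A hedged usage sketch for get_player, pitting a saved model against the random-MCTS baseline; the model index "3" and the round limit of 200 are made-up values, and imports are omitted as in the surrounding examples.
# Hypothetical usage: load saved model "3" for BLUE, MCTS baseline for RED.
model_saver = ModelSaver("model")
blue = get_player("3", Color.BLUE, model_saver)
red = get_player("random_mcts", Color.RED, model_saver)
fight(blue, red, 200)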
Example #7
def get_model():
    model = JanggiNetwork(N_RESIDUAL_DEFAULT)

    def load_latest_model():
        model_saver_temp = ModelSaver()
        model_saver_temp.load_latest_model(model)

    load_latest_model()
    model.to(DEVICE)
    model.eval()
    return model
Example #8
def get_model():
    model = JanggiNetwork()

    def load_latest_model():
        model_saver = ModelSaver()
        model_saver.load_latest_model(model)

    load_latest_model()
    model.to(DEVICE)
    model.eval()
    return model
Example #9
 def __init__(self,
              color,
              c_puct=DEFAULT_C_PUCT,
              n_simulations=DEFAULT_N_SIMULATIONS,
              current_node=None,
              janggi_net=None,
              temperature_start=DEFAULT_TEMPERATURE_START,
              temperature_threshold=DEFAULT_TEMPERATURE_THRESHOLD,
              temperature_end=DEFAULT_TEMPERATURE_END,
              think_when_other=False,
              print_info=False):
     super().__init__(color, c_puct, n_simulations, current_node,
                      temperature_start, temperature_threshold,
                      temperature_end, think_when_other, print_info)
     self.janggi_net = janggi_net or JanggiNetwork()
     # True when the predictor is a plain JanggiNetwork (the default);
     # False for any other predictor-like object passed in.
     self._is_predictor = isinstance(self.janggi_net, JanggiNetwork)
Example #10
from ia.janggi_network import JanggiNetwork
from ia.trainer import Trainer

# Example:
# CUDA_VISIBLE_DEVICES=2 python3 continuous_learning.py --n_fights 30 --c_puct 1.0 --n_residuals 20
# CUDA_VISIBLE_DEVICES=0 python3 continuous_learning.py --n_fights 30 --c_puct 1.0 --n_iterations 200 --number_simulations 800 --n_residuals 40 --train_on_all True --train_new_model True

if __name__ == "__main__":
    trainer = Trainer(JanggiNetwork(),
                      n_simulations=800,
                      iter_max=200,
                      n_simulation_opponent=800)
    trainer.continuous_learning()
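The usage comments above mention flags (--n_fights, --c_puct, --n_residuals, ...) that this snippet does not actually parse. Below is a minimal sketch of how such flags could be wired to the Trainer; the argparse wiring is an assumption, and --n_fights, --c_puct, --train_on_all and --train_new_model are left out because, in the examples above, they correspond to module-level constants (N_FIGHTS, DEFAULT_C_PUCT, TRAIN_ON_ALL, TRAIN_NEW_MODEL).
import argparse

from ia.janggi_network import JanggiNetwork
from ia.trainer import Trainer

if __name__ == "__main__":
    # Hypothetical CLI wiring; flag names are taken from the usage comment above.
    parser = argparse.ArgumentParser()
    parser.add_argument("--n_iterations", type=int, default=200)
    parser.add_argument("--number_simulations", type=int, default=800)
    parser.add_argument("--n_residuals", type=int, default=20)
    args = parser.parse_args()

    trainer = Trainer(JanggiNetwork(args.n_residuals),
                      n_simulations=args.number_simulations,
                      iter_max=args.n_iterations,
                      n_simulation_opponent=args.number_simulations)
    trainer.continuous_learning()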
Example #11
class Trainer:
    def __init__(self,
                 predictor,
                 n_simulations=800,
                 iter_max=200,
                 n_simulation_opponent=800,
                 dir_base="model"):
        print("Setting trainer")
        self.predictor = predictor.to(DEVICE)
        self.n_simulations = n_simulations
        self.iter_max = iter_max
        self.n_simulations_opponent = n_simulation_opponent
        self.model_saver = ModelSaver(dir_base)
        self.optimizer = torch.optim.SGD(self.predictor.parameters(),
                                         lr=LEARNING_RATE,
                                         momentum=0.9,
                                         weight_decay=0.0001)
        if not TRAIN_NEW_MODEL:
            self.model_saver.load_latest_model(self.predictor, self.optimizer)
        self.old_model = JanggiNetwork(20)
        self.old_model.to(DEVICE)

    def run_episode(self):
        examples = []
        board = get_random_board()
        initial_node = MCTSNode(is_initial=True)
        player_blue = NNPlayer(Color.BLUE,
                               n_simulations=self.n_simulations,
                               current_node=initial_node,
                               janggi_net=self.predictor,
                               temperature_start=1,
                               temperature_threshold=30,
                               temperature_end=0.01)
        player_red = NNPlayer(Color.RED,
                              n_simulations=self.n_simulations,
                              current_node=initial_node,
                              janggi_net=self.predictor,
                              temperature_start=1,
                              temperature_threshold=30,
                              temperature_end=0.01)
        game = Game(player_blue, player_red, board)
        while not game.is_finished(self.iter_max):
            new_action = game.get_next_action()
            game.actions.append(new_action)
            # Record both the standard and the data-augmented view of the
            # position for the player who just moved.
            if game.current_player == Color.BLUE:
                examples.append([
                    board.get_features(game.current_player, game.round),
                    player_blue.current_node.get_policy(game.current_player),
                    Color.BLUE
                ])
                examples.append([
                    board.get_features(game.current_player,
                                       game.round,
                                       data_augmentation=True),
                    player_blue.current_node.get_policy(
                        game.current_player, data_augmentation=True),
                    Color.BLUE
                ])
            else:
                examples.append([
                    board.get_features(game.current_player,
                                       game.round,
                                       data_augmentation=True),
                    player_red.current_node.get_policy(game.current_player,
                                                       data_augmentation=True),
                    Color.RED
                ])
                examples.append([
                    board.get_features(game.current_player, game.round),
                    player_red.current_node.get_policy(game.current_player),
                    Color.RED
                ])
            game.board.apply_action(new_action)
            game.switch_player()
            game.board.invalidate_action_cache(
                new_action)  # Try to reduce memory usage
            game.round += 1
        winner = game.get_winner()
        set_winner(examples, winner)
        return examples

    def learn_policy(self, n_iterations, n_episodes):
        for _ in range(n_iterations):
            if self.model_saver.has_last_episode():
                examples = self.model_saver.load_last_episode()
            else:
                examples = []
                for ep in range(n_episodes):
                    begin_time = time.time()
                    examples += self.run_episode()
                    print("Time Episode", ep, ": ", time.time() - begin_time)
                self.model_saver.save_episodes(examples)
            self.train_and_fight(examples)

    def learn_supervised(self, training_file):
        print("Generate training data...")
        with open(training_file) as f:
            examples_all = list(_raw_to_examples(f))
        print("Start training")
        self.train_and_fight(examples_all)

    def continuous_learning(self):
        self.model_saver.load_latest_model(self.old_model, None)
        self.old_model.to(DEVICE)
        while True:
            if self.model_saver.has_last_episode_raw():
                print("Start new learning")
                self.continuous_learning_once()
            else:
                print("Waiting for more episodes")
                time.sleep(WAINTING_TIME_IF_NO_EPISODE)

    def continuous_learning_once(self):
        # First, train
        for _ in range(EPOCH_NUMBER_CONTINUOUS):
            training_set = []
            for example in _raw_to_examples(
                    self.model_saver.all_episodes_raw_iterators(),
                    PROP_POPULATION_FOR_LEARNING):
                training_set.append(example)
                if len(training_set) > N_LAST_GAME_TO_CONSIDER:
                    # Either stop after N_LAST_GAME_TO_CONSIDER examples, or
                    # train in chunks of that size when TRAIN_ON_ALL is set.
                    if not TRAIN_ON_ALL:
                        break
                    self.train(training_set)
                    training_set = []
            self.train(training_set)
        # Then, fight!
        # old_model = copy.deepcopy(self.predictor)
        self.model_saver.load_latest_model(self.old_model, None)
        self.old_model.to(DEVICE)
        victories = 0
        print("Start the fights!")
        for i in range(N_FIGHTS):
            if i < N_FIGHTS / 2:
                print("I am BLUE")
                new_player = NNPlayer(Color.BLUE,
                                      n_simulations=self.n_simulations,
                                      janggi_net=self.predictor,
                                      temperature_start=0.01,
                                      temperature_threshold=30,
                                      temperature_end=0.01)
                old_player = NNPlayer(Color.RED,
                                      n_simulations=self.n_simulations,
                                      janggi_net=self.old_model,
                                      temperature_start=0.01,
                                      temperature_threshold=30,
                                      temperature_end=0.01)
                winner = fight(new_player, old_player, self.iter_max)
                if winner == Color.BLUE:
                    victories += 1
            else:
                print("I am RED")
                new_player = NNPlayer(Color.RED,
                                      n_simulations=self.n_simulations,
                                      janggi_net=self.predictor,
                                      temperature_start=0.01,
                                      temperature_threshold=30,
                                      temperature_end=0.01)
                old_player = NNPlayer(Color.BLUE,
                                      n_simulations=self.n_simulations,
                                      janggi_net=self.old_model,
                                      temperature_start=0.01,
                                      temperature_threshold=30,
                                      temperature_end=0.01)
                winner = fight(old_player, new_player, self.iter_max)
                if winner == Color.RED:
                    victories += 1
            if (victories + N_FIGHTS - i -
                    1) / N_FIGHTS * 100 < VICTORY_THRESHOLD:
                # There is no more hope...
                break
        victory_percentage = victories / N_FIGHTS * 100
        if victory_percentage > VICTORY_THRESHOLD:
            # Replace model
            print("The model was good enough", victory_percentage)
            self.model_saver.save_weights(self.predictor,
                                          optimizer=self.optimizer)
        else:
            # We do not save the model
            print("The model was not good enough", victory_percentage)
            # self.model_saver.load_latest_model(self.predictor, optimizer=self.optimizer)

    def train_and_fight(self, examples):
        self.train(examples)
        self.organize_fight()

        self.model_saver.save_weights(self.predictor, optimizer=self.optimizer)
        self.model_saver.rename_last_episode()

    def organize_fight(self):
        player_red = RandomPlayer(Color.RED)
        player_blue = NNPlayer(Color.BLUE,
                               n_simulations=self.n_simulations,
                               janggi_net=self.predictor,
                               temperature_start=0.01,
                               temperature_threshold=30,
                               temperature_end=0.01)
        fight(player_blue, player_red, self.iter_max)
        player_red = RandomMCTSPlayer(
            Color.RED,
            n_simulations=self.n_simulations_opponent,
            temperature_start=0.01,
            temperature_threshold=30,
            temperature_end=0.01)
        player_blue = NNPlayer(Color.BLUE,
                               n_simulations=self.n_simulations,
                               janggi_net=self.predictor,
                               temperature_start=0.01,
                               temperature_threshold=30,
                               temperature_end=0.01)
        fight(player_blue, player_red, self.iter_max)

    def train(self, examples):
        self.predictor.train()
        criterion = JanggiLoss()
        dataset = ExampleDataset(examples)
        if examples:
            dataloader = DataLoader(dataset,
                                    batch_size=BATCH_SIZE,
                                    shuffle=True,
                                    num_workers=0)
        else:
            # No examples: keep the empty list so the training loop below is a no-op.
            dataloader = examples

        for epoch in range(EPOCH_NUMBER):
            running_loss = 0.0
            for i, example in enumerate(dataloader):
                board, actions, value = example
                self.optimizer.zero_grad()
                board = board.to(DEVICE)
                policy, value_predicted = self.predictor(board)
                value_predicted = value_predicted.view(-1, 1)
                policy = policy.to(DEVICE)
                value_predicted = value_predicted.to(DEVICE)
                actions = actions.to(DEVICE)
                value = value.view(-1, 1).to(DEVICE)
                loss = criterion((policy, value_predicted), (actions, value))
                loss.backward()
                self.optimizer.step()
                running_loss += loss.item()
                if i % LOG_PRINT_FREQ == LOG_PRINT_FREQ - 1:
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / LOG_PRINT_FREQ))
                    running_loss = 0.0
        self.predictor.eval()
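train() delegates the objective to JanggiLoss, called as criterion((policy, value_predicted), (actions, value)), but its implementation is not shown in these examples. The stand-in below is an AlphaZero-style sketch that matches that call signature; whether the real JanggiLoss normalizes the policy planes the same way, or weights the two terms differently, is an assumption.
import torch.nn as nn
import torch.nn.functional as F


class AlphaZeroStyleLoss(nn.Module):
    """Stand-in for JanggiLoss: value MSE plus policy cross-entropy.

    Hypothetical sketch; the actual JanggiLoss is not shown in the examples.
    """

    def forward(self, predicted, target):
        policy_logits, value_predicted = predicted   # (N, 58, 10, 9), (N, 1)
        target_policy, target_value = target         # same shapes as above
        # Value head: mean squared error against the game outcome in [-1, 1].
        value_loss = F.mse_loss(value_predicted, target_value)
        # Policy head: cross-entropy between the MCTS policy and the
        # log-probabilities over all 58 * 10 * 9 move planes.
        log_probs = F.log_softmax(policy_logits.flatten(1), dim=1)
        policy_loss = -(target_policy.flatten(1) * log_probs).sum(dim=1).mean()
        return value_loss + policy_loss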
Example #12
 def test_first(self):
     trainer = Trainer(JanggiNetwork(), 10, 10)
     examples = trainer.run_episode()
     self.assertEqual(len(examples), 20)
     for example in examples:
         self.assertIn(example[2], [-1, 1])
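test_first checks that the third field of every example produced by run_episode ends up in {-1, 1}: run_episode stores the mover's Color there and set_winner rewrites it once the game is over. set_winner itself is not shown in these examples, so the following is only a sketch consistent with that test.
def set_winner(examples, winner):
    # Hypothetical sketch: rewrite the stored player color as the outcome
    # from that player's point of view (+1 for a win, -1 otherwise).
    for example in examples:
        example[2] = 1 if example[2] == winner else -1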
Example #13
 def test_fight(self):
     trainer = Trainer(JanggiNetwork(),
                       n_simulations=10,
                       iter_max=30,
                       n_simulation_opponent=10)
     trainer.train_and_fight([])
Example #14
 def test_learn(self):
     trainer = Trainer(JanggiNetwork(),
                       n_simulations=100,
                       iter_max=30,
                       n_simulation_opponent=10)
     trainer.learn_policy(n_iterations=1, n_episodes=10)