def test_fight2(self):
    player_blue = NNPlayer(Color.BLUE, n_simulations=100, janggi_net=JanggiNetwork(),
                           temperature_start=0.01, temperature_threshold=30, temperature_end=0.01)
    player_red = NNPlayer(Color.RED, n_simulations=100, janggi_net=JanggiNetwork(),
                          temperature_start=0.01, temperature_threshold=30, temperature_end=0.01)
    fight(player_blue, player_red, 100)
def test_complete(self):
    board = Board()
    janggi_nn = JanggiNetwork()
    features_in = board.get_features(Color.BLUE, 1)
    features_in = features_in.view(1, -1, BOARD_HEIGHT, BOARD_WIDTH)
    policy, value = janggi_nn(features_in)
    self.assertEqual(list(policy.shape), [1, 58, 10, 9])
    self.assertEqual(list(value.shape), [1, 1])
def test_single_action_nn(self):
    n_simulations = 800
    player_blue = NNPlayer(Color.BLUE, n_simulations=n_simulations, janggi_net=JanggiNetwork(),
                           temperature_start=0.01, temperature_threshold=30, temperature_end=0.01)
    player_red = NNPlayer(Color.RED, n_simulations=n_simulations, janggi_net=JanggiNetwork(),
                          temperature_start=0.01, temperature_threshold=30, temperature_end=0.01)
    board = get_random_board()
    game = Game(player_blue, player_red, board)
    game.get_next_action()
def test_complete2(self):
    board = Board()
    janggi_nn = JanggiNetwork()
    features_in1 = board.get_features(Color.BLUE, 1)
    features_in1 = features_in1.view(1, -1, BOARD_HEIGHT, BOARD_WIDTH)
    policy1, value1 = janggi_nn(features_in1)
    features_in2 = board.get_features(Color.RED, 1)
    features_in2 = features_in2.view(1, -1, BOARD_HEIGHT, BOARD_WIDTH)
    policy2, value2 = janggi_nn(features_in2)
    self.assertNotEqual(features_in1.tolist(), features_in2.tolist())
    self.assertNotEqual(value1, value2)
def get_player(player_name, color, model_saver):
    if player_name == "random_mcts":
        return RandomMCTSPlayer(color, n_simulations=800,
                                temperature_start=0.01, temperature_threshold=30, temperature_end=0.01)
    else:
        predictor = JanggiNetwork()
        model_saver.load_index_model(predictor, None, player_name)
        return NNPlayer(color, n_simulations=400, janggi_net=predictor,
                        temperature_start=0.01, temperature_threshold=30, temperature_end=0.01)
def get_model():
    model = JanggiNetwork(N_RESIDUAL_DEFAULT)

    def load_latest_model():
        model_saver_temp = ModelSaver()
        model_saver_temp.load_latest_model(model)

    load_latest_model()
    model.to(DEVICE)
    model.eval()
    return model
def get_model():
    model = JanggiNetwork()

    def load_latest_model():
        model_saver = ModelSaver()
        model_saver.load_latest_model(model)

    load_latest_model()
    model.to(DEVICE)
    model.eval()
    return model
def __init__(self, color, c_puct=DEFAULT_C_PUCT, n_simulations=DEFAULT_N_SIMULATIONS,
             current_node=None, janggi_net=None,
             temperature_start=DEFAULT_TEMPERATURE_START,
             temperature_threshold=DEFAULT_TEMPERATURE_THRESHOLD,
             temperature_end=DEFAULT_TEMPERATURE_END,
             think_when_other=False, print_info=False):
    super().__init__(color, c_puct, n_simulations, current_node,
                     temperature_start, temperature_threshold, temperature_end,
                     think_when_other, print_info)
    self.janggi_net = janggi_net or JanggiNetwork()
    self._is_predictor = isinstance(self.janggi_net, JanggiNetwork)
from ia.janggi_network import JanggiNetwork
from ia.trainer import Trainer

# Example:
# CUDA_VISIBLE_DEVICES=2 python3 continuous_learning.py --n_fights 30 --c_puct 1.0 --n_residuals 20
# CUDA_VISIBLE_DEVICES=0 python3 continuous_learning.py --n_fights 30 --c_puct 1.0 --n_iterations 200 --number_simulations 800 --n_residuals 40 --train_on_all True --train_new_model True

if __name__ == "__main__":
    trainer = Trainer(JanggiNetwork(), n_simulations=800, iter_max=200, n_simulation_opponent=800)
    trainer.continuous_learning()
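# The example commands above pass CLI flags (--n_fights, --c_puct, --n_residuals,
# --n_iterations, --number_simulations, --train_on_all, --train_new_model) that the
# hard-coded __main__ block does not read. Below is a minimal, hypothetical argparse
# sketch of how a few of those flags could feed the Trainer; the script's actual CLI
# parsing is not shown in this section, so treat the mapping as an assumption.
import argparse


def _parse_args():
    parser = argparse.ArgumentParser(description="Continuous self-play learning")
    parser.add_argument("--number_simulations", type=int, default=800)
    parser.add_argument("--n_iterations", type=int, default=200)
    parser.add_argument("--n_residuals", type=int, default=20)
    return parser.parse_args()


# Hypothetical usage replacing the hard-coded values above:
#   args = _parse_args()
#   trainer = Trainer(JanggiNetwork(args.n_residuals),
#                     n_simulations=args.number_simulations,
#                     iter_max=args.n_iterations,
#                     n_simulation_opponent=args.number_simulations)
#   trainer.continuous_learning()
# Flags such as --train_on_all or --train_new_model presumably configure module-level
# constants (TRAIN_ON_ALL, TRAIN_NEW_MODEL); that wiring is not shown here.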
class Trainer:
    """AlphaZero-style self-play training loop for the Janggi network."""

    def __init__(self, predictor, n_simulations=800, iter_max=200,
                 n_simulation_opponent=800, dir_base="model"):
        print("Setting trainer")
        self.predictor = predictor.to(DEVICE)
        self.n_simulations = n_simulations
        self.iter_max = iter_max
        self.n_simulations_opponent = n_simulation_opponent
        self.model_saver = ModelSaver(dir_base)
        self.optimizer = torch.optim.SGD(self.predictor.parameters(),
                                         lr=LEARNING_RATE, momentum=0.9, weight_decay=0.0001)
        if not TRAIN_NEW_MODEL:
            self.model_saver.load_latest_model(self.predictor, self.optimizer)
        self.old_model = JanggiNetwork(20)
        self.old_model.to(DEVICE)

    def run_episode(self):
        """Play one self-play game and return [features, MCTS policy, outcome] examples."""
        examples = []
        board = get_random_board()
        initial_node = MCTSNode(is_initial=True)
        player_blue = NNPlayer(Color.BLUE, n_simulations=self.n_simulations,
                               current_node=initial_node, janggi_net=self.predictor,
                               temperature_start=1, temperature_threshold=30, temperature_end=0.01)
        player_red = NNPlayer(Color.RED, n_simulations=self.n_simulations,
                              current_node=initial_node, janggi_net=self.predictor,
                              temperature_start=1, temperature_threshold=30, temperature_end=0.01)
        game = Game(player_blue, player_red, board)
        while not game.is_finished(self.iter_max):
            new_action = game.get_next_action()
            game.actions.append(new_action)
            # Each position is stored twice: once as-is and once with data augmentation.
            if game.current_player == Color.BLUE:
                examples.append([
                    board.get_features(game.current_player, game.round),
                    player_blue.current_node.get_policy(game.current_player),
                    Color.BLUE
                ])
                examples.append([
                    board.get_features(game.current_player, game.round, data_augmentation=True),
                    player_blue.current_node.get_policy(game.current_player, data_augmentation=True),
                    Color.BLUE
                ])
            else:
                examples.append([
                    board.get_features(game.current_player, game.round, data_augmentation=True),
                    player_red.current_node.get_policy(game.current_player, data_augmentation=True),
                    Color.RED
                ])
                examples.append([
                    board.get_features(game.current_player, game.round),
                    player_red.current_node.get_policy(game.current_player),
                    Color.RED
                ])
            game.board.apply_action(new_action)
            game.switch_player()
            game.board.invalidate_action_cache(new_action)  # Try to reduce memory usage
            game.round += 1
        winner = game.get_winner()
        set_winner(examples, winner)
        return examples

    def learn_policy(self, n_iterations, n_episodes):
        """Generate (or reload) self-play episodes, then train and evaluate the network."""
        for _ in range(n_iterations):
            if self.model_saver.has_last_episode():
                examples = self.model_saver.load_last_episode()
            else:
                examples = []
                for ep in range(n_episodes):
                    begin_time = time.time()
                    examples += self.run_episode()
                    print("Time Episode", ep, ": ", time.time() - begin_time)
                self.model_saver.save_episodes(examples)
            self.train_and_fight(examples)

    def learn_supervised(self, training_file):
        """Train from a file of raw recorded games."""
        print("Generate training data...")
        with open(training_file) as f:
            examples_all = list(_raw_to_examples(f))
        print("Start training")
        self.train_and_fight(examples_all)

    def continuous_learning(self):
        """Endlessly train on newly available raw episodes as they are produced."""
        self.model_saver.load_latest_model(self.old_model, None)
        self.old_model.to(DEVICE)
        while True:
            if self.model_saver.has_last_episode_raw():
                print("Start new learning")
                self.continuous_learning_once()
            else:
                print("Waiting for more episodes")
                time.sleep(WAINTING_TIME_IF_NO_EPISODE)

    def continuous_learning_once(self):
        # First, train
        for _ in range(EPOCH_NUMBER_CONTINUOUS):
            training_set = []
            for example in _raw_to_examples(self.model_saver.all_episodes_raw_iterators(),
                                            PROP_POPULATION_FOR_LEARNING):
                training_set.append(example)
                if len(training_set) > N_LAST_GAME_TO_CONSIDER:
                    if not TRAIN_ON_ALL:
                        break
                    self.train(training_set)
                    training_set = []
            self.train(training_set)
        # Then, fight!
        # old_model = copy.deepcopy(self.predictor)
        self.model_saver.load_latest_model(self.old_model, None)
        self.old_model.to(DEVICE)
        victories = 0
        print("Start the fights!")
        for i in range(N_FIGHTS):
            if i < N_FIGHTS / 2:
                print("I am BLUE")
                new_player = NNPlayer(Color.BLUE, n_simulations=self.n_simulations,
                                      janggi_net=self.predictor,
                                      temperature_start=0.01, temperature_threshold=30, temperature_end=0.01)
                old_player = NNPlayer(Color.RED, n_simulations=self.n_simulations,
                                      janggi_net=self.old_model,
                                      temperature_start=0.01, temperature_threshold=30, temperature_end=0.01)
                winner = fight(new_player, old_player, self.iter_max)
                if winner == Color.BLUE:
                    victories += 1
            else:
                print("I am RED")
                new_player = NNPlayer(Color.RED, n_simulations=self.n_simulations,
                                      janggi_net=self.predictor,
                                      temperature_start=0.01, temperature_threshold=30, temperature_end=0.01)
                old_player = NNPlayer(Color.BLUE, n_simulations=self.n_simulations,
                                      janggi_net=self.old_model,
                                      temperature_start=0.01, temperature_threshold=30, temperature_end=0.01)
                winner = fight(old_player, new_player, self.iter_max)
                if winner == Color.RED:
                    victories += 1
            if (victories + N_FIGHTS - i - 1) / N_FIGHTS * 100 < VICTORY_THRESHOLD:
                # There is no more hope...
                break
        victory_percentage = victories / N_FIGHTS * 100
        if victory_percentage > VICTORY_THRESHOLD:
            # Replace model
            print("The model was good enough", victory_percentage)
            self.model_saver.save_weights(self.predictor, optimizer=self.optimizer)
        else:
            # We do not save the model
            print("The model was not good enough", victory_percentage)
            # self.model_saver.load_latest_model(self.predictor, optimizer=self.optimizer)

    def train_and_fight(self, examples):
        """Train on the examples, evaluate against baseline players, and save the weights."""
        self.train(examples)
        self.organize_fight()
        self.model_saver.save_weights(self.predictor, optimizer=self.optimizer)
        self.model_saver.rename_last_episode()

    def organize_fight(self):
        """Evaluate the current network against a random player and a pure-MCTS player."""
        player_red = RandomPlayer(Color.RED)
        player_blue = NNPlayer(Color.BLUE, n_simulations=self.n_simulations, janggi_net=self.predictor,
                               temperature_start=0.01, temperature_threshold=30, temperature_end=0.01)
        fight(player_blue, player_red, self.iter_max)
        player_red = RandomMCTSPlayer(Color.RED, n_simulations=self.n_simulations_opponent,
                                      temperature_start=0.01, temperature_threshold=30, temperature_end=0.01)
        player_blue = NNPlayer(Color.BLUE, n_simulations=self.n_simulations, janggi_net=self.predictor,
                               temperature_start=0.01, temperature_threshold=30, temperature_end=0.01)
        fight(player_blue, player_red, self.iter_max)

    def train(self, examples):
        """Run EPOCH_NUMBER epochs of SGD on the examples with the policy/value loss."""
        self.predictor.train()
        criterion = JanggiLoss()
        dataset = ExampleDataset(examples)
        if examples:
            dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
        else:
            dataloader = examples
        for epoch in range(EPOCH_NUMBER):
            running_loss = 0.0
            for i, example in enumerate(dataloader):
                board, actions, value = example
                self.optimizer.zero_grad()
                board = board.to(DEVICE)
                policy, value_predicted = self.predictor(board)
                value_predicted = value_predicted.view(-1, 1)
                policy = policy.to(DEVICE)
                value_predicted = value_predicted.to(DEVICE)
                actions = actions.to(DEVICE)
                value = value.view(-1, 1).to(DEVICE)
                loss = criterion((policy, value_predicted), (actions, value))
                loss.backward()
                self.optimizer.step()
                running_loss += loss.item()
                if i % LOG_PRINT_FREQ == LOG_PRINT_FREQ - 1:
                    print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / LOG_PRINT_FREQ))
                    running_loss = 0.0
        self.predictor.eval()
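# Trainer.train() above delegates the objective to JanggiLoss, whose implementation is
# not shown in this section. As a hedged reference, the sketch below assumes the standard
# AlphaZero-style objective (cross-entropy between the MCTS policy target and the policy
# head, plus mean squared error on the value head); the real JanggiLoss may differ.
import torch.nn as nn
import torch.nn.functional as F


class JanggiLossSketch(nn.Module):
    """Hypothetical stand-in for JanggiLoss, matching the (prediction, target) call
    signature used in Trainer.train(): criterion((policy, value_pred), (actions, value))."""

    def forward(self, predictions, targets):
        policy_pred, value_pred = predictions    # policy: (B, 58, 10, 9), value: (B, 1)
        policy_target, value_target = targets    # same policy shape; value in [-1, 1]
        # Policy term: cross-entropy of the predicted log-probabilities against the
        # visit-count distribution produced by MCTS.
        log_probs = F.log_softmax(policy_pred.flatten(1), dim=1)
        policy_loss = -(policy_target.flatten(1) * log_probs).sum(dim=1).mean()
        # Value term: mean squared error against the final game outcome.
        value_loss = F.mse_loss(value_pred, value_target)
        return policy_loss + value_loss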
def test_first(self):
    trainer = Trainer(JanggiNetwork(), 10, 10)
    examples = trainer.run_episode()
    self.assertEqual(len(examples), 20)
    for example in examples:
        self.assertIn(example[2], [-1, 1])
def test_fight(self):
    trainer = Trainer(JanggiNetwork(), n_simulations=10, iter_max=30, n_simulation_opponent=10)
    trainer.train_and_fight([])
def test_learn(self):
    trainer = Trainer(JanggiNetwork(), n_simulations=100, iter_max=30, n_simulation_opponent=10)
    trainer.learn_policy(n_iterations=1, n_episodes=10)