def nn_update(self, generation):
    """
    updates the neural network by picking a random batch from the experience replay
    :param generation:  the network generation (number of iterations so far)
    :return:            average policy and value loss over all mini batches
    """
    # setup the data set
    training_generator = self.experience_buffer.prepare_data(generation)
    step_size = len(training_generator.dataset) // (2 * Config.batch_size)
    logger.info("training data prepared, step size: {}".format(step_size))

    # activate the training mode
    self.network = data_storage.net_to_device(self.network, Config.training_device)
    if Config.cyclic_learning:
        self.network.update_scheduler(step_size)   # update the scheduler
    self.network.train()

    avg_loss_p = 0
    avg_loss_v = 0
    tot_batch_count = 0
    for epoch in range(Config.epoch_count):
        # training
        for state_batch, value_batch, policy_batch in training_generator:
            # send the data to the gpu
            state_batch = state_batch.to(Config.training_device, dtype=torch.float)
            value_batch = value_batch.unsqueeze(1).to(Config.training_device, dtype=torch.float)
            policy_batch = policy_batch.to(Config.training_device, dtype=torch.float)

            # execute the training step with one batch
            if Config.cyclic_learning:
                loss_p, loss_v = self.network.train_cyclical_step(state_batch, policy_batch, value_batch)
            else:
                loss_p, loss_v = self.network.train_step(state_batch, policy_batch, value_batch)

            avg_loss_p += loss_p
            avg_loss_v += loss_v
            tot_batch_count += 1

    # calculate the mean of the loss
    avg_loss_p /= tot_batch_count
    avg_loss_v /= tot_batch_count

    # activate the evaluation mode
    self.network = data_storage.net_to_device(self.network, Config.evaluation_device)
    self.network.eval()

    return avg_loss_p.item(), avg_loss_v.item()
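# Minimal self-contained sketch of a cyclic learning-rate schedule in plain PyTorch.
# This is only an assumption about what update_scheduler() / train_cyclical_step()
# wrap in this project; the dummy model, optimizer and step size are illustrative.
def _cyclic_lr_sketch():
    import torch

    model = torch.nn.Linear(10, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-5, momentum=0.9)
    step_size = 50  # batches per half-cycle, analogous to the step_size computed above
    scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=1e-5, max_lr=1e-3,
                                                  step_size_up=step_size)

    for _ in range(200):
        x = torch.randn(8, 10)
        loss = model(x).pow(2).mean()   # stand-in loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()                # advance the cyclic schedule once per batch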
def train_net(epoch_count, training_generator, csv_test_set):
    """
    trains the neural network a few times and returns the value and prediction error
    :param epoch_count:         the number of epochs to train the neural network
    :param training_generator:  the torch training generator
    :param csv_test_set:        the test set on which the network is tested
    :return:                    prediction error, value error
    """
    logger = logging.getLogger('Sup Learning')

    # create a new network to train
    network = networks.ResNet(Config.learning_rate, Config.n_blocks, Config.n_filters, Config.weight_decay)
    network = data_storage.net_to_device(network, Config.training_device)

    # execute the training by looping over all epochs
    network.train()
    for epoch in range(epoch_count):
        # training
        for state_batch, value_batch, policy_batch in training_generator:
            # send the data to the gpu
            state_batch = state_batch.to(Config.training_device)
            value_batch = value_batch.to(Config.training_device)
            policy_batch = policy_batch.to(Config.training_device)

            # execute one training step
            _, _ = network.train_step(state_batch, policy_batch, value_batch)

    # evaluation
    pred_error, val_error = evaluation.net_prediction_error(network, csv_test_set)
    logger.debug("learning rate {}, prediction error: {}, value-error: {}".format(
        Config.learning_rate, pred_error, val_error))

    return pred_error, val_error
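# A minimal sketch of what a policy/value error on a held-out set could compute.
# evaluation.net_prediction_error() is a project helper not shown here, and the
# assumption that the network's forward pass returns (policy_logits, value) is
# illustrative only.
def _prediction_error_sketch(network, states, policy_targets, value_targets, device):
    import torch

    network.eval()
    with torch.no_grad():
        policy_logits, values = network(states.to(device, dtype=torch.float))
        # fraction of positions where the most likely move differs from the played move
        pred_error = (policy_logits.argmax(dim=1).cpu() != policy_targets).float().mean().item()
        # mean squared error of the value head against the game results
        val_error = torch.mean((values.squeeze(1).cpu() - value_targets) ** 2).item()
    return pred_error, val_error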
def __init__(self, network):
    """
    :param network:  alpha zero network that is used for training and evaluation
    """
    self.network = network                        # the network
    self.experience_buffer = ExperienceBuffer()   # buffer that saves all experiences

    # activate the evaluation mode of the networks
    self.network = data_storage.net_to_device(self.network, Config.evaluation_device)
    self.network.eval()
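# data_storage.net_to_device() is a project helper that is not shown here. A minimal
# sketch of the standard PyTorch equivalent, assuming it simply moves the module and
# returns it; the eval() call matters because layers such as BatchNorm and Dropout
# behave differently during evaluation than during training.
def _to_device_sketch():
    import torch

    net = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.BatchNorm1d(4))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = net.to(device)   # what net_to_device presumably boils down to
    net.eval()             # use running statistics, disable dropout
    with torch.no_grad():
        out = net(torch.randn(2, 4, device=device))
    return out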
def mainTrain():
    # The logger
    utils.init_logger(logging.DEBUG, file_name="log/chess_sl.log")
    logger = logging.getLogger('Sup Learning')

    np.set_printoptions(suppress=True, precision=6)

    # set the random seed
    random.seed(a=None, version=2)
    np.random.seed(seed=None)

    # parameters
    variant = "threeCheck"
    Config.learning_rate = 0.0001
    Config.weight_decay = 1e-4
    Config.n_blocks = 10
    Config.n_filters = 128
    epochs = 3
    training_set_path = "positions-avg" + variant + ".h5"
    network_dir = "networks/" + variant
    training_progress_dir = "training_progress/" + variant

    # define the parameters for the training
    params = {
        'batch_size': 512,
        'shuffle': True,
        'num_workers': 2,
    }

    # create the data set class
    training_set = data_processing.Dataset(training_set_path)
    training_generator = data.DataLoader(training_set, **params)
    logger.info("training set created, length: {}".format(len(training_set)))

    # create a new network to train
    network = networks.ResNet(Config.learning_rate, Config.n_blocks, Config.n_filters, Config.weight_decay)
    network = data_storage.net_to_device(network, Config.training_device)

    # create all needed folders
    Path(network_dir).mkdir(parents=True, exist_ok=True)
    Path(training_progress_dir).mkdir(parents=True, exist_ok=True)

    # lists for the plots
    batches = []
    policy_loss = []
    value_loss = []
    tot_batch_count = 0
    current_batch_count = 0
    current_value_loss = 0
    current_policy_loss = 0

    # execute the training by looping over all epochs
    network.train()
    for epoch in range(epochs):
        # training
        for state_batch, policy_batch, value_batch in training_generator:
            # send the data to the gpu
            state_batch = state_batch.to(Config.training_device, dtype=torch.float)
            value_batch = value_batch.unsqueeze(1).to(Config.training_device, dtype=torch.float)
            policy_batch = policy_batch.to(Config.training_device, dtype=torch.float)

            # execute one training step
            loss_p, loss_v = network.train_step(state_batch, policy_batch, value_batch)
            current_policy_loss += loss_p
            current_value_loss += loss_v
            current_batch_count += 1
            tot_batch_count += 1

            if tot_batch_count % 100 == 0:
                logger.debug("epoch {}: trained {} batches so far".format(epoch, tot_batch_count))
                batches.append(tot_batch_count)
                policy_loss.append(current_policy_loss / current_batch_count)
                value_loss.append(current_value_loss / current_batch_count)
                current_policy_loss = 0
                current_value_loss = 0
                current_batch_count = 0

            if tot_batch_count % 1000 == 0:
                network_path = "{}/network_batch_{}.pt".format(network_dir, tot_batch_count)
                torch.save(network, network_path)
                np.save(training_progress_dir + "/value_loss.npy", np.array(value_loss))
                np.save(training_progress_dir + "/policy_loss.npy", np.array(policy_loss))
                np.save(training_progress_dir + "/batches.npy", np.array(batches))

        # save the last network
        network_path = "{}/network_batch_{}.pt".format(network_dir, tot_batch_count)
        torch.save(network, network_path)
        np.save(training_progress_dir + "/value_loss.npy", np.array(value_loss))
        np.save(training_progress_dir + "/policy_loss.npy", np.array(policy_loss))
        np.save(training_progress_dir + "/batches.npy", np.array(batches))
        logger.debug("epoch {}: finished training".format(epoch))

    # plot the loss versus the number of seen batches
    # plot the value training loss
    fig1 = plt.figure(1)
    plt.plot(batches, value_loss)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Value Training Loss")
    plt.xlabel("Training Samples")
    plt.ylabel("Value Loss")
    fig1.show()

    # plot the training policy loss
    fig2 = plt.figure(2)
    plt.plot(batches, policy_loss)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Policy Training Loss")
    plt.xlabel("Training Samples")
    plt.ylabel("Policy Loss")
    fig2.show()

    plt.show()
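# The loop above saves the full module with torch.save(network, path) and the loss
# curves as .npy arrays, so both can be reloaded later. A minimal sketch; the
# checkpoint name below is just an example of a file that may exist after training,
# and loading the full module requires the network class to be importable.
def _reload_progress_sketch():
    import numpy as np
    import torch
    import matplotlib.pyplot as plt

    network = torch.load("networks/threeCheck/network_batch_1000.pt", map_location="cpu")
    network.eval()

    batches = np.load("training_progress/threeCheck/batches.npy")
    policy_loss = np.load("training_progress/threeCheck/policy_loss.npy")
    plt.plot(batches, policy_loss)
    plt.xlabel("Training Samples")
    plt.ylabel("Policy Loss")
    plt.show()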
def mainTrain():
    # The logger
    utils.init_logger(logging.DEBUG, file_name="../log/chess_sl.log")
    logger = logging.getLogger('Chess_SL')

    # set the random seed
    random.seed(a=None, version=2)
    np.random.seed(seed=None)

    logger.debug("start the main test program")

    # test the rise network
    network = networks.RiseNet(Config.learning_rate, Config.n_blocks, Config.n_se_blocks,
                               Config.n_filters, Config.se_ratio, Config.n_mobile_filters,
                               Config.n_filter_inc, Config.weight_decay)
    network = data_storage.net_to_device(network, Config.training_device)

    board = chess.Board()
    net_input = board_representation.board_to_matrix(board)
    net_input = torch.tensor(net_input)
    net_input = net_input.to(Config.training_device, dtype=torch.float)
    net_input = net_input.unsqueeze(0)
    res = network(net_input)

    # play a few moves, including an illegal-looking uci push
    board = chess.Board()
    board.push_san("g4")
    board.push_san("e5")
    board.push_san("f4")
    board.push_uci("d8h4")   # board.push_san("Qh4")
    print(board.turn == chess.WHITE)

    test_list = [1, 2, 3, 4]
    test_list.remove(2)
    print(test_list)

    test_str = "_5_6"
    print(test_str.split("_"))

    # get the fen string of a board
    board = chess.Board()
    board.push_san("e4")
    board.push_san("e5")
    board.push_san("Nf3")
    board.push_san("Nc6")
    board.push_san("Bc4")
    board.push_san("Bc5")
    board.push_san("Qe2")
    board.push_san("d6")
    board.push_san("Nc3")
    board.push_san("Bd7")
    board.push_san("b3")
    board.push_san("Qe7")
    fen_string = board.fen()
    print(fen_string)
    print("n-bytes: ", len(fen_string.encode('utf-8')))

    # read a few positions from the compressed training data file
    compression_filter = data_processing.get_compression_filter()
    data_file = tables.open_file("../king-base-light-avg.h5", mode='r', filters=compression_filter)
    print(data_file.root.data.shape[0])

    state = data_file.root.data[2, 0:CONST.STATE_SIZE]
    policy_idx = int(data_file.root.data[2, -2])
    value = data_file.root.data[100, -1]

    state = state.reshape(CONST.INPUT_CHANNELS, CONST.BOARD_HEIGHT, CONST.BOARD_WIDTH)
    policy = np.zeros(board_representation.LABEL_COUNT)
    policy[policy_idx] = 1

    # loop over all games of a pgn file and print the problematic ones
    pgn_file = open("../pgns/KingBaseLite2019-B00-B19.pgn")
    game = chess.pgn.read_game(pgn_file)   # read out the next game from the pgn
    while game is not None:
        result = data_processing.value_from_result(game.headers["Result"])
        if result is None:
            print(game)

        for move in game.mainline_moves():
            if move.uci() == "0000":
                print(game)
                print(move)

        game = chess.pgn.read_game(pgn_file)   # read out the next game from the pgn
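# data_processing.value_from_result() is a project helper not shown here; this is a
# minimal sketch of the usual mapping from the PGN "Result" header to a value target
# (the function name below is hypothetical). It returns None for unknown or unfinished
# games, which the loop above flags by printing them.
def _result_to_value_sketch(result):
    if result == "1-0":
        return 1.0    # white wins
    if result == "0-1":
        return -1.0   # black wins
    if result == "1/2-1/2":
        return 0.0    # draw
    return None       # e.g. "*" for an unfinished or unknown result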