Example 1
    def nn_update(self, generation):
        """
        updates the neural network by sampling random batches from the experience replay buffer
        :param generation:      the network generation (number of iterations so far)
        :return:                average policy and value loss over all mini batches
        """
        # setup the data set
        training_generator = self.experience_buffer.prepare_data(generation)
        step_size = len(training_generator.dataset) // (2 * Config.batch_size)
        logger.info("training data prepared, step size: {}".format(step_size))

        # activate the training mode
        self.network = data_storage.net_to_device(self.network,
                                                  Config.training_device)
        if Config.cyclic_learning:
            self.network.update_scheduler(step_size)  # update the scheduler
        self.network.train()

        avg_loss_p = 0
        avg_loss_v = 0
        tot_batch_count = 0
        for epoch in range(Config.epoch_count):
            # training
            for state_batch, value_batch, policy_batch in training_generator:
                # send the data to the gpu
                state_batch = state_batch.to(Config.training_device,
                                             dtype=torch.float)
                value_batch = value_batch.unsqueeze(1).to(
                    Config.training_device, dtype=torch.float)
                policy_batch = policy_batch.to(Config.training_device,
                                               dtype=torch.float)

                # execute the training step with one batch
                if Config.cyclic_learning:
                    loss_p, loss_v = self.network.train_cyclical_step(
                        state_batch, policy_batch, value_batch)
                else:
                    loss_p, loss_v = self.network.train_step(
                        state_batch, policy_batch, value_batch)

                avg_loss_p += loss_p
                avg_loss_v += loss_v
                tot_batch_count += 1

        # calculate the mean of the loss
        avg_loss_p /= tot_batch_count
        avg_loss_v /= tot_batch_count

        # activate the evaluation mode
        self.network = data_storage.net_to_device(self.network,
                                                  Config.evaluation_device)
        self.network.eval()

        return avg_loss_p.item(), avg_loss_v.item()
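
A minimal sketch of how nn_update might be driven from an outer self-play loop; the trainer object, the generation count, and the logging call below are assumptions for illustration and are not part of the example above.

# hypothetical driver loop, assuming `trainer` is an instance of the class that
# owns nn_update() and that self-play results were already added to its buffer
for generation in range(100):
    avg_loss_p, avg_loss_v = trainer.nn_update(generation)
    logger.info("generation {}: policy loss {:.4f}, value loss {:.4f}".format(
        generation, avg_loss_p, avg_loss_v))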
Example 2
def train_net(epoch_count, training_generator, csv_test_set):
    """
    trains the neural network a few times and returns the value and prediction error
    :param epoch_count:             the number of epochs to train the neural network
    :param training_generator:      the torch training generator
    :param csv_test_set:            the test set on which the network is tested
    :return:                        prediction error, value error
    """
    logger = logging.getLogger('Sup Learning')

    # create a new network to train
    network = networks.ResNet(Config.learning_rate, Config.n_blocks,
                              Config.n_filters, Config.weight_decay)
    network = data_storage.net_to_device(network, Config.training_device)

    # execute the training by looping over all epochs
    network.train()
    for epoch in range(epoch_count):
        # training
        for state_batch, value_batch, policy_batch in training_generator:
            # send the data to the gpu
            state_batch = state_batch.to(Config.training_device)
            value_batch = value_batch.to(Config.training_device)
            policy_batch = policy_batch.to(Config.training_device)

            # execute one training step
            _, _ = network.train_step(state_batch, policy_batch, value_batch)

    # evaluation
    pred_error, val_error = evaluation.net_prediction_error(
        network, csv_test_set)
    logger.debug(
        "learning rate {}, prediction error: {}, value-error: {}".format(
            Config.learning_rate, pred_error, val_error))
    return pred_error, val_error
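
The log message above reports the current learning rate alongside the errors, which suggests train_net is meant for comparing hyperparameter settings. A rough sketch of such a sweep, assuming the training_generator and csv_test_set objects are already prepared; the candidate learning-rate values and epoch count are purely illustrative.

# hypothetical learning-rate sweep built on train_net(); the candidate values
# and the epoch count are illustrative assumptions
for lr in [1e-2, 1e-3, 1e-4]:
    Config.learning_rate = lr
    pred_error, val_error = train_net(3, training_generator, csv_test_set)
    print("lr {}: prediction error {}, value error {}".format(
        lr, pred_error, val_error))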
Example 3
    def __init__(self, network):
        """
        :param network:         alpha zero network that is used for training and evaluation
        """
        self.network = network  # the network
        self.experience_buffer = ExperienceBuffer()  # buffer that saves all experiences

        # activate the evaluation mode of the networks
        self.network = data_storage.net_to_device(self.network,
                                                  config.evaluation_device)
        self.network.eval()
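
A brief, assumed instantiation of this constructor; the surrounding class name is not shown in the example, so Trainer is a placeholder, and the ResNet arguments simply mirror those used in the other examples.

# hypothetical usage; `Trainer` stands in for whatever class defines __init__ above
network = networks.ResNet(Config.learning_rate, Config.n_blocks,
                          Config.n_filters, Config.weight_decay)
trainer = Trainer(network)  # moves the net to the evaluation device and calls eval()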
Example 4
def mainTrain():
    # The logger
    utils.init_logger(logging.DEBUG, file_name="log/chess_sl.log")
    logger = logging.getLogger('Sup Learning')

    np.set_printoptions(suppress=True, precision=6)

    # set the random seed
    random.seed(a=None, version=2)
    np.random.seed(seed=None)

    # parameters
    variant = "threeCheck"
    Config.learning_rate = 0.0001
    Config.weight_decay = 1e-4
    Config.n_blocks = 10
    Config.n_filters = 128
    epochs = 3
    training_set_path = "positions-avg" + variant + ".h5"
    network_dir = "networks/" + variant
    training_progress_dir = "training_progress/" + variant

    # define the parameters for the training
    params = {'batch_size': 512, 'shuffle': True, 'num_workers': 2}

    # create the data set class
    training_set = data_processing.Dataset(training_set_path)
    training_generator = data.DataLoader(training_set, **params)
    logger.info("training set created, length: {}".format(
        training_set.__len__()))

    # create a new network to train
    network = networks.ResNet(Config.learning_rate, Config.n_blocks,
                              Config.n_filters, Config.weight_decay)
    network = data_storage.net_to_device(network, Config.training_device)

    # create all needed folders
    Path(network_dir).mkdir(parents=True, exist_ok=True)
    Path(training_progress_dir).mkdir(parents=True, exist_ok=True)

    # list for the plots
    batches = []
    policy_loss = []
    value_loss = []
    tot_batch_count = 0
    current_batch_count = 0
    current_value_loss = 0
    current_policy_loss = 0

    # execute the training by looping over all epochs
    network.train()
    for epoch in range(epochs):
        # training
        for state_batch, policy_batch, value_batch in training_generator:
            # send the data to the gpu
            state_batch = state_batch.to(Config.training_device,
                                         dtype=torch.float)
            value_batch = value_batch.unsqueeze(1).to(Config.training_device,
                                                      dtype=torch.float)
            policy_batch = policy_batch.to(Config.training_device,
                                           dtype=torch.float)

            # execute one training step
            loss_p, loss_v = network.train_step(state_batch, policy_batch,
                                                value_batch)
            current_policy_loss += loss_p
            current_value_loss += loss_v
            current_batch_count += 1
            tot_batch_count += 1

            if tot_batch_count % 100 == 0:
                logger.debug("epoch {}: trained {} batches so far".format(
                    epoch, tot_batch_count))
                batches.append(tot_batch_count)
                policy_loss.append(current_policy_loss / current_batch_count)
                value_loss.append(current_value_loss / current_batch_count)

                current_policy_loss = 0
                current_value_loss = 0
                current_batch_count = 0

                if tot_batch_count % 1000 == 0:
                    network_path = "{}/network_batch_{}.pt".format(
                        network_dir, tot_batch_count)
                    torch.save(network, network_path)

                    np.save(training_progress_dir + "/value_loss.npy",
                            np.array(value_loss))
                    np.save(training_progress_dir + "/policy_loss.npy",
                            np.array(policy_loss))
                    np.save(training_progress_dir + "/batches.npy",
                            np.array(batches))

        # save the last network
        network_path = "{}/network_batch_{}.pt".format(network_dir,
                                                       tot_batch_count)
        torch.save(network, network_path)

        np.save(training_progress_dir + "/value_loss.npy",
                np.array(value_loss))
        np.save(training_progress_dir + "/policy_loss.npy",
                np.array(policy_loss))
        np.save(training_progress_dir + "/batches.npy", np.array(batches))

        logger.debug("epoch {}: finished training".format(epoch))

    # plot the loss versus the number of seen batches
    # plot the value training loss
    fig1 = plt.figure(1)
    plt.plot(batches, value_loss)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Value Training Loss")
    plt.xlabel("Training Samples")
    plt.ylabel("Value Loss")
    fig1.show()

    # plot the training policy loss
    fig2 = plt.figure(2)
    plt.plot(batches, policy_loss)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Policy Training Loss")
    plt.xlabel("Training Samples")
    plt.ylabel("Policy Loss")
    fig2.show()

    plt.show()
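
The script above relies on several imports that are not shown in the snippet. A plausible header and entry point, assuming the project-local modules (utils, networks, data_processing, data_storage, Config) exist under those names, might look like this; it is a reconstruction, not the original file header.

# assumed imports for mainTrain(); the project-local modules (utils, networks,
# data_processing, data_storage, Config) are taken on faith and not shown here
import logging
import random
from pathlib import Path

import numpy as np
import torch
import matplotlib.pyplot as plt
from torch.utils import data

if __name__ == "__main__":
    mainTrain()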
Example 5
def mainTrain():
    # The logger
    utils.init_logger(logging.DEBUG, file_name="../log/chess_sl.log")
    logger = logging.getLogger('Chess_SL')

    # set the random seed
    random.seed(a=None, version=2)
    np.random.seed(seed=None)

    logger.debug("start the main test program")

    # test the rise network
    network = networks.RiseNet(Config.learning_rate, Config.n_blocks,
                               Config.n_se_blocks, Config.n_filters,
                               Config.se_ratio, Config.n_mobile_filters,
                               Config.n_filter_inc, Config.weight_decay)
    network = data_storage.net_to_device(network, Config.training_device)

    board = chess.Board()
    net_input = board_representation.board_to_matrix(board)
    net_input = torch.tensor(net_input)
    net_input = net_input.to(Config.training_device, dtype=torch.float)
    net_input = net_input.unsqueeze(0)

    res = network(net_input)

    board = chess.Board()
    board.push_san("g4")
    board.push_san("e5")
    board.push_san("f4")
    board.push_uci("d8h4")
    # board.push_san("Qh4")

    print(board.turn == chess.WHITE)

    test_list = [1, 2, 3, 4]
    test_list.remove(2)
    print(test_list)

    test_str = "_5_6"
    print(test_str.split("_"))

    # get the fen string of a board
    board = chess.Board()
    board.push_san("e4")
    board.push_san("e5")
    board.push_san("Nf3")
    board.push_san("Nc6")
    board.push_san("Bc4")
    board.push_san("Bc5")
    board.push_san("Qe2")
    board.push_san("d6")
    board.push_san("Nc3")
    board.push_san("Bd7")
    board.push_san("b3")
    board.push_san("Qe7")
    fen_string = board.fen()
    print(fen_string)
    print("n-bytes: ", len(fen_string.encode('utf-8')))

    filter = data_processing.get_compression_filter()
    data_file = tables.open_file("../king-base-light-avg.h5",
                                 mode='r',
                                 filters=filter)

    print(data_file.root.data.shape[0])
    state = data_file.root.data[2, 0:CONST.STATE_SIZE]
    policy_idx = int(data_file.root.data[2, -2])
    value = data_file.root.data[100, -1]

    state = state.reshape(CONST.INPUT_CHANNELS, CONST.BOARD_HEIGHT,
                          CONST.BOARD_WIDTH)

    policy = np.zeros(board_representation.LABEL_COUNT)
    policy[policy_idx] = 1

    pgn_file = open("../pgns/KingBaseLite2019-B00-B19.pgn")
    game = chess.pgn.read_game(pgn_file)  # read out the first game from the pgn
    while game is not None:
        # flag games with a missing or unparsable result
        result = data_processing.value_from_result(game.headers["Result"])
        if result is None:
            print(game)

        # flag null moves in the main line of the current game
        for move in game.mainline_moves():
            if move.uci() == "0000":
                print(game)
                print(move)

        game = chess.pgn.read_game(
            pgn_file)  # read out the next game from the pgn