def train_net(epoch_count, training_generator, csv_test_set):
    """
    trains the neural network for the given number of epochs and returns the prediction and value errors
    :param epoch_count:             the number of epochs to train the neural network
    :param training_generator:      the torch training generator
    :param csv_test_set:            the test set on which the network is tested
    :return:                        prediction error, value error
    """
    logger = logging.getLogger('Sup Learning')

    # create a new network to train
    network = networks.ResNet(Config.learning_rate, Config.n_blocks,
                              Config.n_filters, Config.weight_decay)
    network = data_storage.net_to_device(network, Config.training_device)

    # execute the training by looping over all epochs
    network.train()
    for epoch in range(epoch_count):
        # training
        for state_batch, value_batch, policy_batch in training_generator:
            # send the data to the gpu
            state_batch = state_batch.to(Config.training_device)
            value_batch = value_batch.to(Config.training_device)
            policy_batch = policy_batch.to(Config.training_device)

            # execute one training step
            _, _ = network.train_step(state_batch, policy_batch, value_batch)

    # evaluation
    pred_error, val_error = evaluation.net_prediction_error(
        network, csv_test_set)
    logger.debug(
        "learning rate {}, prediction error: {}, value error: {}".format(
            Config.learning_rate, pred_error, val_error))
    return pred_error, val_error
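
A minimal usage sketch for train_net, assuming the project's data_processing.Dataset class and the DataLoader parameters seen in the later examples; the file paths are placeholders, and whether csv_test_set is a path or an already-loaded test set depends on evaluation.net_prediction_error.

# hedged usage sketch (not from the original source): build a DataLoader and run a short training run
from torch.utils import data

training_set = data_processing.Dataset("positions-train.h5")      # placeholder file name
training_generator = data.DataLoader(training_set, batch_size=512,
                                     shuffle=True, num_workers=2)
pred_error, val_error = train_net(epoch_count=3,
                                  training_generator=training_generator,
                                  csv_test_set="positions-test.csv")  # placeholder test set
print("prediction error: {}, value error: {}".format(pred_error, val_error))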
Example #2
    def __init__(self, *args, **kwargs):
        super(FastResNet, self).__init__(*args, **kwargs)

        # build the encoder/decoder pair and move both to the configured device
        self.encoder = networks.ResNet(layers=50, pretrained=False).to(self.device)
        self.decoder = networks.choose_decoder(decoder=self.name).to(self.device)

        # switch encoder and decoder to evaluation mode
        self.encoder.eval()
        self.decoder.eval()
        print('==> model name:{}\nfeed_height:{}\nfeed_width:{}\n'.format(
            self.name, self.feed_height, self.feed_width))

    # def infer(self, input):
    #     pass
Example #3
def main():
    # The logger
    utils.init_logger(logging.DEBUG, file_name="log/connect4.log")
    logger = logging.getLogger('Connect4')

    # set the random seed
    random.seed(a=None, version=2)

    # create the configuration values for a random network
    network_path = "self-play-net.pt"
    Config.n_blocks = 1
    Config.n_filters = 1
    Config.mcts_sim_count = 200
    loops = 10
    games_per_loop = 1000

    # create the agent
    logger.info("create a new random network for the self play")
    network = networks.ResNet(Config.learning_rate, Config.n_blocks,
                              Config.n_filters, Config.weight_decay)
    torch.save({'state_dict': network.state_dict()}, network_path)

    # play self-play games
    logger.info("start to create self-play games")
    start = time.time()
    training_examples = []

    for i in range(loops):
        new_examples = alpha_zero_learning.__self_play_worker__(
            network_path, games_per_loop)
        training_examples.extend(new_examples)
        logger.debug("finished creating games in loop {}".format(i))

    # save the training examples
    with open("initial_training_data.pkl", 'wb') as output:
        pickle.dump(training_examples, output, pickle.HIGHEST_PROTOCOL)

    logger.info(
        "finished creating the initial training examples, length: {}".format(
            len(training_examples)))
    # factor 0.5 because symmetric positions are included as well
    average_length = 0.5 * len(training_examples) / (games_per_loop * loops)
    logger.debug("average moves per game: {}".format(average_length))
    logger.debug("elapsed time: {}".format(time.time() - start))
Example #4
def mainGui():
    # The logger
    utils.init_logger(logging.DEBUG, file_name="log/gui.log")
    logger = logging.getLogger('Gui')

    net_path = "network_gen_148.pt"
    n_blocks = 10
    n_filters = 128

    np.set_printoptions(suppress=True, precision=2)


    # load the network
    Config.evaluation_device = torch.device('cpu')
    cpu_net = networks.ResNet(1e-4, n_blocks, n_filters, 1e-4)
    checkpoint = torch.load(net_path, map_location='cpu')
    cpu_net.load_state_dict(checkpoint['state_dict'])
    logger.debug("network loaded")


    # execute the game
    gui = connect4_gui.GUI(cpu_net)
    gui.execute_game()
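
The example above forces the checkpoint onto the CPU via map_location='cpu'; a hedged, device-aware variant (standard PyTorch, not from the source) could look like this:

# hedged sketch: load the same checkpoint on the GPU when one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Config.evaluation_device = device
net = networks.ResNet(1e-4, n_blocks, n_filters, 1e-4)
checkpoint = torch.load(net_path, map_location=device)
net.load_state_dict(checkpoint['state_dict'])
net = net.to(device)
net.eval()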
Example #5
def mainTrain():
    # The logger
    utils.init_logger(logging.DEBUG, file_name="log/chess_sl.log.log")
    logger = logging.getLogger('Sup Learning')

    np.set_printoptions(suppress=True, precision=6)

    # set the random seed
    random.seed(a=None, version=2)
    np.random.seed(seed=None)

    # parameters
    variant = "threeCheck"
    Config.learning_rate = 0.0001
    Config.weight_decay = 1e-4
    Config.n_blocks = 10
    Config.n_filters = 128
    epochs = 3
    training_set_path = "positions-avg" + variant + ".h5"
    network_dir = "networks/" + variant
    training_progress_dir = "training_progress/" + variant

    # define the parameters for the training
    params = {'batch_size': 512, 'shuffle': True, 'num_workers': 2}

    # create the data set class
    training_set = data_processing.Dataset(training_set_path)
    training_generator = data.DataLoader(training_set, **params)
    logger.info("training set created, length: {}".format(
        training_set.__len__()))

    # create a new network to train
    network = networks.ResNet(Config.learning_rate, Config.n_blocks,
                              Config.n_filters, Config.weight_decay)
    network = data_storage.net_to_device(network, Config.training_device)

    # create all needed folders
    Path(network_dir).mkdir(parents=True, exist_ok=True)
    Path(training_progress_dir).mkdir(parents=True, exist_ok=True)

    # list for the plots
    batches = []
    policy_loss = []
    value_loss = []
    tot_batch_count = 0
    current_batch_count = 0
    current_value_loss = 0
    current_policy_loss = 0

    # execute the training by looping over all epochs
    network.train()
    for epoch in range(epochs):
        # training
        for state_batch, policy_batch, value_batch in training_generator:
            # send the data to the gpu
            state_batch = state_batch.to(Config.training_device,
                                         dtype=torch.float)
            value_batch = value_batch.unsqueeze(1).to(Config.training_device,
                                                      dtype=torch.float)
            policy_batch = policy_batch.to(Config.training_device,
                                           dtype=torch.float)

            # execute one training step
            loss_p, loss_v = network.train_step(state_batch, policy_batch,
                                                value_batch)
            current_policy_loss += loss_p
            current_value_loss += loss_v
            current_batch_count += 1
            tot_batch_count += 1

            if tot_batch_count % 100 == 0:
                logger.debug("epoch {}: trained {} batches so far".format(
                    epoch, tot_batch_count))
                batches.append(tot_batch_count)
                policy_loss.append(current_policy_loss / current_batch_count)
                value_loss.append(current_value_loss / current_batch_count)

                current_policy_loss = 0
                current_value_loss = 0
                current_batch_count = 0

                if tot_batch_count % 1000 == 0:
                    network_path = "{}/network_batch_{}.pt".format(
                        network_dir, tot_batch_count)
                    torch.save(network, network_path)

                    np.save(training_progress_dir + "/value_loss.npy",
                            np.array(value_loss))
                    np.save(training_progress_dir + "/policy_loss.npy",
                            np.array(policy_loss))
                    np.save(training_progress_dir + "/batches.npy",
                            np.array(batches))

        # save the last network
        network_path = "{}/network_batch_{}.pt".format(network_dir,
                                                       tot_batch_count)
        torch.save(network, network_path)

        np.save(training_progress_dir + "/value_loss.npy",
                np.array(value_loss))
        np.save(training_progress_dir + "/policy_loss.npy",
                np.array(policy_loss))
        np.save(training_progress_dir + "/batches.npy", np.array(batches))

        logger.debug("epoch {}: finished training".format(epoch))

    # plot the loss versus the number of seen batches
    # plot the value training loss
    fig1 = plt.figure(1)
    plt.plot(batches, value_loss)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Value Training Loss")
    plt.xlabel("Training Samples")
    plt.ylabel("Value Loss")
    fig1.show()

    # plot the training policy loss
    fig2 = plt.figure(2)
    plt.plot(batches, policy_loss)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Policy Training Loss")
    plt.xlabel("Training Samples")
    plt.ylabel("Policy Loss")
    fig2.show()

    plt.show()
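
Note that the loop above pickles the whole module with torch.save(network, path) rather than a state_dict, so a checkpoint can be reloaded with a single torch.load call. A minimal sketch, assuming the "threeCheck" variant; the batch number is illustrative.

# hedged sketch: reload a full-module checkpoint written by the loop above
# (on newer PyTorch versions, pass weights_only=False for full-module checkpoints)
reloaded = torch.load("networks/threeCheck/network_batch_1000.pt",
                      map_location=Config.training_device)
reloaded.eval()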
Example #6
def main_az():
    # The logger
    utils.init_logger(logging.DEBUG, file_name="log/connect4.log")
    logger = logging.getLogger('Connect4')

    # set the random seed
    random.seed(a=None, version=2)
    np.random.seed(seed=None)

    # create the storage object
    training_data = data_storage.load_data()

    # create the agent
    network = networks.ResNet(Config.learning_rate, Config.n_blocks,
                              Config.n_filters, Config.weight_decay)
    agent = alpha_zero_learning.Agent(network)

    if training_data.cycle == 0:
        logger.debug("create a new agent")
        training_data.save_data(agent.network)  # save the generation 0 network

        if Config.use_initial_data:
            logger.debug("fill the experience buffer with some initial data")
            # add training examples created by the untrained network
            agent.experience_buffer.fill_with_initial_data()

    else:
        # load the current network
        logger.debug("load an old network")
        agent.network = training_data.load_current_net()
        agent.experience_buffer = training_data.experience_buffer

    start_training = time.time()
    for i in range(training_data.cycle, Config.cycle_count, 1):
        ###### self play and update: create some game data through self play
        logger.info("start playing games in cycle {}".format(i))
        avg_moves_played = agent.play_self_play_games(
            training_data.network_path)
        training_data.avg_moves_played.append(avg_moves_played)
        logger.debug("average moves played: {}".format(avg_moves_played))

        ###### training: update the training network and use the target network for predictions
        logger.info("start updates in cycle {}".format(i))
        loss_p, loss_v = agent.nn_update(i)
        training_data.policy_loss.append(loss_p)
        training_data.value_loss.append(loss_v)
        logger.debug("policy loss: {}".format(loss_p))
        logger.debug("value loss: {}".format(loss_v))

        ###### save the new network
        logger.info("save check point to file in cycle {}".format(i))
        training_data.cycle += 1
        training_data.experience_buffer = agent.experience_buffer
        training_data.save_data(agent.network)

    end_training = time.time()
    training_time = end_training - start_training
    logger.info("elapsed time whole training process {}".format(training_time))

    # save the results
    np.save("value_loss.npy", np.array(training_data.value_loss))
    np.save("policy_loss.npy", np.array(training_data.policy_loss))
    np.save("avg_moves.npy", np.array(training_data.avg_moves_played))

    # set the style of the plot (newer matplotlib versions renamed this style to 'seaborn-v0_8-dark-palette')
    plt.style.use('seaborn-dark-palette')

    # plot the value training loss
    fig1 = plt.figure(1)
    plt.plot(training_data.value_loss)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Value Training Loss")
    plt.xlabel("Generation")
    plt.ylabel("Value Loss")
    fig1.show()

    # plot the training policy loss
    fig2 = plt.figure(2)
    plt.plot(training_data.policy_loss)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Policy Training Loss")
    plt.xlabel("Generation")
    plt.ylabel("Policy Loss")
    fig2.show()

    # plot the average number of moves played in the self-play games
    fig3 = plt.figure(3)
    plt.plot(training_data.avg_moves_played)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Moves in Self-Play Games")
    plt.xlabel("Generation")
    plt.ylabel("Move Count")
    fig3.show()

    plt.show()
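
A short offline-plotting sketch, assuming the .npy files written above; it reproduces the value-loss figure without rerunning the training.

# hedged sketch: reproduce the value-loss plot from the saved results
import numpy as np
import matplotlib.pyplot as plt

value_loss = np.load("value_loss.npy")
plt.plot(value_loss)
plt.grid(True, color=(0.9, 0.9, 0.9))
plt.title("Average Value Training Loss")
plt.xlabel("Generation")
plt.ylabel("Value Loss")
plt.show()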