Exemple #1
0
def selfPlay(selfPlayPool, model):
    """Run one self-play round on the pool, then store the training data.

    One job tuple per configured self-play worker is handed to
    ``selfPlayPool.runJobs``. After that the prediction oracle is run with
    ``model`` and the pool, the elapsed wall-clock time is printed in
    milliseconds, and the memory buffers are written to disk.

    Args:
        selfPlayPool: Pool object exposing ``runJobs``; it is also passed to
            the prediction oracle.
        model: Model given to the prediction oracle and, when
            ``Hyperparameters.USE_PREDICTION_CACHE`` is set, to the
            prediction-table pre-computation.
    """
    startTime = time.time()

    # The prediction table is only pre-computed when the cache is enabled
    if Hyperparameters.USE_PREDICTION_CACHE:
        computeTable = PreComputation.computePredictionTable(model)
    else:
        computeTable = {}

    # Every worker receives the same job description; the list repeats one
    # tuple, so all entries refer to the same computeTable object
    jobArgs = (Hyperparameters.AMOUNT_OF_GAMES_PER_WORKER, computeTable)
    selfPlayPool.runJobs(
        [jobArgs] * Hyperparameters.AMOUNT_OF_SELF_PLAY_WORKERS)

    # Enter Oracle mode on the main thread
    PredictionOracle.runPredictionOracle(model, selfPlayPool)

    # time.time() counts seconds; convert so the value matches the "ms" label
    elapsedMs = (time.time() - startTime) * 1000
    print("Self play finished: {} ms".format(elapsedMs))
    MemoryBuffers.storeTrainingDataToDisk()
Exemple #2
0
def loopingTrainer(port, gpuSettings):
    """Serve training requests received over a connection, indefinitely.

    Setup: obtains a connection and model path from ``_init(port)``, exports
    ``gpuSettings`` as ``CUDA_VISIBLE_DEVICES``, loads the keras model stored
    at the model path and, when more than one GPU is configured, wraps it in
    a multi-GPU model for training.

    Loop: every message is read from the connection. For a message whose
    status equals ``STATUS_TRAIN_DATA`` the learning rate is updated, the
    received labels are added to the replay buffer, the model is fitted on
    the distinct training data, the model is saved (both to the model path
    and to a versioned path), the saved model is sent back with a
    ``"Finished"`` message, and the training data is stored to disk. Any
    other status is reported and skipped.

    The loop has no exit condition, so the function only ends if an
    exception propagates.

    Args:
        port: Passed to ``_init`` to set up the connection.
        gpuSettings: Value assigned to the ``CUDA_VISIBLE_DEVICES``
            environment variable; must be a string.
    """
    connection, modelAbsPath = _init(port)

    import os
    import StartInit
    StartInit.init()

    print("Starting Trainer GPU-Settings: {}".format(gpuSettings))
    # Exported before keras is imported below; keep this statement order
    os.environ['CUDA_VISIBLE_DEVICES'] = gpuSettings
    from Main.AlphaZero import NeuralNetworks
    import numpy as np
    import keras

    MachineSpecificSettings.setupHyperparameters()
    singleModel = keras.models.load_model(modelAbsPath)

    # In our experiments we ended up using only a single GPU for training,
    # since a too big batch-size gave weird results
    if MachineSpecificSettings.AMOUNT_OF_GPUS > 1:
        trainingModel = NeuralNetworks.createMultipleGPUModel(singleModel)
    else:
        trainingModel = singleModel

    # Training Loop
    while True:
        status, data = connection.readMessage()
        print("Got msg:", status)

        if status != STATUS_TRAIN_DATA:
            # Only training-data messages are handled; make the skip visible
            print("Ignoring message with unhandled status:", status)
            continue

        # Only used for displaying elapsed time to the user
        startTime = time.time()
        # The message carries five fields; the weights are not used here
        modelVersion, states, values, policies, _weights = data

        # Setup settings for this training turn
        keras.backend.set_value(trainingModel.optimizer.lr,
                                _getLearningRate(modelVersion))
        MemoryBuffers.CURRENT_MODEL_VERSION = modelVersion
        MemoryBuffers.addLabelsToReplayBuffer(states, values, policies)

        # Get all the data contained in the Replay Buffers, with
        # pre-calculated averages of similar states
        inStates, valueLabels, policyLabels = (
            MemoryBuffers.getDistinctTrainingData())
        stateArray = np.array(inStates)
        valueArray = np.array(valueLabels)
        policyArray = np.array(policyLabels)

        # Run the supervised-learning
        dataProcessingTime = time.time() - startTime
        print("Data preprocessing finished: {}".format(dataProcessingTime))
        print("Using LR:",
              keras.backend.get_value(trainingModel.optimizer.lr))
        # The arrays are passed as-is (no second np.array copy). As in the
        # original call, states and policy labels are the inputs and the
        # value labels are the target.
        trainingModel.fit([stateArray, policyArray],
                          valueArray,
                          epochs=Hyperparameters.EPOCHS_PER_TRAINING,
                          batch_size=Hyperparameters.MINI_BATCH_SIZE,
                          verbose=2,
                          shuffle=True)

        # Always save through singleModel, also when training ran on the
        # multi-GPU wrapper
        singleModel.save(modelAbsPath, overwrite=True)
        singleModel.save(Hyperparameters.MODELS_SAVE_PATH +
                         str(modelVersion + 1))
        # presumably the serialized model that was just saved; verify
        # against _readModelFromDisk
        trainedModelAsBytes = _readModelFromDisk()

        print("Training finished:", time.time() - startTime)
        connection.sendMessage("Finished", (trainedModelAsBytes, ))

        MemoryBuffers.storeTrainingDataToDisk()