Ejemplos de MemoryBuffers en Python, ejemplos de Main.Training.Connect4.MemoryBuffers en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: SelfPlay.py Proyecto: ikaroszhang96/Convex-AlphaZero

def _replayWatcher(connections, dumpPipe):
    """Collect replay data for one self-play cycle, then stop the workers.

    Clears the replay buffer, then blocks on ``dumpPipe`` and, for every
    ``DUMP_REPLAY_DATA_TO_OVERLORD`` message, adds the received labels to the
    replay buffer and prints a progress line. Once at least
    ``Hyperparameters.AMOUNT_OF_NEW_GAMES_PER_CYCLE`` games have been
    collected, ``_stopRemoteWorkers(connections)`` is called and the function
    returns. Messages of any other type are ignored.

    Args:
        connections: Passed unchanged to ``_stopRemoteWorkers``.
        dumpPipe: Queue-like object whose ``get()`` yields ``(msg, data)``
            pairs forwarded by a listener.
    """
    print("Starting replay watcher")
    MemoryBuffers.clearReplayBuffer()
    gamesSoFar = 0
    cycleStart = time.time()

    while True:
        msg, data = dumpPipe.get()  # Data passed from a listener
        if msg != Constants.RemoteProtocol.DUMP_REPLAY_DATA_TO_OVERLORD:
            continue

        # The weights element of the payload is received but not used here.
        gameCount, states, evals, polices, _weights = data
        MemoryBuffers.addLabelsToReplayBuffer(states, evals, polices)
        gamesSoFar += gameCount

        # Progress line: "<collected> / <target>    Time: <s>    Games/Sec: <rate>"
        progressPart = "{} / {}".format(
            gamesSoFar, Hyperparameters.AMOUNT_OF_NEW_GAMES_PER_CYCLE)
        secondsElapsed = np.around(time.time() - cycleStart, 3)
        timePart = "Time: {}".format(secondsElapsed)
        ratePart = "Games/Sec: {}".format(
            np.around(gamesSoFar / secondsElapsed, 3))
        print("\t\t".join((progressPart, timePart, ratePart)))

        # Enough games for this cycle: tell every remote worker to abort.
        if gamesSoFar >= Hyperparameters.AMOUNT_OF_NEW_GAMES_PER_CYCLE:
            _stopRemoteWorkers(connections)
            return

Ejemplo n.º 2

0

Mostrar archivo

def selfPlay(selfPlayPool, model):
    """Run one round of self-play and persist the resulting training data.

    Dispatches one job per self-play worker to ``selfPlayPool``, then runs the
    prediction oracle on the calling thread. When the oracle returns, the
    elapsed time is printed and the training data is stored to disk.

    Args:
        selfPlayPool: Worker pool exposing ``runJobs``; also handed to the
            prediction oracle.
        model: Model used by the oracle and, when
            ``Hyperparameters.USE_PREDICTION_CACHE`` is set, to pre-compute the
            prediction table.
    """
    startTime = time.time()

    # An empty table is sent to the workers when the cache is disabled.
    if Hyperparameters.USE_PREDICTION_CACHE:
        computeTable = PreComputation.computePredictionTable(model)
    else:
        computeTable = {}

    # Every worker gets the same (games-to-play, table) job description.
    job = (Hyperparameters.AMOUNT_OF_GAMES_PER_WORKER, computeTable)
    selfPlayPool.runJobs([job] * Hyperparameters.AMOUNT_OF_SELF_PLAY_WORKERS)

    PredictionOracle.runPredictionOracle(
        model, selfPlayPool)  # Enter Oracle mode on the main thread

    # time.time() differences are in seconds, so the value is labelled "s"
    # (the message previously claimed milliseconds).
    print("Self play finished: {} s".format(time.time() - startTime))
    MemoryBuffers.storeTrainingDataToDisk()

Ejemplo n.º 3

0

Mostrar archivo

def sendToOverlord(overlordConnection, localPipe, amountOfWorkers, endPipe):
    """Forward data from local workers to the overlord until all have finished.

    Reads tuples from ``localPipe`` and dispatches on their first element:

    * ``DUMP_TO_REPLAY_BUFFER``: the labels are added to the local replay
      buffer. Once at least
      ``MachineSpecificSettings.GAMES_BATCH_SIZE_TO_OVERLORD`` games have
      accumulated, the whole buffer is sent to the overlord and cleared.
    * ``DUMP_MOST_VISITED_STATES``: the worker's states are accumulated. When
      ``amountOfWorkers`` such messages have arrived, the accumulated states
      are sent to the overlord and the loop ends.

    Finally a notification string is put on ``endPipe``.

    Args:
        overlordConnection: Connection exposing ``sendMessage``.
        localPipe: Queue-like object; ``get()`` yields message tuples.
        amountOfWorkers: Number of local workers expected to report their
            most visited states.
        endPipe: Queue-like object notified when this function finishes.
    """
    # NOTE(review): the original comment here read "Needed in the end when we
    # wish to count the bitmaps"; presumably it refers to the StartInit
    # initialisation below. Confirm.
    import time
    time.sleep(3)
    print("Starting init")
    import StartInit
    StartInit.init()

    amountOfCollectedGames = 0
    amountOfCollectedWorkers = 0
    collectedVisitedStates = []

    while True:
        tupleMsg = localPipe.get()
        msgType = tupleMsg[0]

        if msgType == C.LocalWorkerProtocol.DUMP_TO_REPLAY_BUFFER:
            # The per-sample weights in the message are not stored here.
            _, amountOfGames, states, evals, polices, _weights = tupleMsg
            MemoryBuffers.addLabelsToReplayBuffer(states, evals, polices)
            amountOfCollectedGames += amountOfGames

            if (amountOfCollectedGames >=
                    MachineSpecificSettings.GAMES_BATCH_SIZE_TO_OVERLORD):
                print("Sending to oracle from dataworker")
                dStates, dEvals, dPolices, dWeights = (
                    MemoryBuffers.getAllTrainingData())
                dumpMsg = (amountOfCollectedGames, dStates, dEvals, dPolices,
                           dWeights)
                overlordConnection.sendMessage(
                    C.RemoteProtocol.DUMP_REPLAY_DATA_TO_OVERLORD, dumpMsg)

                # Start a fresh batch after each dump.
                amountOfCollectedGames = 0
                MemoryBuffers.clearReplayBuffer()

        elif msgType == C.LocalWorkerProtocol.DUMP_MOST_VISITED_STATES:
            _, states = tupleMsg
            # Previously the unpacked states were dropped, so an empty list was
            # always forwarded to the overlord.
            # NOTE(review): assumes `states` is an iterable of individual
            # states to be merged into one flat list. Confirm against sender.
            collectedVisitedStates.extend(states)
            amountOfCollectedWorkers += 1

            if amountOfCollectedWorkers >= amountOfWorkers:
                print("collected states from all local workers: ",
                      len(collectedVisitedStates))
                sendMostVisitedStatesToOverlord(overlordConnection,
                                                collectedVisitedStates)
                print("Sent message to all workers")
                break

    endPipe.put("Ending by datamanager")
    print("Ending sending thread")

Ejemplo n.º 4

0

Mostrar archivo

def _init(port):
    """Connect to the local server and receive the initial model and settings.

    Opens a client connection to ``localhost:port``, reads one message that
    must carry ``STATUS_INIT_MODEL``, writes the received model bytes to disk
    and copies the first two trainer settings into ``Hyperparameters``. The
    user is then asked on stdin whether previously stored training data
    should be loaded.

    Args:
        port: Port of the local server to connect to.

    Returns:
        A ``(connection, modelAbsPath)`` tuple: the open connection and the
        path returned by ``_writeModelToDiskAsBytes``.
    """
    connection = Connection.Connection(ip='localhost', port=port, server=False)

    status, payload = connection.readMessage()
    assert status == STATUS_INIT_MODEL

    modelAsBytes, trainerSettings = payload
    modelAbsPath = _writeModelToDiskAsBytes(modelAsBytes)

    # The settings arrive positionally: [0] buffer length, [1] sliding window.
    Hyperparameters.REPLAY_BUFFER_LENGTH = trainerSettings[0]
    Hyperparameters.SLIDING_WINDOW_TURNS_TO_FULL = trainerSettings[1]

    # Any answer containing the letter "y" (case-insensitive) counts as yes.
    answer = input("Use old training data (Y/N):")
    if "Y" in answer.upper():
        MemoryBuffers.loadOldTrainingDataFromDisk()

    return connection, modelAbsPath

Ejemplo n.º 5

0

Mostrar archivo

def benchmark():
    """Time one full training pass over the training data stored on disk.

    Loads the stored training data, asks on stdin for the model name and the
    GPU settings, then runs ``_fitModelProc`` directly in the current process.
    The elapsed time is printed after pre-processing and again after training.
    """
    import RootDir

    print("Loading training data...")
    MemoryBuffers.loadOldTrainingDataFromDisk()

    modelName = input("ModelName: ")
    modelPath = RootDir.getAbsolutePath(modelName)
    gpuSettings = input("Gpu Settings: ")

    # The clock starts only after all interactive prompts are answered.
    startTime = time.time()
    states, evals, policies = MemoryBuffers.getDistinctTrainingData()
    print("Data pre-processing finished:", time.time() - startTime)

    _fitModelProc(
        modelPath,
        MachineSpecificSettings.AMOUNT_OF_GPUS > 1,  # use multiple models?
        gpuSettings,
        0,
        states,
        evals,
        policies,
        startTime)
    print("Full training finished:", time.time() - startTime)

Ejemplo n.º 6

0

Mostrar archivo

Archivo: FitAndEvaluateModel.py Proyecto: ikaroszhang96/Convex-AlphaZero

def fitAndEvaluateModel(model, currentModelVersion):
    """Train ``model`` on the distinct training data and bump the version.

    The current weights are first saved to
    ``Hyperparameters.CURRENT_MODEL_WEIGHTS_PATH``. If a contender weights
    file exists, it is loaded into ``model`` before training. The model is
    then fitted on all distinct samples from the replay buffer.

    Args:
        model: Keras-style model exposing ``save_weights``, ``load_weights``
            and ``fit``; modified in place.
        currentModelVersion: Version number of the model before this call.

    Returns:
        ``currentModelVersion + 1``.
    """
    # This should already be done, but just to be safe. There's no danger in
    # overriding this, since every other worker should already have the latest
    # weights and won't update since we don't increase the currentModelVersion.
    model.save_weights(Hyperparameters.CURRENT_MODEL_WEIGHTS_PATH,
                       overwrite=True)

    startTime = time.time()
    # Change to the contender version before training
    if os.path.isfile(Hyperparameters.CONTENDER_MODEL_WEIGHTS_PATH):
        print("Loading old contender model...")
        model.load_weights(Hyperparameters.CONTENDER_MODEL_WEIGHTS_PATH)

    # In the paper they sample training data from their training buffer; here
    # we just run through all our current (distinct) samples.
    inStates, valueLabels, policyLabels = (
        MemoryBuffers.getDistinctTrainingData())
    model.fit(
        np.array(inStates),
        [np.array(valueLabels), np.array(policyLabels)],
        epochs=Hyperparameters.EPOCHS_PER_TRAINING,
        batch_size=Hyperparameters.MINI_BATCH_SIZE,
        verbose=2,
        shuffle=True)

    # time.time() differences are in seconds, so the value is labelled "s"
    # (the message previously claimed milliseconds).
    print("Training finished: {} s".format(time.time() - startTime))

    return currentModelVersion + 1

Ejemplo n.º 7

0

Mostrar archivo

Archivo: FitAndEvaluateModel.py Proyecto: ikaroszhang96/Convex-AlphaZero

def evaluateModelAgainstTrainingData(model):
    """Print the model's value and policy loss on all stored training data.

    Args:
        model: Keras-style model whose ``evaluate`` returns
            ``(totalLoss, valueLoss, policyLoss)`` for the two-headed output.
    """
    # Only the first three elements (states, values, policies) are needed.
    # Slicing keeps this working whether or not getAllTrainingData() also
    # returns a trailing weights element.
    inStates, valueLabels, policyLabels = MemoryBuffers.getAllTrainingData()[:3]

    # Model.evaluate has no `shuffle` parameter (sample order does not affect
    # the loss), and passing one raises TypeError, so it is not passed here.
    _, valueLoss, policyLoss = model.evaluate(
        np.array(inStates),
        [np.array(valueLabels), np.array(policyLabels)],
        verbose=2)
    print("ValueLoss: {}  PolicyLoss: {}".format(valueLoss, policyLoss))

Ejemplo n.º 8

0

Mostrar archivo

def fitModel(modelAbsPath, gpuSettings, modelGeneration, startTime):
    """Prepare the training arrays and fit the model in a child process.

    Converts the distinct training data from the replay buffer to NumPy
    arrays, then runs ``_fitModelProc`` in a separate process and waits for it
    to finish.

    Args:
        modelAbsPath: Forwarded to ``_fitModelProc``.
        gpuSettings: Forwarded to ``_fitModelProc``.
        modelGeneration: Forwarded to ``_fitModelProc``.
        startTime: Forwarded to ``_fitModelProc``.
    """
    import numpy as np

    print("Stored data points: ", MemoryBuffers.getAmountOfStoredDataPoints())

    preprocessingStart = time.time()
    inStates, valueLabels, policyLabels = (
        MemoryBuffers.getDistinctTrainingData())
    stateArray = np.array(inStates)
    valueArray = np.array(valueLabels)
    policyArray = np.array(policyLabels)
    dataProcessingTime = time.time() - preprocessingStart
    print("Data preprocessing finished: {}".format(dataProcessingTime))

    # On a combined worker/trainer machine, pad pre-processing to 5 seconds.
    # NOTE(review): presumably this gives other processes time to release the
    # GPU before training starts. Confirm.
    needsPadding = dataProcessingTime < 5
    if MachineSpecificSettings.REMOTE_WORKER_AND_TRAINER and needsPadding:
        print("Waiting for GPU")
        time.sleep(5 - dataProcessingTime)

    trainerArgs = (
        modelAbsPath,
        MachineSpecificSettings.AMOUNT_OF_GPUS > 1,  # multiple GPUs?
        gpuSettings,
        modelGeneration,
        stateArray,
        valueArray,
        policyArray,
        startTime,
    )
    trainer = mp.Process(target=_fitModelProc, args=trainerArgs)
    trainer.start()
    trainer.join()

Ejemplo n.º 9

0

Mostrar archivo

def loopingTrainer(port, gpuSettings):
    connection, modelAbsPath = _init(port)

    import os, StartInit
    StartInit.init()

    print("Starting Trainer GPU-Settings: {}".format(gpuSettings))
    os.environ['CUDA_VISIBLE_DEVICES'] = gpuSettings
    from Main.AlphaZero import NeuralNetworks
    import numpy as np
    import keras

    MachineSpecificSettings.setupHyperparameters()
    singleModel = keras.models.load_model(modelAbsPath)

    # In our experiments we ended up using only a single GPU for training. Since a to big batch-size gave weird results
    if (MachineSpecificSettings.AMOUNT_OF_GPUS > 1):
        trainingModel = NeuralNetworks.createMultipleGPUModel(singleModel)
    else:
        trainingModel = singleModel

    # Training Loop
    while (True):
        status, data = connection.readMessage()
        print("Got msg:", status)

        if (status == STATUS_TRAIN_DATA
            ):  # TODO: Create an informative else statement
            t1 = time.time(
            )  # Only used for displaying elapsed time to the user
            modelVersion, states, values, policies, weights = data

            # Setup settings for this training turn
            keras.backend.set_value(trainingModel.optimizer.lr,
                                    _getLearningRate(modelVersion))
            MemoryBuffers.CURRENT_MODEL_VERSION = modelVersion
            MemoryBuffers.addLabelsToReplayBuffer(states, values, policies)

            # Get all the data contained in the Replay Buffers. With pre-calculated average of similair states
            inStates, valueLabels, policyLabels = MemoryBuffers.getDistinctTrainingData(
            )
            s = np.array(inStates)
            v = np.array(valueLabels)
            p = np.array(policyLabels)

            # Run the supervised-learning
            dataProcessingTime = time.time() - t1
            print("Data preprocessing finished: {}".format(dataProcessingTime))
            print("Using LR:",
                  keras.backend.get_value(trainingModel.optimizer.lr))
            trainingModel.fit([np.array(s), np.array(p)],
                              np.array(v),
                              epochs=Hyperparameters.EPOCHS_PER_TRAINING,
                              batch_size=Hyperparameters.MINI_BATCH_SIZE,
                              verbose=2,
                              shuffle=True)

            singleModel.save(modelAbsPath, overwrite=True)
            singleModel.save(Hyperparameters.MODELS_SAVE_PATH +
                             str(modelVersion + 1))
            trainedModelAsBytes = _readModelFromDisk()

            print("Training finished:", time.time() - t1)
            connection.sendMessage("Finished", (trainedModelAsBytes, ))

            MemoryBuffers.storeTrainingDataToDisk()