Python MemoryBuffers.addLabelsToReplayBuffer Examples

Programming Language: Python

Namespace/Package Name: Main.Training.Connect4

Class/Type: MemoryBuffers

Method/Function: addLabelsToReplayBuffer

Examples at hotexamples.com: 3

Python MemoryBuffers.addLabelsToReplayBuffer - 3 examples found. These are the top-rated real-world Python examples of Main.Training.Connect4.MemoryBuffers.addLabelsToReplayBuffer, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

getDistinctTrainingData(4)

addLabelsToReplayBuffer(3)

clearReplayBuffer(2)

getAllTrainingData(2)

loadOldTrainingDataFromDisk(2)

storeTrainingDataToDisk(2)

getAmountOfStoredDataPoints(1)

Example #1

Show file

File: SelfPlay.py Project: ikaroszhang96/Convex-AlphaZero

def _replayWatcher(connections, dumpPipe):
    """Gather replay data from `dumpPipe` until one cycle's worth of games is in.

    The replay buffer is cleared first. Every
    DUMP_REPLAY_DATA_TO_OVERLORD message read from `dumpPipe` is added to
    the replay buffer and a progress line is printed. Once
    Hyperparameters.AMOUNT_OF_NEW_GAMES_PER_CYCLE games have been counted,
    `_stopRemoteWorkers(connections)` is called and the function returns.
    """
    print("Starting replay watcher")
    MemoryBuffers.clearReplayBuffer()
    gamesSoFar = 0
    watchStartedAt = time.time()

    while True:
        # Each item is a (message type, payload) pair handed over by a listener.
        msg, data = dumpPipe.get()

        # Messages of any other type are ignored.
        if msg != Constants.RemoteProtocol.DUMP_REPLAY_DATA_TO_OVERLORD:
            continue

        # The weights in the payload are not stored in the replay buffer here.
        amountOfGames, states, evals, polices, _weights = data
        MemoryBuffers.addLabelsToReplayBuffer(states, evals, polices)
        gamesSoFar += amountOfGames

        # Progress report: games collected, elapsed time, throughput.
        progressPart = "{} / {}".format(
            gamesSoFar, Hyperparameters.AMOUNT_OF_NEW_GAMES_PER_CYCLE)
        elapsed = np.around(time.time() - watchStartedAt, 3)
        timePart = "Time: {}".format(elapsed)
        ratePart = "Games/Sec: {}".format(np.around(gamesSoFar / elapsed, 3))
        print("\t\t".join((progressPart, timePart, ratePart)))

        # Keep listening until enough games have been received.
        if gamesSoFar < Hyperparameters.AMOUNT_OF_NEW_GAMES_PER_CYCLE:
            continue

        # Enough games for this cycle: tell the remote workers to stop.
        _stopRemoteWorkers(connections)
        return

Example #2

Show file

def sendToOverlord(overlordConnection, localPipe, amountOfWorkers, endPipe):
    """Forward data from local workers to the overlord until the cycle ends.

    Reads tuple messages from `localPipe`:

    * DUMP_TO_REPLAY_BUFFER: the labels are added to the replay buffer. Once
      at least MachineSpecificSettings.GAMES_BATCH_SIZE_TO_OVERLORD games
      have accumulated, all buffered training data is sent to the overlord
      and the buffer and game counter are reset.
    * DUMP_MOST_VISITED_STATES: the worker's states are accumulated. When
      `amountOfWorkers` such messages have arrived, the accumulated states
      are sent to the overlord and the loop ends.

    Args:
        overlordConnection: Connection object exposing `sendMessage`.
        localPipe: Queue-like object; `get()` yields the tuple messages.
        amountOfWorkers: Number of DUMP_MOST_VISITED_STATES messages to wait
            for before finishing.
        endPipe: Queue-like object that receives a final notification string.
    """
    # Needed in the end when we wish to count the bitmaps
    import time
    time.sleep(3)
    print("Starting init")
    import StartInit
    StartInit.init()

    runningCycle = True
    amountOfCollectedGames = 0
    amountOfCollectedWorkers = 0
    collectedVisitedStates = []

    while (runningCycle):
        tupleMsg = localPipe.get()
        msgType = tupleMsg[0]

        if (msgType == C.LocalWorkerProtocol.DUMP_TO_REPLAY_BUFFER):
            # The weights in the message are not stored in the replay buffer here.
            _, amountOfGames, states, evals, polices, _weights = tupleMsg
            MemoryBuffers.addLabelsToReplayBuffer(states, evals, polices)
            amountOfCollectedGames += amountOfGames

            if (amountOfCollectedGames >=
                    MachineSpecificSettings.GAMES_BATCH_SIZE_TO_OVERLORD):
                print("Sending to oracle from dataworker")
                dStates, dEvals, dPolices, dWeights = MemoryBuffers.getAllTrainingData(
                )
                dumpMsg = (amountOfCollectedGames, dStates, dEvals, dPolices,
                           dWeights)
                overlordConnection.sendMessage(
                    C.RemoteProtocol.DUMP_REPLAY_DATA_TO_OVERLORD, dumpMsg)

                # Start a fresh batch after everything buffered has been sent.
                amountOfCollectedGames = 0
                MemoryBuffers.clearReplayBuffer()

        elif (msgType == C.LocalWorkerProtocol.DUMP_MOST_VISITED_STATES):
            amountOfCollectedWorkers += 1
            _, states = tupleMsg
            # Accumulate every worker's states so the list forwarded below
            # actually contains them.
            # NOTE(review): assumes `states` is an iterable of states - confirm
            # against the sending worker.
            collectedVisitedStates.extend(states)

            if (amountOfCollectedWorkers >= amountOfWorkers):
                print("collected states from all local workers: ",
                      len(collectedVisitedStates))
                sendMostVisitedStatesToOverlord(overlordConnection,
                                                collectedVisitedStates)
                print("Sent message to all workers")
                runningCycle = False

    endPipe.put("Ending by datamanager")
    print("Ending sending thread")

Example #3

Show file

def loopingTrainer(port, gpuSettings):
    """Run the training loop: receive data, fit the model, send the model back.

    For every STATUS_TRAIN_DATA message read from the connection, the new
    labels are added to the replay buffer, the model is fitted on the
    distinct training data, saved to disk, and returned to the sender as
    bytes. The replay buffer is then stored to disk. Messages with any other
    status are only printed.

    Args:
        port: Passed to `_init`, which yields the connection and the absolute
            path of the model file.
        gpuSettings: Value written to the CUDA_VISIBLE_DEVICES environment
            variable.

    This function loops forever and does not return.
    """
    connection, modelAbsPath = _init(port)

    import os
    import StartInit
    StartInit.init()

    print("Starting Trainer GPU-Settings: {}".format(gpuSettings))
    # Set before keras is imported below - presumably so the backend only
    # sees the selected devices; keep this ordering.
    os.environ['CUDA_VISIBLE_DEVICES'] = gpuSettings
    from Main.AlphaZero import NeuralNetworks
    import numpy as np
    import keras

    MachineSpecificSettings.setupHyperparameters()
    singleModel = keras.models.load_model(modelAbsPath)

    # In our experiments we ended up using only a single GPU for training,
    # since a too big batch size gave weird results.
    if (MachineSpecificSettings.AMOUNT_OF_GPUS > 1):
        trainingModel = NeuralNetworks.createMultipleGPUModel(singleModel)
    else:
        trainingModel = singleModel

    # Training Loop
    while (True):
        status, data = connection.readMessage()
        print("Got msg:", status)

        if (status == STATUS_TRAIN_DATA
            ):  # TODO: Create an informative else statement
            t1 = time.time(
            )  # Only used for displaying elapsed time to the user
            # The weights in the message are not used by the trainer here.
            modelVersion, states, values, policies, _weights = data

            # Setup settings for this training turn
            keras.backend.set_value(trainingModel.optimizer.lr,
                                    _getLearningRate(modelVersion))
            MemoryBuffers.CURRENT_MODEL_VERSION = modelVersion
            MemoryBuffers.addLabelsToReplayBuffer(states, values, policies)

            # Get all the data contained in the replay buffers, with the
            # pre-calculated average of similar states.
            inStates, valueLabels, policyLabels = MemoryBuffers.getDistinctTrainingData(
            )
            # Convert once; the arrays are passed to fit() as they are.
            s = np.array(inStates)
            v = np.array(valueLabels)
            p = np.array(policyLabels)

            # Run the supervised learning
            dataProcessingTime = time.time() - t1
            print("Data preprocessing finished: {}".format(dataProcessingTime))
            print("Using LR:",
                  keras.backend.get_value(trainingModel.optimizer.lr))
            trainingModel.fit([s, p],
                              v,
                              epochs=Hyperparameters.EPOCHS_PER_TRAINING,
                              batch_size=Hyperparameters.MINI_BATCH_SIZE,
                              verbose=2,
                              shuffle=True)

            # Save singleModel (not the possible multi-GPU wrapper): once over
            # the working model file and once as a versioned snapshot.
            singleModel.save(modelAbsPath, overwrite=True)
            singleModel.save(Hyperparameters.MODELS_SAVE_PATH +
                             str(modelVersion + 1))
            trainedModelAsBytes = _readModelFromDisk()

            print("Training finished:", time.time() - t1)
            connection.sendMessage("Finished", (trainedModelAsBytes, ))

            MemoryBuffers.storeTrainingDataToDisk()