def _replayWatcher(connections, dumpPipe):
    print("Starting replay watcher")
    collectedGamesThisCycle = 0
    MemoryBuffers.clearReplayBuffer()
    startTimeSelfPlay = time.time()

    while True:
        msg, data = dumpPipe.get()  # Data passed from a listener

        if msg == Constants.RemoteProtocol.DUMP_REPLAY_DATA_TO_OVERLORD:
            amountOfGames, states, evals, policies, weights = data
            MemoryBuffers.addLabelsToReplayBuffer(states, evals, policies)
            collectedGamesThisCycle += amountOfGames

            # Display a formatted progress message
            cycleProgressMsg = "{} / {}".format(collectedGamesThisCycle,
                                                Hyperparameters.AMOUNT_OF_NEW_GAMES_PER_CYCLE)
            elapsedTime = np.around(time.time() - startTimeSelfPlay, 3)
            elapsedTimeMsg = "Time: {}".format(elapsedTime)
            gamesPerSecondMsg = "Games/Sec: {}".format(np.around(collectedGamesThisCycle / elapsedTime, 3))
            print(cycleProgressMsg + "\t\t" + elapsedTimeMsg + "\t\t" + gamesPerSecondMsg)

            # Upon receiving a sufficient number of games, tell all remote workers to abort
            if collectedGamesThisCycle >= Hyperparameters.AMOUNT_OF_NEW_GAMES_PER_CYCLE:
                _stopRemoteWorkers(connections)
                return
def selfPlay(selfPlayPool, model):
    t1 = time.time()
    computeTable = PreComputation.computePredictionTable(model) if Hyperparameters.USE_PREDICTION_CACHE else {}
    selfPlayPool.runJobs([(Hyperparameters.AMOUNT_OF_GAMES_PER_WORKER, computeTable)] *
                         Hyperparameters.AMOUNT_OF_SELF_PLAY_WORKERS)
    PredictionOracle.runPredictionOracle(model, selfPlayPool)  # Enter Oracle mode on the main thread

    print("Self play finished: {} s".format(time.time() - t1))
    MemoryBuffers.storeTrainingDataToDisk()
def sendToOverlord(overlordConnection, localPipe, amountOfWorkers, endPipe):
    # Needed in the end when we wish to count the bitmaps
    import time
    time.sleep(3)

    print("Starting init")
    import StartInit
    StartInit.init()

    runningCycle = True
    amountOfCollectedGames = 0
    amountOfCollectedWorkers = 0
    collectedVisitedStates = []

    while runningCycle:
        tupleMsg = localPipe.get()
        msgType = tupleMsg[0]

        if msgType == C.LocalWorkerProtocol.DUMP_TO_REPLAY_BUFFER:
            _, amountOfGames, states, evals, policies, weights = tupleMsg
            MemoryBuffers.addLabelsToReplayBuffer(states, evals, policies)
            amountOfCollectedGames += amountOfGames

            # Once enough games are buffered locally, forward them to the overlord in one batch
            if amountOfCollectedGames >= MachineSpecificSettings.GAMES_BATCH_SIZE_TO_OVERLORD:
                print("Sending to oracle from dataworker")
                dStates, dEvals, dPolicies, dWeights = MemoryBuffers.getAllTrainingData()
                dumpMsg = (amountOfCollectedGames, dStates, dEvals, dPolicies, dWeights)
                overlordConnection.sendMessage(C.RemoteProtocol.DUMP_REPLAY_DATA_TO_OVERLORD, dumpMsg)

                amountOfCollectedGames = 0
                MemoryBuffers.clearReplayBuffer()

        elif msgType == C.LocalWorkerProtocol.DUMP_MOST_VISITED_STATES:
            amountOfCollectedWorkers += 1
            _, states = tupleMsg
            collectedVisitedStates.extend(states)  # Accumulate the states reported by each local worker

            if amountOfCollectedWorkers >= amountOfWorkers:
                print("Collected states from all local workers: ", len(collectedVisitedStates))
                sendMostVisitedStatesToOverlord(overlordConnection, collectedVisitedStates)
                print("Sent message to all workers")
                runningCycle = False
                endPipe.put("Ending by datamanager")

    print("Ending sending thread")
def _init(port):
    connection = Connection.Connection(ip='localhost', port=port, server=False)
    status, data = connection.readMessage()
    assert status == STATUS_INIT_MODEL

    modelAsBytes, trainerSettings = data
    modelAbsPath = _writeModelToDiskAsBytes(modelAsBytes)
    Hyperparameters.REPLAY_BUFFER_LENGTH = trainerSettings[0]
    Hyperparameters.SLIDING_WINDOW_TURNS_TO_FULL = trainerSettings[1]  # Used for naming the runtime analysis log

    if "Y" in input("Use old training data (Y/N):").upper():
        MemoryBuffers.loadOldTrainingDataFromDisk()

    return connection, modelAbsPath
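# NOTE: _writeModelToDiskAsBytes (called in _init above) is defined elsewhere in the project
# and is not shown in this section. The sketch below is only an illustration of what such a
# helper might look like; the "Sketch" suffix and the default file name are assumptions, not
# part of the original code.
def _writeModelToDiskAsBytesSketch(modelAsBytes, fileName="received_model.h5"):
    import os
    path = os.path.abspath(fileName)
    # Persist the serialized Keras model so it can later be loaded with keras.models.load_model
    with open(path, "wb") as f:
        f.write(modelAsBytes)
    return path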
def benchmark():
    import RootDir

    print("Loading training data...")
    MemoryBuffers.loadOldTrainingDataFromDisk()
    absPath = RootDir.getAbsolutePath(input("ModelName: "))
    gpuSettings = input("Gpu Settings: ")

    t1 = time.time()
    dStates, dEvals, dPolicies = MemoryBuffers.getDistinctTrainingData()
    print("Data pre-processing finished:", time.time() - t1)

    useMultipleModels = MachineSpecificSettings.AMOUNT_OF_GPUS > 1
    _fitModelProc(absPath, useMultipleModels, gpuSettings, 0, dStates, dEvals, dPolicies, t1)
    print("Full training finished:", time.time() - t1)
def fitAndEvaluateModel(model, currentModelVersion):
    global CURRENT_CONTENDER_VERSION

    # This should already be done, but just to be safe. There's no danger in overriding this,
    # since every other worker should already have the latest weights and won't update them
    # as long as we don't increase currentModelVersion.
    model.save_weights(Hyperparameters.CURRENT_MODEL_WEIGHTS_PATH, overwrite=True)

    t1 = time.time()
    # Switch to the contender weights before training
    if os.path.isfile(Hyperparameters.CONTENDER_MODEL_WEIGHTS_PATH):
        print("Loading old contender model...")
        model.load_weights(Hyperparameters.CONTENDER_MODEL_WEIGHTS_PATH)

    # In the paper they sample training data from their replay buffer; here we simply run
    # through all of our current samples.
    # inStates, valueLabels, policyLabels = MemoryBuffers.getAllTrainingData()
    inStates, valueLabels, policyLabels = MemoryBuffers.getDistinctTrainingData()
    model.fit(np.array(inStates), [np.array(valueLabels), np.array(policyLabels)],
              epochs=Hyperparameters.EPOCHS_PER_TRAINING,
              batch_size=Hyperparameters.MINI_BATCH_SIZE,
              verbose=2,
              shuffle=True)

    print("Training finished: {} s".format(time.time() - t1))
    return currentModelVersion + 1
def evaluateModelAgainstTrainingData(model):
    inStates, valueLabels, policyLabels = MemoryBuffers.getAllTrainingData()
    # Keras' evaluate() takes no shuffle argument (and shuffling would not change the reported
    # losses anyway), so it is omitted here.
    _, valueLoss, policyLoss = model.evaluate(np.array(inStates),
                                              [np.array(valueLabels), np.array(policyLabels)],
                                              verbose=2)
    print("ValueLoss: {} PolicyLoss: {}".format(valueLoss, policyLoss))
def fitModel(modelAbsPath, gpuSettings, modelGeneration, startTime):
    import numpy as np

    print("Stored data points: ", MemoryBuffers.getAmountOfStoredDataPoints())
    t1 = time.time()
    inStates, valueLabels, policyLabels = MemoryBuffers.getDistinctTrainingData()
    s = np.array(inStates)
    v = np.array(valueLabels)
    p = np.array(policyLabels)

    dataProcessingTime = time.time() - t1
    print("Data preprocessing finished: {}".format(dataProcessingTime))
    if MachineSpecificSettings.REMOTE_WORKER_AND_TRAINER and dataProcessingTime < 5:
        print("Waiting for GPU")
        time.sleep(5 - dataProcessingTime)

    # Run the actual fitting in a separate child process
    multipleGPUs = MachineSpecificSettings.AMOUNT_OF_GPUS > 1
    proc = mp.Process(target=_fitModelProc,
                      args=(modelAbsPath, multipleGPUs, gpuSettings, modelGeneration, s, v, p, startTime))
    proc.start()
    proc.join()
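# NOTE: _fitModelProc (the child-process target above, also called directly in benchmark) is
# defined elsewhere and not shown in this section. The sketch below is only an illustration of
# the rough shape such a worker might have; every detail (the "Sketch" suffix, the load/fit/save
# sequence) is an assumption, not the original implementation.
def _fitModelProcSketch(modelAbsPath, useMultipleGPUs, gpuSettings, modelGeneration,
                        states, values, policies, startTime):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = gpuSettings  # Pin the process to the requested GPU(s)
    import keras

    # Load the current model inside the fresh process, fit it on the prepared arrays,
    # then write the updated weights back to the shared model path.
    model = keras.models.load_model(modelAbsPath)
    model.fit(states, [values, policies],
              epochs=Hyperparameters.EPOCHS_PER_TRAINING,
              batch_size=Hyperparameters.MINI_BATCH_SIZE,
              verbose=2, shuffle=True)
    model.save(modelAbsPath, overwrite=True)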
def loopingTrainer(port, gpuSettings):
    connection, modelAbsPath = _init(port)

    import os, StartInit
    StartInit.init()
    print("Starting Trainer GPU-Settings: {}".format(gpuSettings))
    os.environ['CUDA_VISIBLE_DEVICES'] = gpuSettings

    from Main.AlphaZero import NeuralNetworks
    import numpy as np
    import keras

    MachineSpecificSettings.setupHyperparameters()
    singleModel = keras.models.load_model(modelAbsPath)

    # In our experiments we ended up using only a single GPU for training, since a too large
    # batch size gave weird results.
    if MachineSpecificSettings.AMOUNT_OF_GPUS > 1:
        trainingModel = NeuralNetworks.createMultipleGPUModel(singleModel)
    else:
        trainingModel = singleModel

    # Training loop
    while True:
        status, data = connection.readMessage()
        print("Got msg:", status)

        if status == STATUS_TRAIN_DATA:  # TODO: Create an informative else statement
            t1 = time.time()  # Only used for displaying elapsed time to the user
            modelVersion, states, values, policies, weights = data

            # Set up the settings for this training turn
            keras.backend.set_value(trainingModel.optimizer.lr, _getLearningRate(modelVersion))
            MemoryBuffers.CURRENT_MODEL_VERSION = modelVersion
            MemoryBuffers.addLabelsToReplayBuffer(states, values, policies)

            # Get all the data contained in the replay buffers, with a pre-calculated average
            # over similar states.
            inStates, valueLabels, policyLabels = MemoryBuffers.getDistinctTrainingData()
            s = np.array(inStates)
            v = np.array(valueLabels)
            p = np.array(policyLabels)

            # Run the supervised learning
            dataProcessingTime = time.time() - t1
            print("Data preprocessing finished: {}".format(dataProcessingTime))
            print("Using LR:", keras.backend.get_value(trainingModel.optimizer.lr))

            # The network takes the board states as input and is trained against the value and
            # policy targets (same call shape as in fitAndEvaluateModel above).
            trainingModel.fit(s, [v, p],
                              epochs=Hyperparameters.EPOCHS_PER_TRAINING,
                              batch_size=Hyperparameters.MINI_BATCH_SIZE,
                              verbose=2,
                              shuffle=True)

            singleModel.save(modelAbsPath, overwrite=True)
            singleModel.save(Hyperparameters.MODELS_SAVE_PATH + str(modelVersion + 1))
            trainedModelAsBytes = _readModelFromDisk()

            print("Training finished:", time.time() - t1)
            connection.sendMessage("Finished", (trainedModelAsBytes, ))
            MemoryBuffers.storeTrainingDataToDisk()
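# NOTE: _getLearningRate (used in loopingTrainer above) is defined elsewhere in the project and
# is not shown in this section. The sketch below is only an illustration of one plausible shape
# for it, a step decay over model generations; the "Sketch" suffix, breakpoints and rates are
# assumptions, not values from the original code.
def _getLearningRateSketch(modelVersion):
    # Start with a relatively high learning rate and decay it as the model generation increases.
    if modelVersion < 15:
        return 0.01
    if modelVersion < 40:
        return 0.001
    return 0.0001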