def selfPlay(selfPlayPool, model):
    """Run one generation of self-play games and persist the produced training data.

    Dispatches a batch of game jobs to every self-play worker, then switches the
    main thread into Prediction-Oracle mode so it can serve network evaluations
    to the workers until all games finish.

    Args:
        selfPlayPool: Worker pool exposing ``runJobs``; each job tuple is
            (games per worker, prediction cache table).
        model: The current neural network, used both for optional prediction
            pre-computation and for serving evaluations via the oracle.

    Side effects: blocks until self-play completes, prints elapsed time, and
    writes the collected training samples to disk via MemoryBuffers.
    """
    t1 = time.time()

    # Optionally pre-compute a table of cached predictions so workers can skip
    # network calls for known positions; empty dict disables the cache.
    computeTable = PreComputation.computePredictionTable(
        model) if Hyperparameters.USE_PREDICTION_CACHE else {}

    selfPlayPool.runJobs(
        [(Hyperparameters.AMOUNT_OF_GAMES_PER_WORKER, computeTable)]
        * Hyperparameters.AMOUNT_OF_SELF_PLAY_WORKERS)

    # Enter Oracle mode on the main thread (serves predictions until workers finish)
    PredictionOracle.runPredictionOracle(model, selfPlayPool)

    # Bug fix: time.time() deltas are in seconds, but the message says "ms" —
    # convert so the printed number matches its unit.
    print("Self play finished: {} ms".format((time.time() - t1) * 1000))
    MemoryBuffers.storeTrainingDataToDisk()
def loopingTrainer(port, gpuSettings):
    """Long-running training process: receive labeled data, fit the model, send it back.

    Connects back to the coordinator via ``_init(port)``, pins the process to the
    given GPU(s), then loops forever: on each STATUS_TRAIN_DATA message it adds the
    new labels to the replay buffer, trains on the distinct buffered data, saves the
    new model version to disk, and streams the model bytes back over the connection.

    Args:
        port: Port used by ``_init`` to establish the coordinator connection.
        gpuSettings: Value for CUDA_VISIBLE_DEVICES (e.g. "0" or "0,1").

    Note: never returns; intended to run as a dedicated trainer process.
    """
    connection, modelAbsPath = _init(port)

    # NOTE: os/keras imports are deliberately deferred and ordered —
    # CUDA_VISIBLE_DEVICES must be set BEFORE keras/TensorFlow is imported,
    # otherwise the GPU restriction has no effect.
    import os, StartInit
    StartInit.init()
    print("Starting Trainer GPU-Settings: {}".format(gpuSettings))
    os.environ['CUDA_VISIBLE_DEVICES'] = gpuSettings
    from Main.AlphaZero import NeuralNetworks
    import numpy as np
    import keras

    MachineSpecificSettings.setupHyperparameters()
    singleModel = keras.models.load_model(modelAbsPath)

    # In our experiments we ended up using only a single GPU for training,
    # since a too-big batch-size gave weird results.
    if (MachineSpecificSettings.AMOUNT_OF_GPUS > 1):
        trainingModel = NeuralNetworks.createMultipleGPUModel(singleModel)
    else:
        trainingModel = singleModel

    # Training Loop
    while (True):
        status, data = connection.readMessage()
        print("Got msg:", status)
        if (status == STATUS_TRAIN_DATA):
            t1 = time.time()  # Only used for displaying elapsed time to the user
            # `weights` is received as part of the message protocol but is not
            # used by the current training step.
            modelVersion, states, values, policies, weights = data

            # Setup settings for this training turn
            keras.backend.set_value(trainingModel.optimizer.lr, _getLearningRate(modelVersion))
            MemoryBuffers.CURRENT_MODEL_VERSION = modelVersion
            MemoryBuffers.addLabelsToReplayBuffer(states, values, policies)

            # Get all the data contained in the Replay Buffers,
            # with pre-calculated averages of similar states.
            inStates, valueLabels, policyLabels = MemoryBuffers.getDistinctTrainingData()
            s = np.array(inStates)
            v = np.array(valueLabels)
            p = np.array(policyLabels)

            # Run the supervised learning
            dataProcessingTime = time.time() - t1
            print("Data preprocessing finished: {}".format(dataProcessingTime))
            print("Using LR:", keras.backend.get_value(trainingModel.optimizer.lr))
            # s/v/p are already ndarrays — no need to re-wrap them in np.array.
            trainingModel.fit([s, p], v,
                              epochs=Hyperparameters.EPOCHS_PER_TRAINING,
                              batch_size=Hyperparameters.MINI_BATCH_SIZE,
                              verbose=2, shuffle=True)

            # Always save/serialize the single-GPU model; the multi-GPU wrapper
            # shares its weights.
            singleModel.save(modelAbsPath, overwrite=True)
            singleModel.save(Hyperparameters.MODELS_SAVE_PATH + str(modelVersion + 1))
            trainedModelAsBytes = _readModelFromDisk()
            print("Training finished:", time.time() - t1)
            connection.sendMessage("Finished", (trainedModelAsBytes, ))
            MemoryBuffers.storeTrainingDataToDisk()
        else:
            # Resolves the old TODO: surface unexpected statuses instead of
            # silently dropping them, so protocol mismatches are visible.
            print("Trainer received unexpected message status, ignoring:", status)