def run(simulationIndex, X, Y):
    """Run the model"""
    print("Training with:", simulationIndex)
    dropoutRate = 0.25

    # ------ Build a two-layer fully connected network
    mainGraph = ga.Graph()
    ffeed = mainGraph.addOperation(ga.Variable(X),
                                   doGradient=False,
                                   feederOperation=True)
    feedDrop = mainGraph.addOperation(ga.DropoutOperation(ffeed, dropoutRate),
                                      doGradient=False,
                                      finalOperation=False)

    l1 = ga.addDenseLayer(mainGraph, 100,
                          inputOperation=feedDrop,
                          activation=ga.ReLUActivation,
                          dropoutRate=dropoutRate,
                          batchNormalisation=True)
    l2 = ga.addDenseLayer(mainGraph, 10,
                          inputOperation=l1,
                          activation=ga.SoftmaxActivation,
                          dropoutRate=0,
                          batchNormalisation=False)
    fcost = mainGraph.addOperation(ga.CrossEntropyCostSoftmax(l2, Y),
                                   doGradient=False,
                                   finalOperation=True)

    def fprime(p, data, labels):
        mainGraph.feederOperation.assignData(data)
        mainGraph.resetAll()
        mainGraph.finalOperation.assignLabels(labels)
        mainGraph.attachParameters(p)
        c = mainGraph.feedForward()
        mainGraph.feedBackward()
        g = mainGraph.unrollGradients()
        return c, g

    param0 = mainGraph.unrollGradientParameters()
    adamGrad = ga.adaptiveSGD(trainingData=X,
                              trainingLabels=Y,
                              param0=param0,
                              epochs=1e2,
                              miniBatchSize=20,
                              initialLearningRate=1e-2,
                              beta1=0.9,
                              beta2=0.999,
                              epsilon=1e-8,
                              testFrequency=1e2,
                              function=fprime)

    pickleFilename = "minimizerParamsDense_" + str(simulationIndex) + ".pkl"
    # with open(pickleFilename, "rb") as fp:
    #     adamParams = pickle.load(fp)
    # adamGrad.restoreState(adamParams)
    # params = adamParams["params"]

    params = adamGrad.minimize(printTrainigCost=True,
                               printUpdateRate=False,
                               dumpParameters=pickleFilename)
    mainGraph.attachParameters(params)
    return mainGraph
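A follow-up sketch, not part of the original script: the trained graph returned by run() can be scored on held-out data with the same calls used inside fprime(). The helper name evaluateCost and the Xtest/Ytest arrays are illustrative assumptions; dropout operations remain in the graph, so treat the resulting number only as a rough check.

def evaluateCost(graph, data, labels):
    """Hypothetical helper: cross-entropy cost of a trained graph on (data, labels)."""
    graph.feederOperation.assignData(data)      # same feeder call as in fprime()
    graph.resetAll()
    graph.finalOperation.assignLabels(labels)
    return graph.feedForward()

# Example (assumes X, Y, Xtest, Ytest exist in the calling scope):
# trainedGraph = run(simulationIndex=0, X=X, Y=Y)
# print("Test-set cost:", evaluateCost(trainedGraph, Xtest, Ytest))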
def run(): """Run the model""" N, D, H1, H2 = 10, 3, 4, 2 trainData = np.arange(0, N * D).reshape(N, D).astype(np.float) trainLabels = np.arange(0, N * H2).reshape(N, H2).astype(np.float) mainGraph = ga.Graph() ffeed = mainGraph.addOperation(ga.Variable(trainData), doGradient=False, feederOperation=True) feedDrop = mainGraph.addOperation(ga.DropoutOperation(ffeed, 0.0), doGradient=False, finalOperation=False) l1 = ga.addDenseLayer(mainGraph, H1, inputOperation=feedDrop, activation=ga.ReLUActivation, dropoutRate=0.0, batchNormalisation=True) l2 = ga.addDenseLayer(mainGraph, H2, inputOperation=l1, activation=ga.SoftmaxActivation, dropoutRate=0.0, batchNormalisation=False) fcost = mainGraph.addOperation(ga.CrossEntropyCostSoftmax(l2, trainLabels), doGradient=False, finalOperation=True) def f(x): mainGraph.attachParameters(x) return mainGraph.getValue() def fprime(p, data, labels): mainGraph.feederOperation.assignData(data) mainGraph.resetAll() mainGraph.finalOperation.assignLabels(labels) mainGraph.attachParameters(p) c = mainGraph.feedForward() mainGraph.feedBackward() g = mainGraph.unrollGradients() return c, g params = mainGraph.unrollGradientParameters() numGrad = scipy.optimize.approx_fprime(params, f, 1e-8) analCostGraph, analGradientGraph = fprime(params, trainData, trainLabels) return numGrad, analGradientGraph, analCostGraph, mainGraph
Y = allDatasets["train_labels"]
Xtest = allDatasets["test_dataset"]
Ytest = allDatasets["test_labels"]
Xvalid = allDatasets["valid_dataset"]
Yvalid = allDatasets["valid_labels"]

# index = int(sys.argv[1])
index = 0
print("Training with:", index)

dropValueL = 0.1
dropValueS = 0.05

# ------ Build a LeNet architecture CNN
mainGraph = ga.Graph()
feed = mainGraph.addOperation(ga.Variable(X),
                              doGradient=False,
                              feederOperation=True)
feedDrop = mainGraph.addOperation(ga.DropoutOperation(feed, dropValueS),
                                  doGradient=False,
                                  finalOperation=False)

cnn1 = ga.addConv2dLayer(mainGraph,
                         inputOperation=feedDrop,
                         nFilters=20,
                         filterHeigth=5,
                         filterWidth=5,
                         padding="SAME",
                         convStride=1,
                         activation=ga.ReLUActivation,
def run(simulationIndex, X, Y=None):
    """Run the model"""
    print("Training with:", simulationIndex)

    seriesLength, nFeatures = X.shape
    # ------ it is important that the exampleLength is the same as
    # ------ the number of examples in the mini batch so that
    # ------ the state of the RNN is continuously passed forward
    exampleLength = 4
    nExamples = exampleLength
    nHidden0 = 25
    nHidden1 = 25

    mainGraph = ga.Graph(False)
    dummyX = np.zeros((nExamples, exampleLength, nFeatures))
    feed = mainGraph.addOperation(ga.Variable(dummyX), feederOperation=True)

    # ------ Generate the network, options are RNN and LSTM gates
    # ------ Add initial layer and then possibly append more
    hactivations0, cStates0 = ga.addInitialLSTMLayer(mainGraph,
                                                     inputOperation=feed,
                                                     nHidden=nHidden0)
    hactivations1, cStates1 = ga.appendLSTMLayer(mainGraph,
                                                 previousActivations=hactivations0,
                                                 nHidden=nHidden1)

    # hactivations0 = ga.addInitialRNNLayer(mainGraph,
    #                                       inputOperation=feed,
    #                                       activation=ga.TanhActivation,
    #                                       nHidden=nHidden1)
    # hactivations1 = ga.appendRNNLayer(mainGraph,
    #                                   previousActivations=hactivations0,
    #                                   activation=ga.TanhActivation,
    #                                   nHidden=nHidden1)

    finalCost, costOperationsList = ga.addRNNCost(mainGraph,
                                                  hactivations1,
                                                  costActivation=ga.SoftmaxActivation,
                                                  costOperation=ga.CrossEntropyCostSoftmax,
                                                  nHidden=nHidden1,
                                                  labelsShape=feed.shape,
                                                  labels=None)

    hactivations = [hactivations0, hactivations1]
    cStates = [cStates0, cStates1]
    nHiddenList = [nHidden0, nHidden1]

    def fprime(p, data, labels,
               costOperationsList=costOperationsList,
               mainGraph=mainGraph):
        mainGraph.feederOperation.assignData(data)
        mainGraph.resetAll()
        for index, cop in enumerate(costOperationsList):
            cop.assignLabels(labels[:, index, :])
        mainGraph.attachParameters(p)
        c = mainGraph.feedForward()
        mainGraph.feedBackward()
        g = mainGraph.unrollGradients()

        # ------ carry the hidden and cell states over to the next mini batch
        nLayers = len(hactivations)
        for i in range(nLayers):
            hactivations[i][0].assignData(hactivations[i][-1].getValue())
            cStates[i][0].assignData(cStates[i][-1].getValue())
        return c, g

    param0 = mainGraph.unrollGradientParameters()
    print("Number of parameters to train:", len(param0))
    adamGrad = ga.adaptiveSGDrecurrent(trainingData=X,
                                       param0=param0,
                                       epochs=1e3,
                                       miniBatchSize=nExamples,
                                       exampleLength=exampleLength,
                                       initialLearningRate=1e-3,
                                       beta1=0.9,
                                       beta2=0.999,
                                       epsilon=1e-8,
                                       testFrequency=1e2,
                                       function=fprime)

    pickleFilename = "minimizerParamsRNN_" + str(simulationIndex) + ".pkl"
    # with open(pickleFilename, "rb") as fp:
    #     adamParams = pickle.load(fp)
    # adamGrad.restoreState(adamParams)
    # params = adamParams["params"]

    params = adamGrad.minimize(printTrainigCost=True,
                               printUpdateRate=False,
                               dumpParameters=pickleFilename)
    mainGraph.attachParameters(params)

    cache = (nFeatures, nHiddenList, hactivations, cStates, costOperationsList)
    return mainGraph, cache, adamGrad.costLists[-1]
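A usage sketch, not from the original file: run() expects X as a (seriesLength, nFeatures) series (e.g. a one-hot encoded token sequence, as in the control script below) and returns the trained graph, a cache of network handles, and the last recorded training cost. The variable names below are illustrative.

# trainedGraph, cache, finalCost = run(simulationIndex=0, X=x)
# nFeatures, nHiddenList, hactivations, cStates, costOperationsList = cache
# print("Final training cost:", finalCost)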
"""Control script"""
import pickle
import sys
import numpy as np
import graphAttack as ga  # assumption: the `ga` namespace used below comes from the graphAttack package

simulationIndex = 0
pickleFilename = "dataSet/singleSentence.pkl"
with open(pickleFilename, "rb") as fp:
    x, index_to_word, word_to_index = pickle.load(fp)

seriesLength, nFeatures = x.shape
nExamples = 2
exampleLength = 15
nHidden0 = 25
nHidden1 = 25

mainGraph = ga.Graph(False)
dummyX = np.zeros((nExamples, exampleLength, nFeatures))
feed = mainGraph.addOperation(ga.Variable(dummyX), feederOperation=True)

# ------ Generate the network, options are RNN and LSTM gates
# ------ Add initial layer and then possibly append more
hactivations0, cStates0 = ga.addInitialLSTMLayer(mainGraph,
                                                 inputOperation=feed,
                                                 nHidden=nHidden0)
hactivations1, cStates1 = ga.appendLSTMLayer(mainGraph,
                                             previousActivations=hactivations0,
                                             nHidden=nHidden1)

# hactivations0 = ga.addInitialRNNLayer(mainGraph,
#                                       inputOperation=feed,
#                                       activation=ga.TanhActivation,
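For context, a hedged sketch of how a pickle in the expected format could be produced; the assumption, consistent with the unpacking in the control script above, is that x is a (seriesLength, nFeatures) one-hot matrix stored alongside the two vocabulary mappings. The toy sentence and variable values are illustrative only.

# tokens = "the cat sat on the mat".split()
# vocab = sorted(set(tokens))
# word_to_index = {w: i for i, w in enumerate(vocab)}
# index_to_word = {i: w for w, i in word_to_index.items()}
# x = np.eye(len(vocab))[[word_to_index[w] for w in tokens]]   # (seriesLength, nFeatures)
# with open(pickleFilename, "wb") as fp:
#     pickle.dump((x, index_to_word, word_to_index), fp)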
def run(simulationIndex, X, Y): """Run the model""" # index = int(sys.argv[1]) print("Training with:", simulationIndex) dropValueL = 0.1 dropValueS = 0.05 # ------ Build a LeNet archicture CNN mainGraph = ga.Graph() feed = mainGraph.addOperation(ga.Variable(X), doGradient=False, feederOperation=True) feedDrop = mainGraph.addOperation(ga.DropoutOperation(feed, dropValueS), doGradient=False, finalOperation=False) cnn1 = ga.addConv2dLayer(mainGraph, inputOperation=feedDrop, nFilters=20, filterHeigth=5, filterWidth=5, padding="SAME", convStride=1, activation=ga.ReLUActivation, batchNormalisation=False, pooling=ga.MaxPoolOperation, poolHeight=2, poolWidth=2, poolStride=2) cnn2 = ga.addConv2dLayer(mainGraph, inputOperation=cnn1, nFilters=50, filterHeigth=5, filterWidth=5, padding="SAME", convStride=1, activation=ga.ReLUActivation, batchNormalisation=True, pooling=ga.MaxPoolOperation, poolHeight=2, poolWidth=2, poolStride=2) flattenOp = mainGraph.addOperation(ga.FlattenFeaturesOperation(cnn2)) flattenDrop = mainGraph.addOperation(ga.DropoutOperation( flattenOp, dropValueL), doGradient=False, finalOperation=False) l1 = ga.addDenseLayer(mainGraph, 500, inputOperation=flattenDrop, activation=ga.ReLUActivation, dropoutRate=dropValueL, batchNormalisation=True) l2 = ga.addDenseLayer(mainGraph, 10, inputOperation=l1, activation=ga.SoftmaxActivation, dropoutRate=0.0, batchNormalisation=False) fcost = mainGraph.addOperation(ga.CrossEntropyCostSoftmax(l2, Y), doGradient=False, finalOperation=True) def fprime(p, data, labels): mainGraph.feederOperation.assignData(data) mainGraph.resetAll() mainGraph.finalOperation.assignLabels(labels) mainGraph.attachParameters(p) c = mainGraph.feedForward() mainGraph.feedBackward() g = mainGraph.unrollGradients() return c, g param0 = mainGraph.unrollGradientParameters() adamGrad = ga.adaptiveSGD(trainingData=X, trainingLabels=Y, param0=param0, epochs=10, miniBatchSize=10, initialLearningRate=1e-2, beta1=0.9, beta2=0.999, epsilon=1e-8, testFrequency=1e1, function=fprime) pickleFilename = "minimizerParamsCNN_" + str(simulationIndex) + ".pkl" # with open(pickleFilename, "rb") as fp: # adamParams = pickle.load(fp) # adamGrad.restoreState(adamParams) # params = adamParams["params"] params = adamGrad.minimize(printTrainigCost=True, printUpdateRate=False, dumpParameters=pickleFilename) mainGraph.attachParameters(params) return mainGraph
def run(): """Run the model""" N, T, D, H1, H2 = 2, 3, 4, 5, 4 trainData = np.linspace(-0.1, 0.3, num=N * T * D).reshape(N, T, D) trainLabels = np.random.random((N, T, D)) mainGraph = ga.Graph(False) xop = mainGraph.addOperation(ga.Variable(trainData), feederOperation=True) hactivations0, cStates0 = ga.addInitialLSTMLayer(mainGraph, inputOperation=xop, nHidden=H1) hactivations1, cStates1 = ga.appendLSTMLayer( mainGraph, previousActivations=hactivations0, nHidden=H2) # hactivations0 = ga.addInitialRNNLayer(mainGraph, # inputOperation=xop, # activation=ga.TanhActivation, # nHidden=H1) # hactivations1 = ga.appendRNNLayer(mainGraph, # previousActivations=hactivations0, # activation=ga.TanhActivation, # nHidden=H2) finalCost, costOperationsList = ga.addRNNCost( mainGraph, hactivations1, costActivation=ga.SoftmaxActivation, costOperation=ga.CrossEntropyCostSoftmax, nHidden=H2, labelsShape=xop.shape, labels=None) def f(p, costOperationsList=costOperationsList, mainGraph=mainGraph): data = trainData labels = trainLabels mainGraph.feederOperation.assignData(data) mainGraph.resetAll() for index, cop in enumerate(costOperationsList): cop.assignLabels(labels[:, index, :]) mainGraph.attachParameters(p) c = mainGraph.feedForward() return c hactivations = [hactivations0, hactivations1] cStates = [cStates0, cStates1] def fprime(p, data, labels, costOperationsList=costOperationsList, mainGraph=mainGraph): mainGraph.feederOperation.assignData(data) mainGraph.resetAll() for index, cop in enumerate(costOperationsList): cop.assignLabels(labels[:, index, :]) mainGraph.attachParameters(p) c = mainGraph.feedForward() mainGraph.feedBackward() g = mainGraph.unrollGradients() nLayers = len(hactivations) for i in range(nLayers): hactivations[i][0].assignData(hactivations[i][-1].getValue()) cStates[i][0].assignData(cStates[i][-1].getValue()) return c, g params = mainGraph.unrollGradientParameters() numGrad = scipy.optimize.approx_fprime(params, f, 1e-8) analCostGraph, analGradientGraph = fprime(params, trainData, trainLabels) return numGrad, analGradientGraph, analCostGraph, mainGraph
def run(): """Run the model""" trainData = np.random.random((5, 1, 10, 10)) trainLabels = np.random.random((5, 10)) # ------ conv2D operation testing mainGraph = ga.Graph() feed = mainGraph.addOperation(ga.Variable(trainData), doGradient=False, feederOperation=True) cnn1 = ga.addConv2dLayer(mainGraph, inputOperation=feed, nFilters=3, filterHeigth=5, filterWidth=5, padding="SAME", convStride=1, activation=ga.ReLUActivation, batchNormalisation=True, pooling=ga.MaxPoolOperation, poolHeight=2, poolWidth=2, poolStride=2) flattenOp = mainGraph.addOperation(ga.FlattenFeaturesOperation(cnn1)) flattenDrop = mainGraph.addOperation(ga.DropoutOperation(flattenOp, 0.0), doGradient=False, finalOperation=False) l1 = ga.addDenseLayer(mainGraph, 20, inputOperation=flattenDrop, activation=ga.ReLUActivation, dropoutRate=0.0, batchNormalisation=False) l2 = ga.addDenseLayer(mainGraph, 10, inputOperation=l1, activation=ga.SoftmaxActivation, dropoutRate=0.0, batchNormalisation=False) fcost = mainGraph.addOperation(ga.CrossEntropyCostSoftmax(l2, trainLabels), doGradient=False, finalOperation=True) def fprime(p, data, labels): mainGraph.feederOperation.assignData(data) mainGraph.resetAll() mainGraph.finalOperation.assignLabels(labels) mainGraph.attachParameters(p) c = mainGraph.feedForward() mainGraph.feedBackward() g = mainGraph.unrollGradients() return c, g def f(p): data = trainData labels = trainLabels mainGraph.feederOperation.assignData(data) mainGraph.resetAll() mainGraph.finalOperation.assignLabels(labels) mainGraph.attachParameters(p) c = mainGraph.feedForward() return c params = mainGraph.unrollGradientParameters() numGrad = scipy.optimize.approx_fprime(params, f, 1e-8) analCostGraph, analGradientGraph = fprime(params, trainData, trainLabels) return numGrad, analGradientGraph, analCostGraph, mainGraph