class TestSimpleGetBatchNormalizationUpdates(object):
    """Tests of get_batch_normalization_updates on a two-layer
    BatchNormalizedMLP."""

    def setUp(self):
        self.mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9])
        self.x = tensor.matrix()

    def simple_assertions(self, updates, num_bricks=2, num_updates=4,
                          mean_only=False):
        """Shared assertions for simple tests.

        Parameters
        ----------
        updates : list of pairs
            Update pairs; the first element of each pair is checked.
        num_bricks : int
            Expected number of distinct bricks owning the population
            statistics found in `updates`.
        num_updates : int
            Expected length of `updates`.
        mean_only : bool
            If True, expect no population standard deviation updates
            at all instead of one group per brick.

        """
        assert len(updates) == num_updates
        assert all(is_shared_variable(u[0]) for u in updates)
        # This order is somewhat arbitrary and implementation_dependent
        means = set(u[0] for u in updates
                    if has_roles(u[0], [BATCH_NORM_POPULATION_MEAN]))
        stdevs = set(u[0] for u in updates
                     if has_roles(u[0], [BATCH_NORM_POPULATION_STDEV]))
        assert means.isdisjoint(stdevs)
        assert len(set(get_brick(v) for v in means)) == num_bricks
        if mean_only:
            # Mean-only normalization should not update any stdev.
            assert len(stdevs) == 0
        else:
            assert len(set(get_brick(v) for v in stdevs)) == num_bricks

    def test_get_batch_normalization_updates(self):
        """Test that get_batch_normalization_updates works as expected."""
        with batch_normalization(self.mlp):
            y_bn = self.mlp.apply(self.x)
        graph = ComputationGraph([y_bn])
        updates = get_batch_normalization_updates(graph)
        self.simple_assertions(updates)

    def test_get_batch_normalization_updates_non_training_applications(self):
        """Test updates extraction in graph with non-training apply."""
        y = self.mlp.apply(self.x)
        with batch_normalization(self.mlp):
            y_bn = self.mlp.apply(self.x)
        graph = ComputationGraph([y_bn, y])
        updates = get_batch_normalization_updates(graph)
        self.simple_assertions(updates)

    def test_get_batch_normalization_updates_no_training(self):
        """Test for exception if there are no training-mode nodes."""
        y = self.mlp.apply(self.x)
        graph = ComputationGraph([y])
        numpy.testing.assert_raises(ValueError,
                                    get_batch_normalization_updates, graph)

    def test_get_batch_normalization_updates_duplicates_error(self):
        """Test that we get an error by default on multiple apply."""
        with batch_normalization(self.mlp):
            y = self.mlp.apply(self.x)
            y2 = self.mlp.apply(self.x)
        graph = ComputationGraph([y, y2])
        numpy.testing.assert_raises(ValueError,
                                    get_batch_normalization_updates, graph)

    def test_get_batch_normalization_updates_allow_duplicates(self):
        """Test get_batch_normalization_updates(allow_duplicates=True)."""
        with batch_normalization(self.mlp):
            y = self.mlp.apply(self.x)
            y2 = self.mlp.apply(self.x)
        graph = ComputationGraph([y, y2])
        updates = get_batch_normalization_updates(graph,
                                                  allow_duplicates=True)
        self.simple_assertions(updates, num_bricks=2, num_updates=8)
class TestSimpleGetBatchNormalizationUpdates(object):
    """Tests of get_batch_normalization_updates on a two-layer
    BatchNormalizedMLP."""

    def setUp(self):
        self.mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9])
        self.x = tensor.matrix()

    def simple_assertions(self, updates, num_bricks=2, num_updates=4,
                          mean_only=False):
        """Shared assertions for simple tests.

        Parameters
        ----------
        updates : list of pairs
            Update pairs; the first element of each pair is checked.
        num_bricks : int
            Expected number of distinct bricks owning the population
            statistics found in `updates`.
        num_updates : int
            Expected length of `updates`.
        mean_only : bool
            If True, expect no population standard deviation updates
            at all instead of one group per brick.

        """
        assert len(updates) == num_updates
        assert all(is_shared_variable(u[0]) for u in updates)
        # This order is somewhat arbitrary and implementation_dependent
        means = set(u[0] for u in updates
                    if has_roles(u[0], [BATCH_NORM_POPULATION_MEAN]))
        stdevs = set(u[0] for u in updates
                     if has_roles(u[0], [BATCH_NORM_POPULATION_STDEV]))
        assert means.isdisjoint(stdevs)
        assert len(set(get_brick(v) for v in means)) == num_bricks
        if mean_only:
            # Mean-only normalization should not update any stdev.
            assert len(stdevs) == 0
        else:
            assert len(set(get_brick(v) for v in stdevs)) == num_bricks

    def test_get_batch_normalization_updates(self):
        """Test that get_batch_normalization_updates works as expected."""
        with batch_normalization(self.mlp):
            y_bn = self.mlp.apply(self.x)
        graph = ComputationGraph([y_bn])
        updates = get_batch_normalization_updates(graph)
        self.simple_assertions(updates)

    def test_get_batch_normalization_updates_non_training_applications(self):
        """Test updates extraction in graph with non-training apply."""
        y = self.mlp.apply(self.x)
        with batch_normalization(self.mlp):
            y_bn = self.mlp.apply(self.x)
        graph = ComputationGraph([y_bn, y])
        updates = get_batch_normalization_updates(graph)
        self.simple_assertions(updates)

    def test_get_batch_normalization_updates_no_training(self):
        """Test for exception if there are no training-mode nodes."""
        y = self.mlp.apply(self.x)
        graph = ComputationGraph([y])
        numpy.testing.assert_raises(ValueError,
                                    get_batch_normalization_updates, graph)

    def test_get_batch_normalization_updates_duplicates_error(self):
        """Test that we get an error by default on multiple apply."""
        with batch_normalization(self.mlp):
            y = self.mlp.apply(self.x)
            y2 = self.mlp.apply(self.x)
        graph = ComputationGraph([y, y2])
        numpy.testing.assert_raises(ValueError,
                                    get_batch_normalization_updates, graph)

    def test_get_batch_normalization_updates_allow_duplicates(self):
        """Test get_batch_normalization_updates(allow_duplicates=True)."""
        with batch_normalization(self.mlp):
            y = self.mlp.apply(self.x)
            y2 = self.mlp.apply(self.x)
        graph = ComputationGraph([y, y2])
        updates = get_batch_normalization_updates(graph,
                                                  allow_duplicates=True)
        self.simple_assertions(updates, num_bricks=2, num_updates=8)
def test_batch_normalized_mlp_transformed():
    """Smoke test: a training-mode BatchNormalizedMLP graph yields four
    batch normalization updates."""
    inputs = tensor.matrix('x')
    network = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9])
    with batch_normalization(network):
        outputs = network.apply(inputs)
    graph = ComputationGraph([outputs])
    bn_updates = get_batch_normalization_updates(graph)
    assert len(bn_updates) == 4
def test_get_batch_normalization_updates_mean_only(self):
    """Check update extraction when the MLP is built with mean_only=True."""
    mean_only_mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9],
                                       mean_only=True)
    with batch_normalization(mean_only_mlp):
        normalized_output = mean_only_mlp.apply(self.x)
    # Two updates are expected here rather than the default four.
    bn_updates = get_batch_normalization_updates(
        ComputationGraph([normalized_output]))
    self.simple_assertions(bn_updates, num_updates=2, mean_only=True)
def test_get_batch_normalization_updates_mean_only(self):
    """Check update extraction when the MLP is built with mean_only=True."""
    mean_only_mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9],
                                       mean_only=True)
    with batch_normalization(mean_only_mlp):
        normalized_output = mean_only_mlp.apply(self.x)
    # Two updates are expected here rather than the default four.
    bn_updates = get_batch_normalization_updates(
        ComputationGraph([normalized_output]))
    self.simple_assertions(bn_updates, num_updates=2, mean_only=True)
def training(runname, rnnType, maxPackets, packetTimeSteps, packetReverse, padOldTimeSteps, wtstd, lr, decay, clippings, dimIn, dim, attentionEnc, attentionContext, numClasses, batch_size, epochs, trainPercent, dataPath, loadPrepedData, channel): # pragma: no cover print locals() print X = T.tensor4('inputs') Y = T.matrix('targets') linewt_init = IsotropicGaussian(wtstd) line_bias = Constant(1.0) rnnwt_init = IsotropicGaussian(wtstd) rnnbias_init = Constant(0.0) classifierWts = IsotropicGaussian(wtstd) learning_rateClass = theano.shared(np.array(lr, dtype=theano.config.floatX)) learning_decay = np.array(decay, dtype=theano.config.floatX) ###DATA PREP print 'loading data' if loadPrepedData: hexSessions = loadFile(dataPath) else: sessioner = sessionizer.HexSessionizer(dataPath) hexSessions = sessioner.read_pcap() hexSessions = removeBadSessionizer(hexSessions) numSessions = len(hexSessions) print str(numSessions) + ' sessions found' hexSessionsKeys = order_keys(hexSessions) hexDict = hexTokenizer() print 'creating dictionary of ip communications' comsDict, uniqIPs = srcIpDict(hexSessions) comsDict = dictUniquerizer(comsDict) print 'initializing network graph' ###ENCODER if rnnType == 'gru': rnn = GatedRecurrent(dim=dim, weights_init = rnnwt_init, biases_init = rnnbias_init, name = 'gru') dimMultiplier = 2 else: rnn = LSTM(dim=dim, weights_init = rnnwt_init, biases_init = rnnbias_init, name = 'lstm') dimMultiplier = 4 fork = Fork(output_names=['linear', 'gates'], name='fork', input_dim=dimIn, output_dims=[dim, dim * dimMultiplier], weights_init = linewt_init, biases_init = line_bias) ###CONTEXT if rnnType == 'gru': rnnContext = GatedRecurrent(dim=dim, weights_init = rnnwt_init, biases_init = rnnbias_init, name = 'gruContext') else: rnnContext = LSTM(dim=dim, weights_init = rnnwt_init, biases_init = rnnbias_init, name = 'lstmContext') forkContext = Fork(output_names=['linearContext', 'gatesContext'], name='forkContext', input_dim=dim, output_dims=[dim, dim * 
dimMultiplier], weights_init = linewt_init, biases_init = line_bias) forkDec = Fork(output_names=['linear', 'gates'], name='forkDec', input_dim=dim, output_dims=[dim, dim*dimMultiplier], weights_init = linewt_init, biases_init = line_bias) #CLASSIFIER bmlp = BatchNormalizedMLP( activations=[Tanh(),Tanh()], dims=[dim, dim, numClasses], weights_init=classifierWts, biases_init=Constant(0.0001) ) #initialize the weights in all the functions fork.initialize() rnn.initialize() forkContext.initialize() rnnContext.initialize() forkDec.initialize() bmlp.initialize() def onestepEnc(X): data1, data2 = fork.apply(X) if rnnType == 'gru': hEnc = rnn.apply(data1, data2) else: hEnc, _ = rnn.apply(data2) return hEnc hEnc, _ = theano.scan(onestepEnc, X) #(mini*numPackets, packetLen, 1, hexdictLen) if attentionEnc: attentionmlpEnc = MLP(activations=[Tanh()], dims = [dim, 1], weights_init=attnWts, biases_init=Constant(1.0)) attentionmlpEnc.initialize() hEncAttn = T.reshape(hEnc, (-1, packetTimeSteps, dim)) def onestepEncAttn(hEncAttn): preEncattn = attentionmlpEnc.apply(hEncAttn) attEncsoft = Softmax() attEncpyx = attEncsoft.apply(preEncattn.flatten()) attEncpred = attEncpyx.flatten() attenc = T.mul(hEncAttn.dimshuffle(1,0), attEncpred).dimshuffle(1,0) return attenc attenc, _ = theano.scan(onestepEncAttn, hEncAttn) hEncReshape = T.reshape(T.sum(attenc, axis = 1), (-1, maxPackets, 1, dim)) else: hEncReshape = T.reshape(hEnc[:,-1], (-1, maxPackets, 1, dim)) #[:,-1] takes the last rep for each packet #(mini, numPackets, 1, dimReduced) #[:,-1] takes the last rep for each packet #(mini, numPackets, 1, dimReduced) def onestepContext(hEncReshape): data3, data4 = forkContext.apply(hEncReshape) if rnnType == 'gru': hContext = rnnContext.apply(data3, data4) else: hContext, _ = rnnContext.apply(data4) return hContext hContext, _ = theano.scan(onestepContext, hEncReshape) if attentionContext: attentionmlpContext = MLP(activations=[Tanh()], dims = [dim, 1], weights_init=attnWts, 
biases_init=Constant(1.0)) attentionmlpContext.initialize() hContextAttn = T.reshape(hContext, (-1,maxPackets,dim)) def onestepContextAttn(hContextAttn): preContextatt = attentionmlpContext.apply(hContextAttn) attContextsoft = Softmax() attContextpyx = attContextsoft.apply(preContextatt.flatten()) attContextpred = attContextpyx.flatten() attcontext = T.mul(hContextAttn.dimshuffle(1,0), attContextpred).dimshuffle(1,0) return attcontext attcontext, _ = theano.scan(onestepContextAttn, hContextAttn) hContextReshape = T.sum(attcontext, axis = 1) else: hContextReshape = T.reshape(hContext[:,-1], (-1,dim)) data5, _ = forkDec.apply(hContextReshape) pyx = bmlp.apply(data5) softmax = Softmax() softoutClass = softmax.apply(pyx) costClass = T.mean(CategoricalCrossEntropy().apply(Y, softoutClass)) #CREATE GRAPH cgClass = ComputationGraph([costClass]) paramsClass = VariableFilter(roles = [PARAMETER])(cgClass.variables) learning = learningfunctions.Learning(costClass,paramsClass,learning_rateClass,l1=0.,l2=0.,maxnorm=0.,c=clippings) updatesClass = learning.Adam() module_logger.info('starting graph compilation') classifierTrain = theano.function([X,Y], [costClass, hEnc, hContext, pyx, softoutClass], updates=updatesClass, allow_input_downcast=True) classifierPredict = theano.function([X], softoutClass, allow_input_downcast=True) module_logger.info('graph compilation finished') print 'finished graph compilation' trainIndex = int(len(hexSessionsKeys)*trainPercent) epochCost = [] gradNorms = [] trainAcc = [] testAcc = [] costCollect = [] trainCollect = [] module_logger.info('beginning training') iteration = 0 #epoch for epoch in xrange(epochs): #iteration/minibatch for start, end in zip(range(0, trainIndex,batch_size), range(batch_size, trainIndex, batch_size)): trainingTargets = [] trainingSessions = [] #create one minibatch with 0.5 normal and 0.5 abby normal traffic for trainKey in range(start, end): sessionForEncoding = list(hexSessions[hexSessions.keys()[trainKey]][0]) adfun = 
adversarialfunctions.Adversary(sessionForEncoding) adversaryList = [sessionForEncoding, adfun.dstIpSwapOut(comsDict, uniqIPs), adfun.portDirSwitcher(), adfun.ipDirSwitcher()] abbyIndex = random.sample(range(len(adversaryList)), 1)[0] targetClasses = [0]*numClasses targetClasses[abbyIndex] = 1 abbyTarget = np.array(targetClasses, dtype=theano.config.floatX) trainingSessions.append(abbyOneHotSes[0]) trainingTargets.append(abbyTarget) sessionsMinibatch = np.asarray(trainingSessions).reshape((-1, packetTimeSteps, 1, dimIn)) targetsMinibatch = np.asarray(trainingTargets) costfun = classifierTrain(sessionsMinibatch, targetsMinibatch) if iteration % (numSessions / (10 * batch_size)) == 0: costCollect.append(costfun[0]) trainCollect.append(np.mean(np.argmax(costfun[-1],axis=1) == np.argmax(targetsMinibatch, axis=1))) module_logger.info(' Iteration: ', iteration) module_logger.info(' Cost: ', np.mean(costCollect)) module_logger.info(' TRAIN accuracy: ', np.mean(trainCollect)) print ' Iteration: ', iteration print ' Cost: ', np.mean(costCollect) print ' TRAIN accuracy: ', np.mean(trainCollect) iteration+=1 #testing accuracy if iteration % (numSessions / (2 * batch_size)) == 0: predtar, acttar, testCollect = predictClass(classifierPredict, hexSessions, comsDict, uniqIPs, hexDict, hexSessionsKeys, numClasses, trainPercent, dimIn, maxPackets, packetTimeSteps, padOldTimeSteps) binaryPrecisionRecall(predtar, acttar, numClasses) module_logger.info(str(testCollect)) #save the models if iteration % (numSessions / (5 * batch_size)) == 0: save_model(classifierPredict) epochCost.append(np.mean(costCollect)) trainAcc.append(np.mean(trainCollect)) module_logger.info('Epoch: ', epoch) module_logger.info('Epoch cost average: ', epochCost[-1]) module_logger.info('Epoch TRAIN accuracy: ', trainAcc[-1]) print 'Epoch: ', epoch print 'Epoch cost average: ', epochCost[-1] print 'Epoch TRAIN accuracy: ', trainAcc[-1] return classifierTrain, classifierPredict