def test_batch_normalized_mlp_learn_scale_propagated_at_alloc():
    """Test that setting learn_scale on a BatchNormalizedMLP works."""
    mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9], learn_scale=False)
    assert not mlp.learn_scale
    assert all(act.children[0].learn_scale for act in mlp.activations)
    mlp.allocate()
    assert not any(act.children[0].learn_scale for act in mlp.activations)
def test_batch_normalized_mlp_transformed():
    """Smoke test that a graph involving a BatchNormalizedMLP transforms."""
    x = tensor.matrix('x')
    mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9])
    with batch_normalization(mlp):
        y = mlp.apply(x)
    assert len(get_batch_normalization_updates(ComputationGraph([y]))) == 4
def test_batch_normalized_mlp_allocation():
    """Test that BatchNormalizedMLP performs allocation correctly."""
    mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9])
    mlp.allocate()
    assert mlp.activations[0].children[0].input_dim == 7
    assert mlp.activations[1].children[0].input_dim == 9
    assert not any(l.use_bias for l in mlp.linear_transformations)
def test_batch_normalized_mlp_mean_only_propagated_at_alloc():
    """Test that setting mean_only on a BatchNormalizedMLP works."""
    mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9], mean_only=True)
    assert mlp.mean_only
    assert not any(act.children[0].mean_only for act in mlp.activations)
    mlp.allocate()
    assert all(act.children[0].mean_only for act in mlp.activations)
def test_get_batch_normalization_updates_mean_only(self):
    """Test get_batch_normalization_updates with mean_only bricks."""
    mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9], mean_only=True)
    with batch_normalization(mlp):
        y_bn = mlp.apply(self.x)
    graph = ComputationGraph([y_bn])
    updates = get_batch_normalization_updates(graph)
    self.simple_assertions(updates, num_updates=2, mean_only=True)
def test_batch_normalized_mlp_conserve_memory_propagated():
    """Test that setting conserve_memory on a BatchNormalizedMLP works."""
    mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9],
                             conserve_memory=False)
    assert not any(act.children[0].conserve_memory for act in mlp.activations)
    mlp.conserve_memory = True
    assert mlp.conserve_memory
    assert all(act.children[0].conserve_memory for act in mlp.activations)
class TestSimpleGetBatchNormalizationUpdates(object):
    def setUp(self):
        self.mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9])
        self.x = tensor.matrix()

    def simple_assertions(self, updates, num_bricks=2, num_updates=4,
                          mean_only=False):
        """Shared assertions for simple tests."""
        assert len(updates) == num_updates
        assert all(is_shared_variable(u[0]) for u in updates)
        # This order is somewhat arbitrary and implementation-dependent
        means = set(u[0] for u in updates
                    if has_roles(u[0], [BATCH_NORM_POPULATION_MEAN]))
        stdevs = set(u[0] for u in updates
                     if has_roles(u[0], [BATCH_NORM_POPULATION_STDEV]))
        assert means.isdisjoint(stdevs)
        assert len(set(get_brick(v) for v in means)) == num_bricks
        if mean_only:
            # mean_only bricks track no population stdev.
            assert len(stdevs) == 0
        else:
            assert len(set(get_brick(v) for v in stdevs)) == num_bricks

    def test_get_batch_normalization_updates(self):
        """Test that get_batch_normalization_updates works as expected."""
        with batch_normalization(self.mlp):
            y_bn = self.mlp.apply(self.x)
        graph = ComputationGraph([y_bn])
        updates = get_batch_normalization_updates(graph)
        self.simple_assertions(updates)

    def test_get_batch_normalization_updates_non_training_applications(self):
        """Test updates extraction in a graph with a non-training apply."""
        y = self.mlp.apply(self.x)
        with batch_normalization(self.mlp):
            y_bn = self.mlp.apply(self.x)
        graph = ComputationGraph([y_bn, y])
        updates = get_batch_normalization_updates(graph)
        self.simple_assertions(updates)

    def test_get_batch_normalization_updates_no_training(self):
        """Test for exception if there are no training-mode nodes."""
        y = self.mlp.apply(self.x)
        graph = ComputationGraph([y])
        numpy.testing.assert_raises(ValueError,
                                    get_batch_normalization_updates, graph)

    def test_get_batch_normalization_updates_duplicates_error(self):
        """Test that we get an error by default on multiple applies."""
        with batch_normalization(self.mlp):
            y = self.mlp.apply(self.x)
            y2 = self.mlp.apply(self.x)
        graph = ComputationGraph([y, y2])
        numpy.testing.assert_raises(ValueError,
                                    get_batch_normalization_updates, graph)

    def test_get_batch_normalization_updates_allow_duplicates(self):
        """Test get_batch_normalization_updates(allow_duplicates=True)."""
        with batch_normalization(self.mlp):
            y = self.mlp.apply(self.x)
            y2 = self.mlp.apply(self.x)
        graph = ComputationGraph([y, y2])
        updates = get_batch_normalization_updates(graph,
                                                  allow_duplicates=True)
        self.simple_assertions(updates, num_bricks=2, num_updates=8)
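# A minimal sketch (not one of the original snippets) of how the updates
# returned by get_batch_normalization_updates are typically consumed: each
# pair couples a population statistic with its minibatch estimate, and a
# common recipe is to maintain an exponential moving average alongside the
# gradient step. The import paths assume Blocks >= 0.2; `alpha` and the
# commented `algorithm` are illustrative names, not part of the source.
from theano import tensor
from blocks.bricks import Tanh
from blocks.bricks.bn import BatchNormalizedMLP
from blocks.graph import (ComputationGraph, batch_normalization,
                          get_batch_normalization_updates)

x = tensor.matrix('x')
mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9])
with batch_normalization(mlp):
    y_bn = mlp.apply(x)  # training-mode graph, minibatch statistics

pop_updates = get_batch_normalization_updates(ComputationGraph([y_bn]))
alpha = 0.05  # illustrative moving-average rate
extra_updates = [(pop, pop * (1 - alpha) + minibatch * alpha)
                 for pop, minibatch in pop_updates]
# With blocks.algorithms.GradientDescent these would be attached via
# algorithm.add_updates(extra_updates).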
def test_batch_normalized_mlp_construction():
    """Test that BatchNormalizedMLP performs construction correctly."""
    mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9])
    assert all(isinstance(a, Sequence) for a in mlp.activations)
    assert all(isinstance(a.children[0], BatchNormalization)
               for a in mlp.activations)
    assert all(isinstance(a.children[1], Tanh) for a in mlp.activations)
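# A minimal, self-contained usage sketch of the structure the tests above
# assert (assumptions: Blocks >= 0.2; the IsotropicGaussian initializer is
# only illustrative). Applied outside the batch_normalization() context,
# the brick builds an inference-mode graph that uses the stored population
# statistics rather than minibatch statistics.
import theano
from theano import tensor
from blocks.bricks import Tanh
from blocks.bricks.bn import BatchNormalizedMLP
from blocks.initialization import Constant, IsotropicGaussian

x = tensor.matrix('x')
mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9],
                         weights_init=IsotropicGaussian(0.01),
                         biases_init=Constant(0))
mlp.initialize()
y = mlp.apply(x)  # inference-mode graph (population mean/stdev)
f = theano.function([x], y)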
def build_model(images, labels):
    # Construct a bottom convolutional sequence
    bottom_conv_sequence = convolutional_sequence((3, 3), 16, (160, 160))
    bottom_conv_sequence._push_allocation_config()

    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    # top_mlp = MLP([Rectifier(name='non_linear_9'),
    #                Softmax(name='non_linear_11')],
    #               [conv_out_dim, 1024, 10],
    #               weights_init=IsotropicGaussian(), biases_init=Constant(0))
    top_mlp = BatchNormalizedMLP(
        [Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')],
        [conv_out_dim, 1024, 10],
        weights_init=IsotropicGaussian(), biases_init=Constant(0))

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([bottom_conv_sequence.apply,
                                  flattener.apply,
                                  top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost_noreg = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    # add regularization
    selector = Selector([top_mlp])
    Ws = selector.get_parameters('W')
    mlp_brick_name = 'batchnormalizedmlp'
    W0 = Ws['/%s/linear_0.W' % mlp_brick_name]
    W1 = Ws['/%s/linear_1.W' % mlp_brick_name]
    cost = cost_noreg + .01 * (W0 ** 2).mean() + .01 * (W1 ** 2).mean()

    return cost
def __init__(self, image_dimension, **kwargs):
    layers = []

    #############################################
    # a first block with 2 convolutions of 32 (3, 3) filters
    layers.append(Convolutional((3, 3), 32, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 32, border_mode='half'))
    layers.append(Rectifier())

    # maxpool with size=(2, 2)
    layers.append(MaxPooling((2, 2)))

    #############################################
    # a 2nd block with 3 convolutions of 64 (3, 3) filters
    layers.append(Convolutional((3, 3), 64, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 64, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 64, border_mode='half'))
    layers.append(Rectifier())

    # maxpool with size=(2, 2)
    layers.append(MaxPooling((2, 2)))

    #############################################
    # a 3rd block with 4 convolutions of 128 (3, 3) filters
    layers.append(Convolutional((3, 3), 128, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 128, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 128, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 128, border_mode='half'))
    layers.append(Rectifier())

    # maxpool with size=(2, 2)
    layers.append(MaxPooling((2, 2)))

    self.conv_sequence = ConvolutionalSequence(layers, 3,
                                               image_size=image_dimension)

    flattener = Flattener()

    self.top_mlp = BatchNormalizedMLP(activations=[Rectifier(), Logistic()],
                                      dims=[500, 1])

    application_methods = [self.conv_sequence.apply, flattener.apply,
                           self.top_mlp.apply]
    super(VGGNet, self).__init__(application_methods,
                                 biases_init=Constant(0),
                                 weights_init=Uniform(width=.1), **kwargs)
def setUp(self):
    self.mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9])
    self.x = tensor.matrix()
def build_model(images, labels):
    vgg = VGG(layer='conv3_4')
    vgg.push_initialization_config()
    vgg.initialize()

    sb = SubstractBatch()

    # Construct a bottom convolutional sequence
    layers = [
        Convolutional(filter_size=(3, 3), num_filters=100, use_bias=True,
                      tied_biases=True, name='final_conv0'),
        BatchNormalization(name='batchnorm_1'),
        Rectifier(name='final_conv0_act'),
        Convolutional(filter_size=(3, 3), num_filters=100, use_bias=True,
                      tied_biases=True, name='final_conv1'),
        BatchNormalization(name='batchnorm_2'),
        Rectifier(name='final_conv1_act'),
        MaxPooling(pooling_size=(2, 2), name='maxpool_final')
    ]
    bottom_conv_sequence = ConvolutionalSequence(
        layers, num_channels=256, image_size=(40, 40),
        biases_init=Constant(0.), weights_init=IsotropicGaussian(0.01))
    bottom_conv_sequence._push_allocation_config()

    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    print 'dim output conv:', bottom_conv_sequence.get_dim('output')
    # conv_out_dim = 20 * 40 * 40
    top_mlp = BatchNormalizedMLP(
        [Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')],
        [conv_out_dim, 1024, 10],
        weights_init=IsotropicGaussian(), biases_init=Constant(0))

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([vgg.apply, bottom_conv_sequence.apply,
                                  flattener.apply, top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost_noreg = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    # add regularization
    selector = Selector([top_mlp])
    Ws = selector.get_parameters('W')
    mlp_brick_name = 'batchnormalizedmlp'
    W0 = Ws['/%s/linear_0.W' % mlp_brick_name]
    W1 = Ws['/%s/linear_1.W' % mlp_brick_name]
    cost = cost_noreg + .0001 * (W0 ** 2).sum() + .001 * (W1 ** 2).sum()

    # define learned parameters
    selector = Selector([ss_seq])
    Ws = selector.get_parameters('W')
    bs = selector.get_parameters('b')
    BNSCs = selector.get_parameters('batch_norm_scale')
    BNSHs = selector.get_parameters('batch_norm_shift')

    parameters_top = []
    parameters_top += [v for k, v in Ws.items()]
    parameters_top += [v for k, v in bs.items()]
    parameters_top += [v for k, v in BNSCs.items()]
    parameters_top += [v for k, v in BNSHs.items()]

    selector = Selector([vgg])
    convs = selector.get_parameters()

    parameters_all = []
    parameters_all += parameters_top
    parameters_all += [v for k, v in convs.items()]

    return cost, [parameters_top, parameters_all]
def __init__(self, dim, emb_dim, vocab, def_emb_translate_dim=-1,
             def_dim=-1, encoder='bilstm', bn=True, def_reader=None,
             def_combiner=None, dropout=0.5, num_input_words=-1,
             # Others
             **kwargs):
    self._dropout = dropout
    self._vocab = vocab
    self._emb_dim = emb_dim
    self._def_reader = def_reader
    self._def_combiner = def_combiner

    if encoder != 'bilstm':
        raise NotImplementedError()

    if def_emb_translate_dim < 0:
        self.def_emb_translate_dim = emb_dim
    else:
        self.def_emb_translate_dim = def_emb_translate_dim

    if def_dim < 0:
        self._def_dim = emb_dim
    else:
        self._def_dim = def_dim

    if num_input_words > 0:
        logger.info("Restricting vocab to " + str(num_input_words))
        self._num_input_words = num_input_words
    else:
        self._num_input_words = vocab.size()

    children = []

    if self.def_emb_translate_dim != self._emb_dim:
        self._translate_pre_def = Linear(input_dim=emb_dim,
                                         output_dim=def_emb_translate_dim)
        children.append(self._translate_pre_def)
    else:
        self._translate_pre_def = None

    ## Embedding
    self._lookup = LookupTable(self._num_input_words, emb_dim,
                               weights_init=GlorotUniform())
    children.append(self._lookup)

    if def_reader:
        self._final_emb_dim = self._def_dim
        self._def_reader = def_reader
        self._def_combiner = def_combiner
        children.extend([self._def_reader, self._def_combiner])
    else:
        self._final_emb_dim = self._emb_dim

    ## BiLSTM
    self._hyp_bidir_fork = Linear(
        self._def_dim if def_reader else self._emb_dim, 4 * dim,
        name='hyp_bidir_fork')
    self._hyp_bidir = Bidirectional(LSTM(dim), name='hyp_bidir')
    self._prem_bidir_fork = Linear(
        self._def_dim if def_reader else self._emb_dim, 4 * dim,
        name='prem_bidir_fork')
    self._prem_bidir = Bidirectional(LSTM(dim), name='prem_bidir')
    children.extend([self._hyp_bidir_fork, self._hyp_bidir])
    children.extend([self._prem_bidir, self._prem_bidir_fork])

    ## BiLSTM no. 2 (encoded attentioned embeddings)
    self._hyp_bidir_fork2 = Linear(8 * dim, 4 * dim, name='hyp_bidir_fork2')
    self._hyp_bidir2 = Bidirectional(LSTM(dim), name='hyp_bidir2')
    self._prem_bidir_fork2 = Linear(8 * dim, 4 * dim, name='prem_bidir_fork2')
    self._prem_bidir2 = Bidirectional(LSTM(dim), name='prem_bidir2')
    children.extend([self._hyp_bidir_fork2, self._hyp_bidir2])
    children.extend([self._prem_bidir2, self._prem_bidir_fork2])

    self._rnns = [self._prem_bidir2, self._hyp_bidir2,
                  self._prem_bidir, self._hyp_bidir]

    ## MLP
    if bn:
        self._mlp = BatchNormalizedMLP([Tanh()], [8 * dim, dim],
                                       conserve_memory=False, name="mlp")
        self._pred = BatchNormalizedMLP([Softmax()], [dim, 3],
                                        conserve_memory=False,
                                        name="pred_mlp")
    else:
        self._mlp = MLP([Tanh()], [8 * dim, dim], name="mlp")
        self._pred = MLP([Softmax()], [dim, 3], name="pred_mlp")

    children.append(self._mlp)
    children.append(self._pred)

    ## Softmax
    self._ndim_softmax = NDimensionalSoftmax()
    children.append(self._ndim_softmax)

    super(ESIM, self).__init__(children=children, **kwargs)
def training(runname, rnnType, maxPackets, packetTimeSteps, packetReverse,
             padOldTimeSteps, wtstd, lr, decay, clippings, dimIn, dim,
             attentionEnc, attentionContext, numClasses, batch_size, epochs,
             trainPercent, dataPath, loadPrepedData, channel):  # pragma: no cover

    print locals()
    print

    X = T.tensor4('inputs')
    Y = T.matrix('targets')

    linewt_init = IsotropicGaussian(wtstd)
    line_bias = Constant(1.0)
    rnnwt_init = IsotropicGaussian(wtstd)
    rnnbias_init = Constant(0.0)
    classifierWts = IsotropicGaussian(wtstd)
    # assumed init for the optional attention MLPs (otherwise undefined below)
    attnWts = IsotropicGaussian(wtstd)

    learning_rateClass = theano.shared(np.array(lr, dtype=theano.config.floatX))
    learning_decay = np.array(decay, dtype=theano.config.floatX)

    ### DATA PREP
    print 'loading data'
    if loadPrepedData:
        hexSessions = loadFile(dataPath)
    else:
        sessioner = sessionizer.HexSessionizer(dataPath)
        hexSessions = sessioner.read_pcap()
        hexSessions = removeBadSessionizer(hexSessions)

    numSessions = len(hexSessions)
    print str(numSessions) + ' sessions found'
    hexSessionsKeys = order_keys(hexSessions)
    hexDict = hexTokenizer()

    print 'creating dictionary of ip communications'
    comsDict, uniqIPs = srcIpDict(hexSessions)
    comsDict = dictUniquerizer(comsDict)

    print 'initializing network graph'

    ### ENCODER
    if rnnType == 'gru':
        rnn = GatedRecurrent(dim=dim, weights_init=rnnwt_init,
                             biases_init=rnnbias_init, name='gru')
        dimMultiplier = 2
    else:
        rnn = LSTM(dim=dim, weights_init=rnnwt_init,
                   biases_init=rnnbias_init, name='lstm')
        dimMultiplier = 4

    fork = Fork(output_names=['linear', 'gates'], name='fork',
                input_dim=dimIn, output_dims=[dim, dim * dimMultiplier],
                weights_init=linewt_init, biases_init=line_bias)

    ### CONTEXT
    if rnnType == 'gru':
        rnnContext = GatedRecurrent(dim=dim, weights_init=rnnwt_init,
                                    biases_init=rnnbias_init,
                                    name='gruContext')
    else:
        rnnContext = LSTM(dim=dim, weights_init=rnnwt_init,
                          biases_init=rnnbias_init, name='lstmContext')

    forkContext = Fork(output_names=['linearContext', 'gatesContext'],
                       name='forkContext', input_dim=dim,
                       output_dims=[dim, dim * dimMultiplier],
                       weights_init=linewt_init, biases_init=line_bias)

    forkDec = Fork(output_names=['linear', 'gates'], name='forkDec',
                   input_dim=dim, output_dims=[dim, dim * dimMultiplier],
                   weights_init=linewt_init, biases_init=line_bias)

    ### CLASSIFIER
    bmlp = BatchNormalizedMLP(activations=[Tanh(), Tanh()],
                              dims=[dim, dim, numClasses],
                              weights_init=classifierWts,
                              biases_init=Constant(0.0001))

    # initialize the weights in all the functions
    fork.initialize()
    rnn.initialize()
    forkContext.initialize()
    rnnContext.initialize()
    forkDec.initialize()
    bmlp.initialize()

    def onestepEnc(X):
        data1, data2 = fork.apply(X)
        if rnnType == 'gru':
            hEnc = rnn.apply(data1, data2)
        else:
            hEnc, _ = rnn.apply(data2)
        return hEnc

    hEnc, _ = theano.scan(onestepEnc, X)  # (mini*numPackets, packetLen, 1, hexdictLen)

    if attentionEnc:
        attentionmlpEnc = MLP(activations=[Tanh()], dims=[dim, 1],
                              weights_init=attnWts, biases_init=Constant(1.0))
        attentionmlpEnc.initialize()

        hEncAttn = T.reshape(hEnc, (-1, packetTimeSteps, dim))

        def onestepEncAttn(hEncAttn):
            preEncattn = attentionmlpEnc.apply(hEncAttn)
            attEncsoft = Softmax()
            attEncpyx = attEncsoft.apply(preEncattn.flatten())
            attEncpred = attEncpyx.flatten()
            attenc = T.mul(hEncAttn.dimshuffle(1, 0),
                           attEncpred).dimshuffle(1, 0)
            return attenc

        attenc, _ = theano.scan(onestepEncAttn, hEncAttn)
        hEncReshape = T.reshape(T.sum(attenc, axis=1),
                                (-1, maxPackets, 1, dim))
    else:
        # [:, -1] takes the last rep for each packet
        # (mini, numPackets, 1, dimReduced)
        hEncReshape = T.reshape(hEnc[:, -1], (-1, maxPackets, 1, dim))

    def onestepContext(hEncReshape):
        data3, data4 = forkContext.apply(hEncReshape)
        if rnnType == 'gru':
            hContext = rnnContext.apply(data3, data4)
        else:
            hContext, _ = rnnContext.apply(data4)
        return hContext

    hContext, _ = theano.scan(onestepContext, hEncReshape)

    if attentionContext:
        attentionmlpContext = MLP(activations=[Tanh()], dims=[dim, 1],
                                  weights_init=attnWts,
                                  biases_init=Constant(1.0))
        attentionmlpContext.initialize()

        hContextAttn = T.reshape(hContext, (-1, maxPackets, dim))

        def onestepContextAttn(hContextAttn):
            preContextatt = attentionmlpContext.apply(hContextAttn)
            attContextsoft = Softmax()
            attContextpyx = attContextsoft.apply(preContextatt.flatten())
            attContextpred = attContextpyx.flatten()
            attcontext = T.mul(hContextAttn.dimshuffle(1, 0),
                               attContextpred).dimshuffle(1, 0)
            return attcontext

        attcontext, _ = theano.scan(onestepContextAttn, hContextAttn)
        hContextReshape = T.sum(attcontext, axis=1)
    else:
        hContextReshape = T.reshape(hContext[:, -1], (-1, dim))

    data5, _ = forkDec.apply(hContextReshape)
    pyx = bmlp.apply(data5)
    softmax = Softmax()
    softoutClass = softmax.apply(pyx)
    costClass = T.mean(CategoricalCrossEntropy().apply(Y, softoutClass))

    # CREATE GRAPH
    cgClass = ComputationGraph([costClass])
    paramsClass = VariableFilter(roles=[PARAMETER])(cgClass.variables)
    learning = learningfunctions.Learning(costClass, paramsClass,
                                          learning_rateClass,
                                          l1=0., l2=0., maxnorm=0.,
                                          c=clippings)
    updatesClass = learning.Adam()

    module_logger.info('starting graph compilation')
    classifierTrain = theano.function([X, Y],
                                      [costClass, hEnc, hContext, pyx,
                                       softoutClass],
                                      updates=updatesClass,
                                      allow_input_downcast=True)
    classifierPredict = theano.function([X], softoutClass,
                                        allow_input_downcast=True)
    module_logger.info('graph compilation finished')
    print 'finished graph compilation'

    trainIndex = int(len(hexSessionsKeys) * trainPercent)

    epochCost = []
    gradNorms = []
    trainAcc = []
    testAcc = []

    costCollect = []
    trainCollect = []

    module_logger.info('beginning training')
    iteration = 0
    # epoch
    for epoch in xrange(epochs):
        # iteration/minibatch
        for start, end in zip(range(0, trainIndex, batch_size),
                              range(batch_size, trainIndex, batch_size)):

            trainingTargets = []
            trainingSessions = []

            # create one minibatch with 0.5 normal and 0.5 abby normal traffic
            for trainKey in range(start, end):
                sessionForEncoding = list(
                    hexSessions[hexSessions.keys()[trainKey]][0])
                adfun = adversarialfunctions.Adversary(sessionForEncoding)
                adversaryList = [sessionForEncoding,
                                 adfun.dstIpSwapOut(comsDict, uniqIPs),
                                 adfun.portDirSwitcher(),
                                 adfun.ipDirSwitcher()]
                abbyIndex = random.sample(range(len(adversaryList)), 1)[0]

                targetClasses = [0] * numClasses
                targetClasses[abbyIndex] = 1
                abbyTarget = np.array(targetClasses,
                                      dtype=theano.config.floatX)
                # abbyOneHotSes is assumed to hold the one-hot encoding of
                # adversaryList[abbyIndex]; the encoding step is elided here.
                trainingSessions.append(abbyOneHotSes[0])
                trainingTargets.append(abbyTarget)

            sessionsMinibatch = np.asarray(trainingSessions).reshape(
                (-1, packetTimeSteps, 1, dimIn))
            targetsMinibatch = np.asarray(trainingTargets)

            costfun = classifierTrain(sessionsMinibatch, targetsMinibatch)

            if iteration % (numSessions / (10 * batch_size)) == 0:
                costCollect.append(costfun[0])
                trainCollect.append(
                    np.mean(np.argmax(costfun[-1], axis=1) ==
                            np.argmax(targetsMinibatch, axis=1)))
                module_logger.info(' Iteration: %s', iteration)
                module_logger.info(' Cost: %s', np.mean(costCollect))
                module_logger.info(' TRAIN accuracy: %s', np.mean(trainCollect))
                print ' Iteration: ', iteration
                print ' Cost: ', np.mean(costCollect)
                print ' TRAIN accuracy: ', np.mean(trainCollect)

            iteration += 1

            # testing accuracy
            if iteration % (numSessions / (2 * batch_size)) == 0:
                predtar, acttar, testCollect = predictClass(
                    classifierPredict, hexSessions, comsDict, uniqIPs,
                    hexDict, hexSessionsKeys, numClasses, trainPercent,
                    dimIn, maxPackets, packetTimeSteps, padOldTimeSteps)
                binaryPrecisionRecall(predtar, acttar, numClasses)
                module_logger.info(str(testCollect))

            # save the models
            if iteration % (numSessions / (5 * batch_size)) == 0:
                save_model(classifierPredict)

        epochCost.append(np.mean(costCollect))
        trainAcc.append(np.mean(trainCollect))

        module_logger.info('Epoch: %s', epoch)
        module_logger.info('Epoch cost average: %s', epochCost[-1])
        module_logger.info('Epoch TRAIN accuracy: %s', trainAcc[-1])
        print 'Epoch: ', epoch
        print 'Epoch cost average: ', epochCost[-1]
        print 'Epoch TRAIN accuracy: ', trainAcc[-1]

    return classifierTrain, classifierPredict