Example #1
class TestSimpleGetBatchNormalizationUpdates(object):
    def setUp(self):
        self.mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9])
        self.x = tensor.matrix()

    def simple_assertions(self, updates, num_bricks=2, num_updates=4):
        """Shared assertions for simple tests."""
        assert len(updates) == num_updates
        assert all(is_shared_variable(u[0]) for u in updates)
        # This order is somewhat arbitrary and implementation-dependent
        means = set(u[0] for u in updates
                    if has_roles(u[0], [BATCH_NORM_POPULATION_MEAN]))
        stdevs = set(u[0] for u in updates
                     if has_roles(u[0], [BATCH_NORM_POPULATION_STDEV]))
        assert means.isdisjoint(stdevs)
        assert len(set(get_brick(v) for v in means)) == num_bricks
        assert len(set(get_brick(v) for v in stdevs)) == num_bricks

    def test_get_batch_normalization_updates(self):
        """Test that get_batch_normalization_updates works as expected."""
        with batch_normalization(self.mlp):
            y_bn = self.mlp.apply(self.x)
        graph = ComputationGraph([y_bn])
        updates = get_batch_normalization_updates(graph)
        self.simple_assertions(updates)

    def test_get_batch_normalization_updates_non_training_applications(self):
        """Test updates extracton in graph with non-training apply."""
        y = self.mlp.apply(self.x)
        with batch_normalization(self.mlp):
            y_bn = self.mlp.apply(self.x)
        graph = ComputationGraph([y_bn, y])
        updates = get_batch_normalization_updates(graph)
        self.simple_assertions(updates)

    def test_get_batch_normalization_updates_no_training(self):
        """Test for exception if there are no training-mode nodes."""
        y = self.mlp.apply(self.x)
        graph = ComputationGraph([y])
        numpy.testing.assert_raises(ValueError,
                                    get_batch_normalization_updates, graph)

    def test_get_batch_normalization_updates_duplicates_error(self):
        """Test that we get an error by default on multiple apply."""
        with batch_normalization(self.mlp):
            y = self.mlp.apply(self.x)
            y2 = self.mlp.apply(self.x)
        graph = ComputationGraph([y, y2])
        numpy.testing.assert_raises(ValueError,
                                    get_batch_normalization_updates, graph)

    def test_get_batch_normalization_updates_allow_duplicates(self):
        """Test get_batch_normalization_updates(allow_duplicates=True)."""
        with batch_normalization(self.mlp):
            y = self.mlp.apply(self.x)
            y2 = self.mlp.apply(self.x)
        graph = ComputationGraph([y, y2])
        updates = get_batch_normalization_updates(graph, allow_duplicates=True)
        self.simple_assertions(updates, num_bricks=2, num_updates=8)
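The pairs returned by get_batch_normalization_updates only take effect once they are folded into a Theano function's updates (or merged into a training algorithm's updates). A minimal end-to-end sketch of that pattern, assuming the standard Blocks import locations (blocks.bricks, blocks.initialization, blocks.graph) and an illustrative averaging constant alpha:

import numpy
import theano
from theano import tensor
from blocks.bricks import BatchNormalizedMLP, Tanh
from blocks.initialization import Constant, IsotropicGaussian
from blocks.graph import (ComputationGraph, batch_normalization,
                          get_batch_normalization_updates)

x = tensor.matrix('x')
mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9],
                         weights_init=IsotropicGaussian(0.1),
                         biases_init=Constant(0))
mlp.initialize()
with batch_normalization(mlp):  # training mode: use minibatch statistics
    cost = mlp.apply(x).sum()
pop_updates = get_batch_normalization_updates(ComputationGraph([cost]))
# Fold each minibatch estimate into its population statistic by
# exponential moving average.
alpha = 0.05
bn_updates = [(pop, (1 - alpha) * pop + alpha * mb)
              for pop, mb in pop_updates]
train = theano.function([x], cost, updates=bn_updates)
train(numpy.ones((4, 5), dtype=theano.config.floatX))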
Example #2
def test_batch_normalized_mlp_transformed():
    """Smoke test that a graph involving a BatchNormalizedMLP transforms."""
    x = tensor.matrix('x')
    mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9])
    with batch_normalization(mlp):
        y = mlp.apply(x)
    assert len(get_batch_normalization_updates(ComputationGraph([y]))) == 4
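The expected count of four comes from the two hidden layers: each BatchNormalization brick contributes one population-mean and one population-stdev update. A short sketch making that breakdown explicit, reusing the imports and role filters from the tests above:

x = tensor.matrix('x')
mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9])
with batch_normalization(mlp):
    y = mlp.apply(x)
updates = get_batch_normalization_updates(ComputationGraph([y]))
means = [v for v, _ in updates if has_roles(v, [BATCH_NORM_POPULATION_MEAN])]
stdevs = [v for v, _ in updates if has_roles(v, [BATCH_NORM_POPULATION_STDEV])]
assert len(means) == len(stdevs) == 2  # one (mean, stdev) pair per hidden layer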
Example #3
def test_get_batch_normalization_updates_mean_only(self):
    """Test get_batch_normalization_updates with mean_only bricks."""
    mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9], mean_only=True)
    with batch_normalization(mlp):
        y_bn = mlp.apply(self.x)
    graph = ComputationGraph([y_bn])
    updates = get_batch_normalization_updates(graph)
    self.simple_assertions(updates, num_updates=2, mean_only=True)
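This test passes a mean_only keyword that the simple_assertions helper shown in Example #1 does not accept, so the shared helper presumably branches on it. A plausible extension, under that assumption, which skips the stdev checks when mean_only=True:

def simple_assertions(self, updates, num_bricks=2, num_updates=4,
                      mean_only=False):
    """Shared assertions for simple tests."""
    assert len(updates) == num_updates
    assert all(is_shared_variable(u[0]) for u in updates)
    means = set(u[0] for u in updates
                if has_roles(u[0], [BATCH_NORM_POPULATION_MEAN]))
    assert len(set(get_brick(v) for v in means)) == num_bricks
    if mean_only:
        # mean_only bricks allocate no population stdev, so none may appear
        assert not any(has_roles(u[0], [BATCH_NORM_POPULATION_STDEV])
                       for u in updates)
    else:
        stdevs = set(u[0] for u in updates
                     if has_roles(u[0], [BATCH_NORM_POPULATION_STDEV]))
        assert means.isdisjoint(stdevs)
        assert len(set(get_brick(v) for v in stdevs)) == num_bricks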
Example #4
def training(runname, rnnType, maxPackets, packetTimeSteps, packetReverse, padOldTimeSteps, wtstd, 
             lr, decay, clippings, dimIn, dim, attentionEnc, attentionContext, numClasses, batch_size, epochs, 
             trainPercent, dataPath, loadPrepedData, channel):  # pragma: no cover
    print locals()
    print
    
    X = T.tensor4('inputs')
    Y = T.matrix('targets')
    linewt_init = IsotropicGaussian(wtstd)
    line_bias = Constant(1.0)
    rnnwt_init = IsotropicGaussian(wtstd)
    rnnbias_init = Constant(0.0)
    classifierWts = IsotropicGaussian(wtstd)
    attnWts = IsotropicGaussian(wtstd)  # init for the attention MLPs below

    learning_rateClass = theano.shared(np.array(lr, dtype=theano.config.floatX))
    learning_decay = np.array(decay, dtype=theano.config.floatX)
    
    ###DATA PREP
    print 'loading data'
    if loadPrepedData:
        hexSessions = loadFile(dataPath)

    else:
        sessioner = sessionizer.HexSessionizer(dataPath)
        hexSessions = sessioner.read_pcap()
        hexSessions = removeBadSessionizer(hexSessions)

    numSessions = len(hexSessions)
    print str(numSessions) + ' sessions found'
    hexSessionsKeys = order_keys(hexSessions)
    hexDict = hexTokenizer()
    
    print 'creating dictionary of ip communications'
    comsDict, uniqIPs = srcIpDict(hexSessions)
    comsDict = dictUniquerizer(comsDict)
     
    print 'initializing network graph'
    ###ENCODER
    if rnnType == 'gru':
        rnn = GatedRecurrent(dim=dim, weights_init=rnnwt_init,
                             biases_init=rnnbias_init, name='gru')
        dimMultiplier = 2
    else:
        rnn = LSTM(dim=dim, weights_init=rnnwt_init,
                   biases_init=rnnbias_init, name='lstm')
        dimMultiplier = 4

    fork = Fork(output_names=['linear', 'gates'],
                name='fork', input_dim=dimIn,
                output_dims=[dim, dim * dimMultiplier],
                weights_init=linewt_init, biases_init=line_bias)

    ###CONTEXT
    if rnnType == 'gru':
        rnnContext = GatedRecurrent(dim=dim, weights_init=rnnwt_init,
                                    biases_init=rnnbias_init, name='gruContext')
    else:
        rnnContext = LSTM(dim=dim, weights_init=rnnwt_init,
                          biases_init=rnnbias_init, name='lstmContext')

    forkContext = Fork(output_names=['linearContext', 'gatesContext'],
                       name='forkContext', input_dim=dim,
                       output_dims=[dim, dim * dimMultiplier],
                       weights_init=linewt_init, biases_init=line_bias)

    forkDec = Fork(output_names=['linear', 'gates'],
                   name='forkDec', input_dim=dim,
                   output_dims=[dim, dim * dimMultiplier],
                   weights_init=linewt_init, biases_init=line_bias)

    #CLASSIFIER
    bmlp = BatchNormalizedMLP(activations=[Tanh(), Tanh()],
                              dims=[dim, dim, numClasses],
                              weights_init=classifierWts,
                              biases_init=Constant(0.0001))

    #initialize the weights in all the functions
    fork.initialize()
    rnn.initialize()
    forkContext.initialize()
    rnnContext.initialize()
    forkDec.initialize()
    bmlp.initialize()

    def onestepEnc(X):
        data1, data2 = fork.apply(X) 

        if rnnType == 'gru':
            hEnc = rnn.apply(data1, data2) 
        else:
            hEnc, _ = rnn.apply(data2)

        return hEnc

    hEnc, _ = theano.scan(onestepEnc, X)  # (mini*numPackets, packetLen, 1, hexdictLen)

    if attentionEnc:
        attentionmlpEnc = MLP(activations=[Tanh()], dims=[dim, 1],
                              weights_init=attnWts, biases_init=Constant(1.0))
        attentionmlpEnc.initialize()

        hEncAttn = T.reshape(hEnc, (-1, packetTimeSteps, dim))
        def onestepEncAttn(hEncAttn):

            preEncattn = attentionmlpEnc.apply(hEncAttn)
            attEncsoft = Softmax()
            attEncpyx = attEncsoft.apply(preEncattn.flatten())
            attEncpred = attEncpyx.flatten()
            attenc = T.mul(hEncAttn.dimshuffle(1,0), attEncpred).dimshuffle(1,0)

            return attenc

        attenc, _ = theano.scan(onestepEncAttn, hEncAttn)

        hEncReshape = T.reshape(T.sum(attenc, axis = 1), (-1, maxPackets, 1, dim))

    else:
        hEncReshape = T.reshape(hEnc[:, -1], (-1, maxPackets, 1, dim))  # [:,-1] takes the last rep for each packet
                                                                        # (mini, numPackets, 1, dimReduced)

    def onestepContext(hEncReshape):

        data3, data4 = forkContext.apply(hEncReshape)

        if rnnType == 'gru':
            hContext = rnnContext.apply(data3, data4)
        else:
            hContext, _ = rnnContext.apply(data4)

        return hContext

    hContext, _ = theano.scan(onestepContext, hEncReshape)
    
    if attentionContext:
        attentionmlpContext = MLP(activations=[Tanh()], dims=[dim, 1],
                                  weights_init=attnWts, biases_init=Constant(1.0))
        attentionmlpContext.initialize()

        hContextAttn = T.reshape(hContext, (-1,maxPackets,dim))
        def onestepContextAttn(hContextAttn):

            preContextatt = attentionmlpContext.apply(hContextAttn)
            attContextsoft = Softmax()
            attContextpyx = attContextsoft.apply(preContextatt.flatten())
            attContextpred = attContextpyx.flatten()
            attcontext = T.mul(hContextAttn.dimshuffle(1,0), attContextpred).dimshuffle(1,0)

            return attcontext

        attcontext, _ = theano.scan(onestepContextAttn, hContextAttn)
        hContextReshape = T.sum(attcontext, axis = 1)

    else:
        hContextReshape = T.reshape(hContext[:,-1], (-1,dim))

    data5, _ = forkDec.apply(hContextReshape)
    pyx = bmlp.apply(data5)
    softmax = Softmax()
    softoutClass = softmax.apply(pyx)
    costClass = T.mean(CategoricalCrossEntropy().apply(Y, softoutClass))

    #CREATE GRAPH
    cgClass = ComputationGraph([costClass])
    paramsClass = VariableFilter(roles = [PARAMETER])(cgClass.variables)
    learning = learningfunctions.Learning(costClass, paramsClass, learning_rateClass,
                                          l1=0., l2=0., maxnorm=0., c=clippings)
    updatesClass = learning.Adam() 

    module_logger.info('starting graph compilation')
    classifierTrain = theano.function([X,Y], [costClass, hEnc, hContext, pyx, softoutClass], 
                                      updates=updatesClass, allow_input_downcast=True)
    classifierPredict = theano.function([X], softoutClass, allow_input_downcast=True)
    module_logger.info('graph compilation finished')
    print 'finished graph compilation'

    trainIndex = int(len(hexSessionsKeys)*trainPercent)

    epochCost = []
    gradNorms = []
    trainAcc = []
    testAcc = []

    costCollect = []
    trainCollect = []

    module_logger.info('beginning training')
    iteration = 0
    #epoch
    for epoch in xrange(epochs):

        #iteration/minibatch
        for start, end in zip(range(0, trainIndex, batch_size),
                              range(batch_size, trainIndex, batch_size)):

            trainingTargets = []
            trainingSessions = []

            #create one minibatch mixing normal and adversarial ('abby normal') traffic
            for trainKey in range(start, end):
                sessionForEncoding = list(hexSessions[hexSessions.keys()[trainKey]][0])
    
                adfun = adversarialfunctions.Adversary(sessionForEncoding)
                adversaryList = [sessionForEncoding, 
                                 adfun.dstIpSwapOut(comsDict, uniqIPs),
                                 adfun.portDirSwitcher(),
                                 adfun.ipDirSwitcher()]
                abbyIndex = random.sample(range(len(adversaryList)), 1)[0]

                targetClasses = [0]*numClasses
                targetClasses[abbyIndex] = 1
                abbyTarget = np.array(targetClasses, dtype=theano.config.floatX)
                # abbyOneHotSes is assumed to hold the one-hot encoding of the
                # selected adversarial session; the encoding step is elided here
                trainingSessions.append(abbyOneHotSes[0])
                trainingTargets.append(abbyTarget)

            sessionsMinibatch = np.asarray(trainingSessions).reshape((-1, packetTimeSteps, 1, dimIn))
            targetsMinibatch = np.asarray(trainingTargets)

            costfun = classifierTrain(sessionsMinibatch, targetsMinibatch)

            if iteration % (numSessions / (10 * batch_size)) == 0:
                costCollect.append(costfun[0])
                trainCollect.append(np.mean(np.argmax(costfun[-1], axis=1) ==
                                            np.argmax(targetsMinibatch, axis=1)))
                module_logger.info('   Iteration: %s', iteration)
                module_logger.info('   Cost: %s', np.mean(costCollect))
                module_logger.info('   TRAIN accuracy: %s', np.mean(trainCollect))
                print '   Iteration: ', iteration
                print '   Cost: ', np.mean(costCollect)
                print '   TRAIN accuracy: ', np.mean(trainCollect)

            iteration += 1

            #testing accuracy
            if iteration % (numSessions / (2 * batch_size)) == 0:
                predtar, acttar, testCollect = predictClass(classifierPredict, hexSessions, comsDict, uniqIPs, hexDict,
                                                            hexSessionsKeys,
                                                            numClasses, trainPercent, dimIn, maxPackets, packetTimeSteps,
                                                            padOldTimeSteps)
                binaryPrecisionRecall(predtar, acttar, numClasses)
                module_logger.info(str(testCollect))

            #save the models
            if iteration % (numSessions / (5 * batch_size)) == 0:
                save_model(classifierPredict)

        epochCost.append(np.mean(costCollect))
        trainAcc.append(np.mean(trainCollect))
        
        module_logger.info('Epoch: %s', epoch)
        module_logger.info('Epoch cost average: %s', epochCost[-1])
        module_logger.info('Epoch TRAIN accuracy: %s', trainAcc[-1])
        print 'Epoch: ', epoch
        print 'Epoch cost average: ', epochCost[-1]
        print 'Epoch TRAIN accuracy: ', trainAcc[-1]

    return classifierTrain, classifierPredict
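One detail of the minibatch loop in Example #4 worth calling out: pairing the two range calls yields consecutive [start, end) windows over the training keys and silently drops any final partial batch. A standalone illustration:

batch_size, trainIndex = 4, 10
print(list(zip(range(0, trainIndex, batch_size),
               range(batch_size, trainIndex, batch_size))))
# [(0, 4), (4, 8)]  -- items 8 and 9 never form a full batch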