Example #1
def example2():
    """GRU"""
    x = tensor.tensor3('x')
    dim = 3

    fork = Fork(input_dim=dim,
                output_dims=[dim, dim * 2],
                name='fork',
                output_names=["linear", "gates"],
                weights_init=initialization.Identity(),
                biases_init=Constant(0))
    gru = GatedRecurrent(dim=dim,
                         weights_init=initialization.Identity(),
                         biases_init=Constant(0))

    fork.initialize()
    gru.initialize()

    linear, gate_inputs = fork.apply(x)
    h = gru.apply(linear, gate_inputs)

    f = theano.function([x], h)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX))) 

    doubler = Linear(input_dim=dim,
                     output_dim=dim,
                     weights_init=initialization.Identity(2),
                     biases_init=initialization.Constant(0))
    doubler.initialize()

    lin, gate = fork.apply(doubler.apply(x))
    h_doubler = gru.apply(lin, gate)

    f = theano.function([x], h_doubler)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX))) 
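The snippet is self-contained only up to imports. A minimal preamble that should make it runnable, assuming the standard Blocks 0.x module layout, is:

import numpy as np
import theano
from theano import tensor

from blocks import initialization
from blocks.initialization import Constant
from blocks.bricks import Linear
from blocks.bricks.parallel import Fork
from blocks.bricks.recurrent import GatedRecurrent

Because every weight matrix is initialized to the identity and the input is all ones, both printed results can be checked by hand; the second pass only doubles the input through the Linear brick before running the same fork/GRU pipeline.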
Example #2
class TestGatedRecurrent(unittest.TestCase):
    def setUp(self):
        self.gated = GatedRecurrent(
            dim=3, weights_init=Constant(2),
            activation=Tanh(), gate_activation=Tanh())
        self.gated.initialize()
        self.reset_only = GatedRecurrent(
            dim=3, weights_init=IsotropicGaussian(),
            activation=Tanh(), gate_activation=Tanh(),
            use_update_gate=False, rng=numpy.random.RandomState(1))
        self.reset_only.initialize()

    def test_one_step(self):
        h0 = tensor.matrix('h0')
        x = tensor.matrix('x')
        z = tensor.matrix('z')
        r = tensor.matrix('r')
        h1 = self.gated.apply(x, z, r, h0, iterate=False)
        next_h = theano.function(inputs=[h0, x, z, r], outputs=[h1])

        h0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]],
                                   dtype=floatX)
        x_val = 0.1 * numpy.array([[1, 2, 3], [4, 5, 6]],
                                  dtype=floatX)
        zi_val = (h0_val + x_val) / 2
        ri_val = -x_val
        W_val = 2 * numpy.ones((3, 3), dtype=floatX)

        z_val = numpy.tanh(h0_val.dot(W_val) + zi_val)
        r_val = numpy.tanh(h0_val.dot(W_val) + ri_val)
        h1_val = (z_val * numpy.tanh((r_val * h0_val).dot(W_val) + x_val)
                  + (1 - z_val) * h0_val)
        assert_allclose(h1_val, next_h(h0_val, x_val, zi_val, ri_val)[0],
                        rtol=1e-6)

    def test_reset_only_many_steps(self):
        x = tensor.tensor3('x')
        ri = tensor.tensor3('ri')
        mask = tensor.matrix('mask')
        h = self.reset_only.apply(x, reset_inputs=ri, mask=mask)
        calc_h = theano.function(inputs=[x, ri, mask], outputs=[h])

        x_val = 0.1 * numpy.asarray(list(itertools.permutations(range(4))),
                                    dtype=floatX)
        x_val = numpy.ones((24, 4, 3), dtype=floatX) * x_val[..., None]
        ri_val = 0.3 - x_val
        mask_val = numpy.ones((24, 4), dtype=floatX)
        mask_val[12:24, 3] = 0
        h_val = numpy.zeros((25, 4, 3), dtype=floatX)
        W = self.reset_only.state_to_state.get_value()
        U = self.reset_only.state_to_reset.get_value()

        for i in range(1, 25):
            r_val = numpy.tanh(h_val[i - 1].dot(U) + ri_val[i - 1])
            h_val[i] = numpy.tanh((r_val * h_val[i - 1]).dot(W)
                                  + x_val[i - 1])
            h_val[i] = (mask_val[i - 1, :, None] * h_val[i] +
                        (1 - mask_val[i - 1, :, None]) * h_val[i - 1])
        h_val = h_val[1:]
        # TODO Figure out why this tolerance needs to be so big
        assert_allclose(h_val, calc_h(x_val, ri_val, mask_val)[0], rtol=1e-03)
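For reference, the recurrence that test_one_step checks can be written out in plain NumPy. This is a sketch of the test's arithmetic only (Tanh is used for both gate activations, as in setUp, and Constant(2) makes all three state weight matrices the same W), not the Blocks implementation itself:

import numpy

def gru_step(h_prev, x, zi, ri, W):
    # update and reset gates (tanh gate activation, as in the test)
    z = numpy.tanh(h_prev.dot(W) + zi)
    r = numpy.tanh(h_prev.dot(W) + ri)
    # candidate state computed from the reset-modulated previous state
    h_tilde = numpy.tanh((r * h_prev).dot(W) + x)
    # convex combination of candidate and previous state
    return z * h_tilde + (1 - z) * h_prev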
Example #3
class TestGatedRecurrent(unittest.TestCase):
    def setUp(self):
        self.gated = GatedRecurrent(
            dim=3, activation=Tanh(),
            gate_activation=Tanh(), weights_init=Constant(2))
        self.gated.initialize()
        self.reset_only = GatedRecurrent(
            dim=3, activation=Tanh(),
            gate_activation=Tanh(),
            weights_init=IsotropicGaussian(), seed=1)
        self.reset_only.initialize()

    def test_one_step(self):
        h0 = tensor.matrix('h0')
        x = tensor.matrix('x')
        gi = tensor.matrix('gi')
        h1 = self.gated.apply(x, gi, h0, iterate=False)
        next_h = theano.function(inputs=[h0, x, gi], outputs=[h1])

        h0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]],
                                   dtype=theano.config.floatX)
        x_val = 0.1 * numpy.array([[1, 2, 3], [4, 5, 6]],
                                  dtype=theano.config.floatX)
        zi_val = (h0_val + x_val) / 2
        ri_val = -x_val
        W_val = 2 * numpy.ones((3, 3), dtype=theano.config.floatX)

        z_val = numpy.tanh(h0_val.dot(W_val) + zi_val)
        r_val = numpy.tanh(h0_val.dot(W_val) + ri_val)
        h1_val = (z_val * numpy.tanh((r_val * h0_val).dot(W_val) + x_val) +
                  (1 - z_val) * h0_val)
        assert_allclose(
            h1_val, next_h(h0_val, x_val, numpy.hstack([zi_val, ri_val]))[0],
            rtol=1e-6)

    def test_many_steps(self):
        x = tensor.tensor3('x')
        gi = tensor.tensor3('gi')
        mask = tensor.matrix('mask')
        h = self.reset_only.apply(x, gi, mask=mask)
        calc_h = theano.function(inputs=[x, gi, mask], outputs=[h])

        x_val = 0.1 * numpy.asarray(list(itertools.permutations(range(4))),
                                    dtype=theano.config.floatX)
        x_val = numpy.ones((24, 4, 3),
                           dtype=theano.config.floatX) * x_val[..., None]
        ri_val = 0.3 - x_val
        zi_val = 2 * ri_val
        mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
        mask_val[12:24, 3] = 0
        h_val = numpy.zeros((25, 4, 3), dtype=theano.config.floatX)
        W = self.reset_only.state_to_state.get_value()
        Wz = self.reset_only.state_to_gates.get_value()[:, :3]
        Wr = self.reset_only.state_to_gates.get_value()[:, 3:]

        for i in range(1, 25):
            z_val = numpy.tanh(h_val[i - 1].dot(Wz) + zi_val[i - 1])
            r_val = numpy.tanh(h_val[i - 1].dot(Wr) + ri_val[i - 1])
            h_val[i] = numpy.tanh((r_val * h_val[i - 1]).dot(W) +
                                  x_val[i - 1])
            h_val[i] = z_val * h_val[i] + (1 - z_val) * h_val[i - 1]
            h_val[i] = (mask_val[i - 1, :, None] * h_val[i] +
                        (1 - mask_val[i - 1, :, None]) * h_val[i - 1])
        h_val = h_val[1:]
        # TODO Figure out why this tolerance needs to be so big
        assert_allclose(
            h_val,
            calc_h(x_val, numpy.concatenate(
                [zi_val, ri_val], axis=2), mask_val)[0],
            rtol=1e-04)

        # Also test that initial state is a parameter
        initial_state, = VariableFilter(roles=[INITIAL_STATE])(
            ComputationGraph(h))
        assert is_shared_variable(initial_state)
        assert initial_state.name == 'initial_state'
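The final assertions use Blocks' role annotations. The same VariableFilter idiom is the usual way to pull variables out of a graph; for instance, collecting every trainable parameter (a usage sketch with the standard Blocks imports):

from blocks.graph import ComputationGraph
from blocks.filter import VariableFilter
from blocks.roles import PARAMETER

cg = ComputationGraph(h)
parameters = VariableFilter(roles=[PARAMETER])(cg.variables)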
Example #4
class TestGatedRecurrent(unittest.TestCase):
    def setUp(self):
        self.gated = GatedRecurrent(
            dim=3, weights_init=Constant(2),
            activation=Tanh(), gate_activation=Tanh())
        self.gated.initialize()
        self.reset_only = GatedRecurrent(
            dim=3, weights_init=IsotropicGaussian(),
            activation=Tanh(), gate_activation=Tanh(),
            use_update_gate=False, seed=1)
        self.reset_only.initialize()

    def test_one_step(self):
        h0 = tensor.matrix('h0')
        x = tensor.matrix('x')
        z = tensor.matrix('z')
        r = tensor.matrix('r')
        h1 = self.gated.apply(x, z, r, h0, iterate=False)
        next_h = theano.function(inputs=[h0, x, z, r], outputs=[h1])

        h0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]],
                                   dtype=floatX)
        x_val = 0.1 * numpy.array([[1, 2, 3], [4, 5, 6]],
                                  dtype=floatX)
        zi_val = (h0_val + x_val) / 2
        ri_val = -x_val
        W_val = 2 * numpy.ones((3, 3), dtype=floatX)

        z_val = numpy.tanh(h0_val.dot(W_val) + zi_val)
        r_val = numpy.tanh(h0_val.dot(W_val) + ri_val)
        h1_val = (z_val * numpy.tanh((r_val * h0_val).dot(W_val) + x_val) +
                  (1 - z_val) * h0_val)
        assert_allclose(h1_val, next_h(h0_val, x_val, zi_val, ri_val)[0],
                        rtol=1e-6)

    def test_reset_only_many_steps(self):
        x = tensor.tensor3('x')
        ri = tensor.tensor3('ri')
        mask = tensor.matrix('mask')
        h = self.reset_only.apply(x, reset_inputs=ri, mask=mask)
        calc_h = theano.function(inputs=[x, ri, mask], outputs=[h])

        x_val = 0.1 * numpy.asarray(list(itertools.permutations(range(4))),
                                    dtype=floatX)
        x_val = numpy.ones((24, 4, 3), dtype=floatX) * x_val[..., None]
        ri_val = 0.3 - x_val
        mask_val = numpy.ones((24, 4), dtype=floatX)
        mask_val[12:24, 3] = 0
        h_val = numpy.zeros((25, 4, 3), dtype=floatX)
        W = self.reset_only.state_to_state.get_value()
        U = self.reset_only.state_to_reset.get_value()

        for i in range(1, 25):
            r_val = numpy.tanh(h_val[i - 1].dot(U) + ri_val[i - 1])
            h_val[i] = numpy.tanh((r_val * h_val[i - 1]).dot(W) +
                                  x_val[i - 1])
            h_val[i] = (mask_val[i - 1, :, None] * h_val[i] +
                        (1 - mask_val[i - 1, :, None]) * h_val[i - 1])
        h_val = h_val[1:]
        # TODO Figure out why this tolerance needs to be so big
        assert_allclose(h_val, calc_h(x_val, ri_val, mask_val)[0], rtol=1e-03)
Example #5
class GatedRecurrentFull(Initializable):
    """A wrapper around the GatedRecurrent brick that improves usability.
    It contains:
        * A fork to map to initialize the reset and the update units.
        * Better initialization to initialize the different pieces
    While this works, there is probably a better more elegant way to do this.

    Parameters
    ----------
    hidden_dim : int
        dimension of the hidden state
    activation : :class:`.Brick`
    gate_activation : :class:`.Brick`

    state_to_state_init : object
        Weight initialization
    state_to_reset_init : object
        Weight initialization
    state_to_update_init : object
        Weight initialization

    input_to_state_transform : :class:`.Brick`
        [CvMG14] uses a Linear transform
    input_to_reset_transform : :class:`.Brick`
        [CvMG14] uses a Linear transform
    input_to_update_transform : :class:`.Brick`
        [CvMG14] uses a Linear transform

    References
    ----------
    .. [CvMG14] Kyunghyun Cho, Bart van Merriënboer, Çağlar Gülçehre,
        Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua
        Bengio, *Learning Phrase Representations using RNN Encoder-Decoder
        for Statistical Machine Translation*, EMNLP (2014), pp. 1724-1734.

    """
    @lazy(allocation=['hidden_dim', 'state_to_state_init',
                      'state_to_update_init', 'state_to_reset_init'],
          initialization=['input_to_state_transform',
                          'input_to_update_transform',
                          'input_to_reset_transform'])
    def __init__(self, hidden_dim, activation=None, gate_activation=None,
                 state_to_state_init=None, state_to_update_init=None,
                 state_to_reset_init=None, input_to_state_transform=None,
                 input_to_update_transform=None,
                 input_to_reset_transform=None, **kwargs):

        super(GatedRecurrentFull, self).__init__(**kwargs)
        self.hidden_dim = hidden_dim

        self.state_to_state_init = state_to_state_init
        self.state_to_update_init = state_to_update_init
        self.state_to_reset_init = state_to_reset_init

        self.input_to_state_transform = input_to_state_transform
        self.input_to_update_transform = input_to_update_transform
        self.input_to_reset_transform = input_to_reset_transform
        self.input_to_state_transform.name += "_input_to_state_transform"
        self.input_to_update_transform.name += "_input_to_update_transform"
        self.input_to_reset_transform.name += "_input_to_reset_transform"

        self.use_mine = True
        if self.use_mine:
            self.rnn = GatedRecurrentFast(
                    weights_init=Constant(np.nan),
                    dim=self.hidden_dim,
                    activation=activation,
                    gate_activation=gate_activation)
        else:
            self.rnn = GatedRecurrent(
                    weights_init=Constant(np.nan),
                    dim=self.hidden_dim,
                    activation=activation,
                    gate_activation=gate_activation)

        self.children = [self.rnn, self.input_to_state_transform,
                         self.input_to_update_transform,
                         self.input_to_reset_transform]
        self.children.extend(self.rnn.children)

    def initialize(self):
        super(GatedRecurrentFull, self).initialize()

        self.input_to_state_transform.initialize()
        self.input_to_update_transform.initialize()
        self.input_to_reset_transform.initialize()

        self.rnn.initialize()

        weight_shape = (self.hidden_dim, self.hidden_dim)
        state_to_state = self.state_to_state_init.generate(
            rng=self.rng, shape=weight_shape)
        state_to_update = self.state_to_update_init.generate(
            rng=self.rng, shape=weight_shape)
        state_to_reset = self.state_to_reset_init.generate(
            rng=self.rng, shape=weight_shape)

        self.rnn.state_to_state.set_value(state_to_state)

        if self.use_mine:
            self.rnn.state_to_update.set_value(state_to_update)
            self.rnn.state_to_reset.set_value(state_to_reset)
        else:
            self.rnn.state_to_gates.set_value(
                np.hstack((state_to_update, state_to_reset)))

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_, mask=None):
        """

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            sequence to feed into the GRU. Axes are batch, sequence, features

        mask : :class:`~tensor.TensorVariable`
            A binary array with 1 where data is available and 0 where it is
            padding.

        Returns
        -------
        output: :class:`theano.tensor.TensorVariable`
            sequence to feed out. Axes are batch, sequence, features
        """
        states_from_in = self.input_to_state_transform.apply(input_)
        update_from_in = self.input_to_update_transform.apply(input_)
        reset_from_in = self.input_to_reset_transform.apply(input_)

        gate_inputs = tensor.concatenate([update_from_in, reset_from_in],
                                         axis=2)

        if self.use_mine:
            output = self.rnn.apply(inputs=states_from_in,
                                    update_inputs=update_from_in,
                                    reset_inputs=reset_from_in,
                                    mask=mask)
        else:
            # pass the mask here too, matching the branch above
            output = self.rnn.apply(inputs=states_from_in,
                                    gate_inputs=gate_inputs, mask=mask)

        return output
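For orientation, a hypothetical construction of this brick (the Linear, Tanh, and Logistic bricks and the initializers are standard Blocks; GatedRecurrentFast is the author's own brick, and every dimension and value below is made up for illustration):

from blocks.bricks import Linear, Tanh, Logistic
from blocks.initialization import Constant, IsotropicGaussian, Orthogonal

input_dim, hidden_dim = 50, 100

def in_transform(name):
    # one Linear brick per GRU input block, as [CvMG14] does
    return Linear(input_dim=input_dim, output_dim=hidden_dim,
                  weights_init=IsotropicGaussian(0.02),
                  biases_init=Constant(0), name=name)

gru = GatedRecurrentFull(
    hidden_dim=hidden_dim,
    activation=Tanh(), gate_activation=Logistic(),
    state_to_state_init=Orthogonal(),
    state_to_update_init=Orthogonal(),
    state_to_reset_init=Orthogonal(),
    input_to_state_transform=in_transform('state'),
    input_to_update_transform=in_transform('update'),
    input_to_reset_transform=in_transform('reset'))
gru.initialize()
# output = gru.apply(x)  # x axes: batch, sequence, features (per the docstring)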
Example #6
def training(runname, rnnType, maxPackets, packetTimeSteps, packetReverse, padOldTimeSteps, wtstd, 
             lr, decay, clippings, dimIn, dim, attentionEnc, attentionContext, numClasses, batch_size, epochs, 
             trainPercent, dataPath, loadPrepedData, channel):  # pragma: no cover
    print locals()
    print
    
    X = T.tensor4('inputs')
    Y = T.matrix('targets')
    linewt_init = IsotropicGaussian(wtstd)
    line_bias = Constant(1.0)
    rnnwt_init = IsotropicGaussian(wtstd)
    rnnbias_init = Constant(0.0)
    classifierWts = IsotropicGaussian(wtstd)
    # attnWts is used by the attention MLPs below but was never defined in
    # the original listing; assuming it follows the same init scheme:
    attnWts = IsotropicGaussian(wtstd)

    learning_rateClass = theano.shared(np.array(lr, dtype=theano.config.floatX))
    learning_decay = np.array(decay, dtype=theano.config.floatX)
    
    ###DATA PREP
    print 'loading data'
    if loadPrepedData:
        hexSessions = loadFile(dataPath)

    else:
        sessioner = sessionizer.HexSessionizer(dataPath)
        hexSessions = sessioner.read_pcap()
        hexSessions = removeBadSessionizer(hexSessions)

    numSessions = len(hexSessions)
    print str(numSessions) + ' sessions found'
    hexSessionsKeys = order_keys(hexSessions)
    hexDict = hexTokenizer()
    
    print 'creating dictionary of ip communications'
    comsDict, uniqIPs = srcIpDict(hexSessions)
    comsDict = dictUniquerizer(comsDict)
     
    print 'initializing network graph'
    ###ENCODER
    if rnnType == 'gru':
        rnn = GatedRecurrent(dim=dim, weights_init=rnnwt_init,
                             biases_init=rnnbias_init, name='gru')
        dimMultiplier = 2
    else:
        rnn = LSTM(dim=dim, weights_init=rnnwt_init,
                   biases_init=rnnbias_init, name='lstm')
        dimMultiplier = 4

    fork = Fork(output_names=['linear', 'gates'],
                name='fork', input_dim=dimIn,
                output_dims=[dim, dim * dimMultiplier],
                weights_init=linewt_init, biases_init=line_bias)

    ###CONTEXT
    if rnnType == 'gru':
        rnnContext = GatedRecurrent(dim=dim, weights_init=rnnwt_init,
                                    biases_init=rnnbias_init,
                                    name='gruContext')
    else:
        rnnContext = LSTM(dim=dim, weights_init=rnnwt_init,
                          biases_init=rnnbias_init, name='lstmContext')

    forkContext = Fork(output_names=['linearContext', 'gatesContext'],
                       name='forkContext', input_dim=dim,
                       output_dims=[dim, dim * dimMultiplier],
                       weights_init=linewt_init, biases_init=line_bias)

    forkDec = Fork(output_names=['linear', 'gates'],
                   name='forkDec', input_dim=dim,
                   output_dims=[dim, dim * dimMultiplier],
                   weights_init=linewt_init, biases_init=line_bias)

    #CLASSIFIER
    bmlp = BatchNormalizedMLP(activations=[Tanh(), Tanh()],
                              dims=[dim, dim, numClasses],
                              weights_init=classifierWts,
                              biases_init=Constant(0.0001))

    #initialize the weights in all the functions
    fork.initialize()
    rnn.initialize()
    forkContext.initialize()
    rnnContext.initialize()
    forkDec.initialize()
    bmlp.initialize()

    def onestepEnc(X):
        data1, data2 = fork.apply(X) 

        if rnnType == 'gru':
            hEnc = rnn.apply(data1, data2) 
        else:
            hEnc, _ = rnn.apply(data2)

        return hEnc

    hEnc, _ = theano.scan(onestepEnc, X)  # (mini*numPackets, packetLen, 1, hexdictLen)

    if attentionEnc:
        attentionmlpEnc = MLP(activations=[Tanh()], dims=[dim, 1],
                              weights_init=attnWts, biases_init=Constant(1.0))
        attentionmlpEnc.initialize()

        hEncAttn = T.reshape(hEnc, (-1, packetTimeSteps, dim))
        def onestepEncAttn(hEncAttn):

            preEncattn = attentionmlpEnc.apply(hEncAttn)
            attEncsoft = Softmax()
            attEncpyx = attEncsoft.apply(preEncattn.flatten())
            attEncpred = attEncpyx.flatten()
            attenc = T.mul(hEncAttn.dimshuffle(1,0), attEncpred).dimshuffle(1,0)

            return attenc

        attenc, _ = theano.scan(onestepEncAttn, hEncAttn)

        hEncReshape = T.reshape(T.sum(attenc, axis = 1), (-1, maxPackets, 1, dim))

    else:
        hEncReshape = T.reshape(hEnc[:, -1], (-1, maxPackets, 1, dim))
        # [:, -1] takes the last representation for each packet
        # shape: (mini, numPackets, 1, dimReduced)
    def onestepContext(hEncReshape):

        data3, data4 = forkContext.apply(hEncReshape)

        if rnnType == 'gru':
            hContext = rnnContext.apply(data3, data4)
        else:
            hContext, _ = rnnContext.apply(data4)

        return hContext

    hContext, _ = theano.scan(onestepContext, hEncReshape)
    
    if attentionContext:
        attentionmlpContext = MLP(activations=[Tanh()], dims = [dim, 1], weights_init=attnWts,
               biases_init=Constant(1.0))
        attentionmlpContext.initialize()

        hContextAttn = T.reshape(hContext, (-1,maxPackets,dim))
        def onestepContextAttn(hContextAttn):

            preContextatt = attentionmlpContext.apply(hContextAttn)
            attContextsoft = Softmax()
            attContextpyx = attContextsoft.apply(preContextatt.flatten())
            attContextpred = attContextpyx.flatten()
            attcontext = T.mul(hContextAttn.dimshuffle(1,0), attContextpred).dimshuffle(1,0)

            return attcontext

        attcontext, _ = theano.scan(onestepContextAttn, hContextAttn)
        hContextReshape = T.sum(attcontext, axis = 1)

    else:
        hContextReshape = T.reshape(hContext[:,-1], (-1,dim))

    data5, _ = forkDec.apply(hContextReshape)
    pyx = bmlp.apply(data5)
    softmax = Softmax()
    softoutClass = softmax.apply(pyx)
    costClass = T.mean(CategoricalCrossEntropy().apply(Y, softoutClass))

    #CREATE GRAPH
    cgClass = ComputationGraph([costClass])
    paramsClass = VariableFilter(roles = [PARAMETER])(cgClass.variables)
    learning = learningfunctions.Learning(costClass, paramsClass,
                                          learning_rateClass, l1=0., l2=0.,
                                          maxnorm=0., c=clippings)
    updatesClass = learning.Adam() 

    module_logger.info('starting graph compilation')
    classifierTrain = theano.function([X,Y], [costClass, hEnc, hContext, pyx, softoutClass], 
                                      updates=updatesClass, allow_input_downcast=True)
    classifierPredict = theano.function([X], softoutClass, allow_input_downcast=True)
    module_logger.info('graph compilation finished')
    print 'finished graph compilation'

    trainIndex = int(len(hexSessionsKeys)*trainPercent)

    epochCost = []
    gradNorms = []
    trainAcc = []
    testAcc = []

    costCollect = []
    trainCollect = []

    module_logger.info('beginning training')
    iteration = 0
    #epoch
    for epoch in xrange(epochs):

        #iteration/minibatch
        for start, end in zip(range(0, trainIndex, batch_size),
                              range(batch_size, trainIndex, batch_size)):

            trainingTargets = []
            trainingSessions = []

            #create one minibatch with 0.5 normal and 0.5 abby normal traffic
            for trainKey in range(start, end):
                sessionForEncoding = list(hexSessions[hexSessions.keys()[trainKey]][0])
    
                adfun = adversarialfunctions.Adversary(sessionForEncoding)
                adversaryList = [sessionForEncoding, 
                                 adfun.dstIpSwapOut(comsDict, uniqIPs),
                                 adfun.portDirSwitcher(),
                                 adfun.ipDirSwitcher()]
                abbyIndex = random.sample(range(len(adversaryList)), 1)[0]

                targetClasses = [0]*numClasses
                targetClasses[abbyIndex] = 1
                abbyTarget = np.array(targetClasses, dtype=theano.config.floatX)
                # NOTE: the original listing never defines abbyOneHotSes; the
                # chosen adversary (adversaryList[abbyIndex]) must be one-hot
                # encoded here before it can be appended.
                trainingSessions.append(abbyOneHotSes[0])
                trainingTargets.append(abbyTarget)

            sessionsMinibatch = np.asarray(trainingSessions).reshape((-1, packetTimeSteps, 1, dimIn))
            targetsMinibatch = np.asarray(trainingTargets)

            costfun = classifierTrain(sessionsMinibatch, targetsMinibatch)

            if iteration % (numSessions / (10 * batch_size)) == 0:
                costCollect.append(costfun[0])
                trainCollect.append(np.mean(np.argmax(costfun[-1], axis=1) ==
                                            np.argmax(targetsMinibatch, axis=1)))
                module_logger.info('   Iteration: %s', iteration)
                module_logger.info('   Cost: %s', np.mean(costCollect))
                module_logger.info('   TRAIN accuracy: %s', np.mean(trainCollect))
                print '   Iteration: ', iteration
                print '   Cost: ', np.mean(costCollect)
                print '   TRAIN accuracy: ', np.mean(trainCollect)

            iteration+=1

            #testing accuracy
            if iteration % (numSessions / (2 * batch_size)) == 0:
                predtar, acttar, testCollect = predictClass(classifierPredict, hexSessions, comsDict, uniqIPs, hexDict,
                                                            hexSessionsKeys,
                                                            numClasses, trainPercent, dimIn, maxPackets, packetTimeSteps,
                                                            padOldTimeSteps)
                binaryPrecisionRecall(predtar, acttar, numClasses)
                module_logger.info(str(testCollect))

            #save the models
            if iteration % (numSessions / (5 * batch_size)) == 0:
                save_model(classifierPredict)

        epochCost.append(np.mean(costCollect))
        trainAcc.append(np.mean(trainCollect))
        
        module_logger.info('Epoch: %s', epoch)
        module_logger.info('Epoch cost average: %s', epochCost[-1])
        module_logger.info('Epoch TRAIN accuracy: %s', trainAcc[-1])
        print 'Epoch: ', epoch
        print 'Epoch cost average: ', epochCost[-1]
        print 'Epoch TRAIN accuracy: ', trainAcc[-1]

    return classifierTrain, classifierPredict
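For orientation, a hypothetical invocation (every value below is an assumption chosen only to show the shape of the call; sessionizer, learningfunctions, and the other helpers must come from the surrounding project):

classifierTrain, classifierPredict = training(
    runname='gru_demo', rnnType='gru', maxPackets=8,
    packetTimeSteps=16, packetReverse=False, padOldTimeSteps=False,
    wtstd=0.2, lr=1e-4, decay=0.9, clippings=1, dimIn=257, dim=100,
    attentionEnc=False, attentionContext=False, numClasses=4,
    batch_size=20, epochs=10, trainPercent=0.9,
    dataPath='sessions.pcap', loadPrepedData=False, channel=None)

Here numClasses=4 matches the four-way adversary list built inside the minibatch loop.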