def __init__(self, in_dim, hidden_dim, out_dim, bptt_truncate=-1, activation='tanh'):
    # Assumes: import numpy as np; import theano
    BasicRNN.__init__(self, in_dim, out_dim, hidden_dim, activation)
    # Assign instance variables
    self.in_dim = in_dim
    self.out_dim = out_dim
    self.hidden_dim = hidden_dim
    self.bptt_truncate = bptt_truncate
    # Initialize the network parameters (three weight matrices, GRU-style)
    W = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (3, hidden_dim, hidden_dim))
    # (when using mini-batches, the corresponding dimension ordering shifts)
    U = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (3, in_dim, hidden_dim))
    V = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (hidden_dim, out_dim))
    b = np.zeros((3, hidden_dim))
    c = np.zeros(out_dim)
    # Theano: create shared variables
    self.U = theano.shared(name='U', value=U.astype(theano.config.floatX))
    self.W = theano.shared(name='W', value=W.astype(theano.config.floatX))
    self.V = theano.shared(name='V', value=V.astype(theano.config.floatX))
    self.b = theano.shared(name='b', value=b.astype(theano.config.floatX))
    self.c = theano.shared(name='c', value=c.astype(theano.config.floatX))
    # rmsprop: running averages of squared gradients, one per parameter
    self.mU = theano.shared(name='mU', value=np.zeros(U.shape).astype(theano.config.floatX))
    self.mW = theano.shared(name='mW', value=np.zeros(W.shape).astype(theano.config.floatX))
    self.mV = theano.shared(name='mV', value=np.zeros(V.shape).astype(theano.config.floatX))
    self.mb = theano.shared(name='mb', value=np.zeros(b.shape).astype(theano.config.floatX))
    self.mc = theano.shared(name='mc', value=np.zeros(c.shape).astype(theano.config.floatX))
    # We store the compiled Theano graph here
    self.theano = {}
    # Trainable parameters first, then their rmsprop caches
    self.params = [self.U, self.W, self.V, self.b, self.c,
                   self.mU, self.mV, self.mW, self.mb, self.mc]
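The mU..mc shared variables above are the per-parameter caches that an rmsprop step would consume. A minimal sketch of such an update, assuming a scalar Theano expression `cost` and conventional decay/epsilon/learning-rate values (none of these appear in the original snippet, and `import theano.tensor as T` is assumed):

    # Hypothetical sketch (not from the original code): the rmsprop step
    # that the caches above support.
    decay, eps, lr = 0.9, 1e-6, 0.01
    weights = [self.U, self.W, self.V, self.b, self.c]
    caches = [self.mU, self.mW, self.mV, self.mb, self.mc]
    updates = []
    for p, m in zip(weights, caches):
        g = T.grad(cost, p)
        m_new = decay * m + (1 - decay) * g ** 2   # running average of squared gradients
        updates.append((m, m_new))
        updates.append((p, p - lr * g / T.sqrt(m_new + eps)))
    # `updates` can then be passed to theano.function(..., updates=updates)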
Example 2
    def __init__(self, n_in, n_out, n_hidden, activation='tanh',
                 l1_reg=0.00, l2_reg=0.00):
        # Assumes: import numpy as np; import theano; import theano.tensor as T
        BasicRNN.__init__(self, n_in, n_out, n_hidden, activation)
        bh_init = np.zeros((n_hidden,), dtype=theano.config.floatX)
        by_init = np.zeros((n_out,), dtype=theano.config.floatX)
        self.bh = theano.shared(value=bh_init, name='bh')
        self.by = theano.shared(value=by_init, name='by')
        self.params = [self.U, self.W, self.V, self.bh, self.by]

        # For every parameter we maintain its last update ("velocity").
        # The idea is classical momentum: keep moving mostly in the same
        # direction as the previous step (see the sketch after this example).
        self.velocity_updates = {}
        for param in self.params:
            init = np.zeros(param.get_value(borrow=True).shape, dtype=theano.config.floatX)
            self.velocity_updates[param] = theano.shared(init)

        self.L1_reg = float(l1_reg)
        self.L2_reg = float(l2_reg)
        # L1 norm; one regularization option is to keep the L1 norm of
        # the weights small. Note the L1 norm sums absolute values, so
        # abs() must be applied before summing, not after.
        self.L1 = 0
        self.L1 += abs(self.W).sum()
        self.L1 += abs(self.U).sum()

        # Squared L2 norm; another regularization option is to keep the
        # squared L2 norm of the weights small
        self.L2_sqr = 0
        self.L2_sqr += T.sum(self.W ** 2)
        self.L2_sqr += T.sum(self.U ** 2)
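These L1/L2 terms are meant to be folded into the training objective. A minimal sketch, assuming a hypothetical scalar Theano loss `base_cost` (not defined in the snippet above):

    # Hypothetical sketch: fold the regularizers into the objective.
    # `base_cost` is an assumed scalar expression (e.g. mean cross-entropy).
    cost = (base_cost
            + self.L1_reg * self.L1        # pushes weights toward sparsity
            + self.L2_reg * self.L2_sqr)   # pushes weights toward small magnitudes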
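The velocity shares built in the constructor would then drive a classical momentum step on that cost; again a sketch under assumed hyperparameters, not the original training code:

    # Hypothetical sketch: momentum SGD using self.velocity_updates.
    mu, lr = 0.9, 0.01                     # assumed momentum coefficient and learning rate
    updates = []
    for param in self.params:
        velocity = self.velocity_updates[param]
        v_new = mu * velocity - lr * T.grad(cost, param)   # blend old direction with new gradient
        updates.append((velocity, v_new))
        updates.append((param, param + v_new))
    # `updates` is then passed to theano.function(..., updates=updates)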