def __init__(self, in_dim, hidden_dim, out_dim, bptt_truncate=-1, activation='tanh'):
    BasicRNN.__init__(self, in_dim, out_dim, hidden_dim, activation)
    # Assign instance variables
    self.in_dim = in_dim
    self.out_dim = out_dim
    self.hidden_dim = hidden_dim
    self.bptt_truncate = bptt_truncate
    # Initialize the network parameters
    # (when using mini-batches, the dimension order should be adjusted)
    W = np.random.uniform(-np.sqrt(1. / hidden_dim), np.sqrt(1. / hidden_dim),
                          (3, hidden_dim, hidden_dim))
    U = np.random.uniform(-np.sqrt(1. / hidden_dim), np.sqrt(1. / hidden_dim),
                          (3, in_dim, hidden_dim))
    V = np.random.uniform(-np.sqrt(1. / hidden_dim), np.sqrt(1. / hidden_dim),
                          (hidden_dim, out_dim))
    b = np.zeros((3, hidden_dim))
    c = np.zeros(out_dim)
    # Theano: create shared variables for the parameters
    self.U = theano.shared(name='U', value=U.astype(theano.config.floatX))
    self.W = theano.shared(name='W', value=W.astype(theano.config.floatX))
    self.V = theano.shared(name='V', value=V.astype(theano.config.floatX))
    self.b = theano.shared(name='b', value=b.astype(theano.config.floatX))
    self.c = theano.shared(name='c', value=c.astype(theano.config.floatX))
    # SGD / rmsprop: initialize the squared-gradient caches to zero
    self.mU = theano.shared(name='mU', value=np.zeros(U.shape).astype(theano.config.floatX))
    self.mW = theano.shared(name='mW', value=np.zeros(W.shape).astype(theano.config.floatX))
    self.mV = theano.shared(name='mV', value=np.zeros(V.shape).astype(theano.config.floatX))
    self.mb = theano.shared(name='mb', value=np.zeros(b.shape).astype(theano.config.floatX))
    self.mc = theano.shared(name='mc', value=np.zeros(c.shape).astype(theano.config.floatX))
    # We store the Theano graph here
    self.theano = {}
    # Bundle the weights together with their rmsprop caches
    self.params = [self.U, self.W, self.V, self.b, self.c,
                   self.mU, self.mV, self.mW, self.mb, self.mc]
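# Below is a minimal, self-contained sketch (not part of the class above; the
# variable names and hyperparameters are illustrative assumptions) of how
# squared-gradient caches like mU/mW/... are typically used in an rmsprop-style
# update: each cache keeps a decayed average of squared gradients and rescales
# the corresponding parameter step.
import numpy as np
import theano
import theano.tensor as T

floatX = theano.config.floatX
x = T.vector('x')
y = T.scalar('y')
lr = np.asarray(0.01, dtype=floatX)
decay = np.asarray(0.9, dtype=floatX)
eps = np.asarray(1e-6, dtype=floatX)

w = theano.shared(np.zeros(3, dtype=floatX), name='w')    # a parameter
mw = theano.shared(np.zeros(3, dtype=floatX), name='mw')  # its rmsprop cache

cost = (T.dot(x, w) - y) ** 2
gw = T.grad(cost, w)
mw_new = decay * mw + (1 - decay) * gw ** 2    # running average of squared gradients
step = lr * gw / T.sqrt(mw_new + eps)          # gradient rescaled by the cache

rmsprop_step = theano.function([x, y], cost,
                               updates=[(w, w - step), (mw, mw_new)])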
def __init__(self, n_in, n_out, n_hidden, activation='tanh', l1_reg=0.00, l2_reg=0.00):
    BasicRNN.__init__(self, n_in, n_out, n_hidden, activation)
    bh_init = np.zeros((n_hidden,), dtype=theano.config.floatX)
    by_init = np.zeros((n_out,), dtype=theano.config.floatX)
    self.bh = theano.shared(value=bh_init, name='bh')
    self.by = theano.shared(value=by_init, name='by')
    self.params = [self.U, self.W, self.V, self.bh, self.by]
    # For every parameter we maintain its last update ("velocity").
    # The idea is momentum: keep moving mostly in the same direction.
    self.velocity_updates = {}
    for param in self.params:
        init = np.zeros(param.get_value(borrow=True).shape,
                        dtype=theano.config.floatX)
        self.velocity_updates[param] = theano.shared(init)
    self.L1_reg = float(l1_reg)
    self.L2_reg = float(l2_reg)
    # L1 norm; one regularization option is to keep the L1 norm small
    self.L1 = 0
    self.L1 += abs(self.W).sum()
    self.L1 += abs(self.U).sum()
    # Square of the L2 norm; another option is to keep the squared L2 norm small
    self.L2_sqr = 0
    self.L2_sqr += T.sum(self.W ** 2)
    self.L2_sqr += T.sum(self.U ** 2)
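# A minimal, self-contained sketch (again with illustrative names and
# hyperparameters, not the class above) of how the velocity variables and the
# L1 / squared-L2 terms are typically combined: the regularizers are added to
# the cost, and each parameter moves along a momentum-smoothed direction
# stored in its velocity.
import numpy as np
import theano
import theano.tensor as T

floatX = theano.config.floatX
x = T.vector('x')
y = T.scalar('y')
lr = np.asarray(0.01, dtype=floatX)
momentum = np.asarray(0.9, dtype=floatX)
l1_reg = np.asarray(1e-4, dtype=floatX)
l2_reg = np.asarray(1e-4, dtype=floatX)

w = theano.shared(np.zeros(3, dtype=floatX), name='w')          # a parameter
vel_w = theano.shared(np.zeros(3, dtype=floatX), name='vel_w')  # its last update

loss = (T.dot(x, w) - y) ** 2
cost = loss + l1_reg * abs(w).sum() + l2_reg * T.sum(w ** 2)    # add both penalties

gw = T.grad(cost, w)
vel_new = momentum * vel_w - lr * gw    # keep moving mostly in the same direction
momentum_step = theano.function([x, y], cost,
                                updates=[(vel_w, vel_new), (w, w + vel_new)])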