def load_params(self, model):
    """Restore this layer's parameters from a positionally indexed sequence.

    Entries 0-7 of ``model`` are wrapped with ``sharedX`` and stored, in
    order, as ``W``, ``U``, ``b``, ``Wpc``, ``Uph``, ``bc``, ``Ua`` and
    ``Wc``.  When ``self.selector`` is truthy, entries 8 and 9 are stored
    as ``Wsel`` and ``bsel``.  ``self.pack_params()`` is called last.

    NOTE(review): no ``ba`` value is restored here, although an
    ``init_params`` in this file creates one -- confirm against the
    ordering used by ``pack_params``.
    """
    core_names = ('W', 'U', 'b', 'Wpc', 'Uph', 'bc', 'Ua', 'Wc')
    for position, attr in enumerate(core_names):
        setattr(self, attr, sharedX(model[position]))

    # The selector parameters are only read when the selector is enabled.
    if self.selector:
        self.Wsel = sharedX(model[8])
        self.bsel = sharedX(model[9])

    self.pack_params()
def orthogonal(shape, scale=1.1):
    """Return a scaled orthogonal initialisation wrapped by ``sharedX``.

    Adapted from Lasagne.  A Gaussian matrix of shape
    ``(shape[0], prod(shape[1:]))`` is decomposed with a reduced SVD, and
    whichever factor has that flattened shape is reshaped to ``shape`` and
    multiplied by ``scale``.
    """
    n_rows = shape[0]
    n_cols = np.prod(shape[1:])
    flat_shape = (n_rows, n_cols)

    gaussian = np.random.normal(0.0, 1.0, flat_shape)
    left, _, right = np.linalg.svd(gaussian, full_matrices=False)

    # Only one of the two SVD factors is guaranteed to match flat_shape.
    if left.shape == flat_shape:
        basis = left
    else:
        basis = right

    basis = basis.reshape(shape)
    return sharedX(scale * basis[:shape[0], :shape[1]])
def orthogonal(shape, scale=1.1):
    """Orthogonal weight initialisation, taken from Lasagne.

    Reference: Saxe et al., http://arxiv.org/abs/1312.6120

    Draws a standard-normal matrix of shape ``(shape[0], prod(shape[1:]))``,
    takes its reduced SVD, keeps the factor whose shape equals that
    flattened shape, reshapes it to ``shape``, scales it by ``scale`` and
    hands the result to ``sharedX``.
    """
    flat_shape = (shape[0], np.prod(shape[1:]))
    noise = np.random.normal(0.0, 1.0, flat_shape)
    svd_u, _, svd_v = np.linalg.svd(noise, full_matrices=False)

    # Fall back to the right factor when the left one has the wrong shape.
    chosen = svd_v if svd_u.shape != flat_shape else svd_u
    reshaped = chosen.reshape(shape)
    return sharedX(scale * reshaped[:shape[0], :shape[1]])
def init_params(self):
    """Create ``W``, ``U`` and ``b`` and then call ``pack_params``.

    ``W`` is four ``ortho_weight((in_dim, h_dim))`` blocks concatenated
    along axis 1; ``U`` is four ``ortho_weight((h_dim, h_dim))`` blocks
    concatenated the same way; ``b`` is ``shared_zeros`` of length
    ``4 * h_dim``.  (Presumably one block per gate -- TODO confirm.)
    """
    # NOTE(review): the value returned here was bound to an unused local in
    # the original.  The call itself is kept so that any side effects of
    # ``self.init`` (presumably consuming random numbers) stay unchanged.
    self.init((self.in_dim, self.h_dim))

    input_blocks = [
        ortho_weight((self.in_dim, self.h_dim)) for _ in range(4)
    ]
    self.W = sharedX(np.concatenate(input_blocks, axis=1))

    recurrent_blocks = [
        ortho_weight((self.h_dim, self.h_dim)) for _ in range(4)
    ]
    self.U = sharedX(np.concatenate(recurrent_blocks, axis=1))

    self.b = shared_zeros((4 * self.h_dim, ))
    self.pack_params()
def init_params(self):
    """Create the recurrent, attention and (optional) selector parameters.

    ``W`` and ``U`` are each four ``ortho_weight`` blocks concatenated
    along axis 1, and ``b`` is ``shared_zeros`` of length ``4 * h_dim``.
    The attention parameters ``Wpc``, ``Uph``, ``Ua`` and ``Wc`` come from
    ``self.init``; ``bc`` and ``ba`` are ``shared_zeros``.  ``Wsel`` and
    ``bsel`` are created only when ``self.selector`` is truthy.
    ``pack_params`` is called at the end.
    """
    def stacked_ortho(block_shape):
        # Four independently drawn blocks, joined side by side.
        blocks = [ortho_weight(block_shape) for _ in range(4)]
        return sharedX(np.concatenate(blocks, axis=1))

    # NOTE(review): the result of this call was assigned to an unused local
    # in the original.  The call is retained so that any side effects of
    # ``self.init`` (presumably consuming random numbers) are preserved.
    self.init((self.in_dim, self.h_dim))

    self.W = stacked_ortho((self.in_dim, self.h_dim))
    self.U = stacked_ortho((self.h_dim, self.h_dim))
    self.b = shared_zeros((4 * self.h_dim, ))

    # Attention parameters.  The original note gives the score as
    #   e^i = Ua(tanh(Wctx.dot(context) + Uctx.dot(h_tm1) + bctx))
    # where Wctx/Uctx/bctx presumably correspond to Wpc/Uph/bc below
    # (TODO confirm against the step function).
    self.Wpc = self.init((self.ctx_dim, self.pctx_dim))
    self.Uph = self.init((self.h_dim, self.pctx_dim))
    self.bc = shared_zeros((self.pctx_dim, ))
    self.Ua = self.init((self.pctx_dim, 1))
    self.ba = shared_zeros((1, ))
    self.Wc = self.init((self.ctx_dim, self.h_dim * 4))

    if self.selector:
        # Open question from the original author: if Wsel had shape
        # (h_dim, h_dim) instead of (h_dim, 1), would the selector be able
        # to pick different features for different samples?
        self.Wsel = self.init((self.h_dim, 1))
        self.bsel = shared_zeros((1, ))

    self.pack_params()
def normal(shape, scale=0.05):
    """Return ``sharedX`` of standard-normal draws of ``shape`` times ``scale``."""
    draws = np.random.randn(*shape)
    scaled = draws * scale
    return sharedX(scaled)
def uniform(shape, scale=0.05):
    """Return ``sharedX`` of uniform draws of ``shape`` with bounds ``-scale`` and ``scale``."""
    lower, upper = -scale, scale
    draws = np.random.uniform(low=lower, high=upper, size=shape)
    return sharedX(draws)
def load_params(self, model):
    """Restore ``W`` from the first entry of ``model`` and call ``pack_params``."""
    saved_weights = model[0]
    self.W = sharedX(saved_weights)
    self.pack_params()
def xavier(shape, scale=None):
    """Return ``sharedX`` of zero-mean normal draws with variance ``2 / (shape[0] + shape[1])``.

    ``scale`` is accepted but not used by this function.
    """
    dim_sum = shape[0] + shape[1]
    std_dev = np.sqrt(2. / dim_sum)
    draws = np.random.normal(0., std_dev, size=shape)
    return sharedX(draws)