def __init__(self, input_size, hidden_size, init_range=1.0, previous=None):
    """Create the parameters and gradient buffers for one recurrent layer.

    The i/f/o/j suffixes presumably name the input, forget, output and
    candidate gates of an LSTM cell -- TODO confirm against the forward pass.

    Args:
        input_size: Size of the layer input; second dimension requested for
            every ``W_x*`` matrix.
        hidden_size: Size of the hidden state; first dimension requested for
            every weight matrix and the length requested for every bias.
        init_range: Passed unchanged to ``initalize`` for every weight matrix.
        previous: Optional preceding layer. When supplied it is stored on
            ``self.previous`` and its ``next`` attribute is pointed at this
            layer. ``self.previous`` is not assigned here when it is ``None``.
    """
    self.input_size, self.hidden_size = input_size, hidden_size

    # Compare with None explicitly so that a layer object which happens to
    # be falsy (e.g. one defining __len__ or __bool__) is still linked.
    if previous is not None:
        self.previous = previous
        previous.next = self

    h, n = hidden_size, input_size

    def new_weights(rows, cols):
        # Every weight matrix shares the same init_range.
        return initalize((rows, cols), init_range)

    # Creation order is kept fixed (hidden-to-hidden first, then
    # input-to-hidden) in case initalize draws from a shared random stream.
    self.W_hi = new_weights(h, h)
    self.W_hf = new_weights(h, h)
    self.W_ho = new_weights(h, h)
    self.W_hj = new_weights(h, h)

    self.W_xi = new_weights(h, n)
    self.W_xf = new_weights(h, n)
    self.W_xo = new_weights(h, n)
    self.W_xj = new_weights(h, n)

    # All biases start at zero except b_f, which starts at 3 -- presumably
    # to keep the forget gate open early in training (TODO confirm).
    self.b_i = zeros(h)
    self.b_f = ones(h) * 3
    self.b_o = zeros(h)
    self.b_j = zeros(h)

    # Gradient buffers, one per parameter, all starting at zero.
    self.dW_hi = zeros(h, h)
    self.dW_hf = zeros(h, h)
    self.dW_ho = zeros(h, h)
    self.dW_hj = zeros(h, h)

    self.dW_xi = zeros(h, n)
    self.dW_xf = zeros(h, n)
    self.dW_xo = zeros(h, n)
    self.dW_xj = zeros(h, n)

    self.db_i = zeros(h)
    self.db_f = zeros(h)
    self.db_o = zeros(h)
    self.db_j = zeros(h)

    # (name, parameter, gradient) triples for every trainable parameter.
    self.params = [
        ('W_hi', self.W_hi, self.dW_hi),
        ('W_hf', self.W_hf, self.dW_hf),
        ('W_ho', self.W_ho, self.dW_ho),
        ('W_hj', self.W_hj, self.dW_hj),
        ('W_xi', self.W_xi, self.dW_xi),
        ('W_xf', self.W_xf, self.dW_xf),
        ('W_xo', self.W_xo, self.dW_xo),
        ('W_xj', self.W_xj, self.dW_xj),
        ('b_i', self.b_i, self.db_i),
        ('b_f', self.b_f, self.db_f),
        ('b_o', self.b_o, self.db_o),
        ('b_j', self.b_j, self.db_j),
    ]

    self.initSequence()
def __init__(self, target, name):
    """Build graph nodes that compute and cache the SVD of ``target``.

    Args:
        target: Tensor handed to ``tf.svd``. Its first dimension sizes the
            initial values; the shape assertions below only hold when those
            initial values match the SVD outputs (presumably a square
            matrix -- TODO confirm).
        name: Prefix used for the name of every variable, placeholder and
            initial value created here.

    Attributes set: ``tf_svd`` (live SVD ops), ``init`` (initial values),
    ``cached`` / ``s`` / ``u`` / ``v`` (variables holding the cached
    factors), ``holder`` (placeholders for externally supplied factors),
    ``update_tf_op``, ``update_external_op`` and ``init_ops``.
    """
    self.name = name
    self.target = target

    # Factorization computed inside the TensorFlow graph.
    live = SvdTuple(tf.svd(target))
    self.tf_svd = live

    # Initial values for the cached factors, sized by the first dimension.
    size = target.shape[0]
    s_start = u.ones(size, name=name + "_s_init")
    u_start = u.Identity(size, name=name + "_u_init")
    v_start = u.Identity(size, name=name + "_v_init")
    start = SvdTuple(s_start, u_start, v_start)
    self.init = start

    # The initial values must be shape-compatible with the real SVD outputs.
    assert live.s.shape == start.s.shape
    assert live.u.shape == start.u.shape
    assert live.v.shape == start.v.shape

    # Variables that hold the most recently stored factorization.
    s_var = tf.Variable(start.s, name=name + "_s")
    u_var = tf.Variable(start.u, name=name + "_u")
    v_var = tf.Variable(start.v, name=name + "_v")
    cached = SvdTuple(s_var, u_var, v_var)
    self.cached = cached
    self.s = cached.s
    self.u = cached.u
    self.v = cached.v

    # Placeholders through which factors computed elsewhere can be fed in.
    s_in = tf.placeholder(dtype, shape=cached.s.shape, name=name + "_s_holder")
    u_in = tf.placeholder(dtype, shape=cached.u.shape, name=name + "_u_holder")
    v_in = tf.placeholder(dtype, shape=cached.v.shape, name=name + "_v_holder")
    holder = SvdTuple(s_in, u_in, v_in)
    self.holder = holder

    # Refresh the cache from the in-graph SVD.
    self.update_tf_op = tf.group(
        cached.s.assign(live.s),
        cached.u.assign(live.u),
        cached.v.assign(live.v),
    )

    # Refresh the cache from values fed through the placeholders.
    self.update_external_op = tf.group(
        cached.s.assign(holder.s),
        cached.u.assign(holder.u),
        cached.v.assign(holder.v),
    )

    self.init_ops = (
        self.s.initializer,
        self.u.initializer,
        self.v.initializer,
    )
def __init__(self, input_size, hidden_size, init_range=1.0, previous=None):
    """Allocate weights, biases and zeroed gradients for one recurrent layer.

    The gate suffixes i/f/o/j look like the input, forget, output and
    candidate gates of an LSTM cell -- TODO confirm against the forward pass.

    Args:
        input_size: Size of the layer input; second dimension requested for
            each ``W_x*`` matrix.
        hidden_size: Size of the hidden state; first dimension requested for
            each weight matrix and the length requested for each bias.
        init_range: Forwarded as-is to ``initalize`` for each weight matrix.
        previous: Optional preceding layer. If given, it is saved as
            ``self.previous`` and its ``next`` attribute is set to this
            layer. Nothing is assigned to ``self.previous`` when it is
            ``None``.
    """
    self.input_size, self.hidden_size = input_size, hidden_size

    # Use an identity check against None: a plain truthiness test would
    # skip linking a valid layer object that evaluates as falsy.
    if previous is not None:
        self.previous = previous
        previous.next = self

    h, n = hidden_size, input_size

    def make_matrix(rows, cols):
        # All weight matrices use the same init_range.
        return initalize((rows, cols), init_range)

    # Hidden-to-hidden weights are created before input-to-hidden weights;
    # the order is preserved in case initalize consumes random state.
    self.W_hi = make_matrix(h, h)
    self.W_hf = make_matrix(h, h)
    self.W_ho = make_matrix(h, h)
    self.W_hj = make_matrix(h, h)

    self.W_xi = make_matrix(h, n)
    self.W_xf = make_matrix(h, n)
    self.W_xo = make_matrix(h, n)
    self.W_xj = make_matrix(h, n)

    # b_f starts at 3 while the other biases start at 0 -- presumably so
    # the forget gate begins mostly open (TODO confirm).
    self.b_i = zeros(h)
    self.b_f = ones(h) * 3
    self.b_o = zeros(h)
    self.b_j = zeros(h)

    # Zero-initialized gradient accumulators, one per parameter.
    self.dW_hi = zeros(h, h)
    self.dW_hf = zeros(h, h)
    self.dW_ho = zeros(h, h)
    self.dW_hj = zeros(h, h)

    self.dW_xi = zeros(h, n)
    self.dW_xf = zeros(h, n)
    self.dW_xo = zeros(h, n)
    self.dW_xj = zeros(h, n)

    self.db_i = zeros(h)
    self.db_f = zeros(h)
    self.db_o = zeros(h)
    self.db_j = zeros(h)

    # Every trainable parameter as a (name, parameter, gradient) triple.
    self.params = [
        ('W_hi', self.W_hi, self.dW_hi),
        ('W_hf', self.W_hf, self.dW_hf),
        ('W_ho', self.W_ho, self.dW_ho),
        ('W_hj', self.W_hj, self.dW_hj),
        ('W_xi', self.W_xi, self.dW_xi),
        ('W_xf', self.W_xf, self.dW_xf),
        ('W_xo', self.W_xo, self.dW_xo),
        ('W_xj', self.W_xj, self.dW_xj),
        ('b_i', self.b_i, self.db_i),
        ('b_f', self.b_f, self.db_f),
        ('b_o', self.b_o, self.db_o),
        ('b_j', self.b_j, self.db_j),
    ]

    self.initSequence()