def get_value(self, tau0):
    dt = self.delta
    ar, cr, a, b, c, d = self.term.coefficients

    # Format the lags correctly
    tau0 = tt.abs_(tau0)
    tau = tau0[..., None]

    # Precompute some factors
    dpt = dt + tau
    dmt = dt - tau

    # Real parts:
    # tau > Delta
    crd = cr * dt
    cosh = tt.cosh(crd)
    norm = 2 * ar / crd ** 2
    K_large = tt.sum(norm * (cosh - 1) * tt.exp(-cr * tau), axis=-1)

    # tau < Delta
    crdmt = cr * dmt
    K_small = K_large + tt.sum(norm * (crdmt - tt.sinh(crdmt)), axis=-1)

    # Complex part
    cd = c * dt
    dd = d * dt
    c2 = c ** 2
    d2 = d ** 2
    c2pd2 = c2 + d2
    C1 = a * (c2 - d2) + 2 * b * c * d
    C2 = b * (c2 - d2) - 2 * a * c * d
    norm = 1.0 / (dt * c2pd2) ** 2
    k0 = tt.exp(-c * tau)
    cdt = tt.cos(d * tau)
    sdt = tt.sin(d * tau)

    # For tau > Delta
    cos_term = 2 * (tt.cosh(cd) * tt.cos(dd) - 1)
    sin_term = 2 * (tt.sinh(cd) * tt.sin(dd))
    factor = k0 * norm
    K_large += tt.sum(
        (C1 * cos_term - C2 * sin_term) * factor * cdt, axis=-1
    )
    K_large += tt.sum(
        (C2 * cos_term + C1 * sin_term) * factor * sdt, axis=-1
    )

    # tau < Delta
    edmt = tt.exp(-c * dmt)
    edpt = tt.exp(-c * dpt)
    cos_term = (
        edmt * tt.cos(d * dmt) + edpt * tt.cos(d * dpt) - 2 * k0 * cdt
    )
    sin_term = (
        edmt * tt.sin(d * dmt) + edpt * tt.sin(d * dpt) - 2 * k0 * sdt
    )
    K_small += tt.sum(2 * (a * c + b * d) * c2pd2 * dmt * norm, axis=-1)
    K_small += tt.sum((C1 * cos_term + C2 * sin_term) * norm, axis=-1)

    return tt.switch(tt.le(tau0, dt), K_small, K_large)
def get_coefficients(self):
    ar, cr, a, b, c, d = self.term.coefficients

    # Real components
    crd = cr * self.delta
    coeffs = [2 * ar * (tt.cosh(crd) - 1) / crd**2, cr]

    # Imaginary coefficients
    cd = c * self.delta
    dd = d * self.delta
    c2 = c**2
    d2 = d**2
    factor = 2.0 / (self.delta * (c2 + d2))**2
    cos_term = tt.cosh(cd) * tt.cos(dd) - 1
    sin_term = tt.sinh(cd) * tt.sin(dd)

    C1 = a * (c2 - d2) + 2 * b * c * d
    C2 = b * (c2 - d2) - 2 * a * c * d

    coeffs += [
        factor * (C1 * cos_term - C2 * sin_term),
        factor * (C2 * cos_term + C1 * sin_term),
        c,
        d,
    ]

    return coeffs
def test_jax_multioutput():
    x = tt.vector("x")
    x.tag.test_value = np.r_[1.0, 2.0].astype(theano.config.floatX)
    y = tt.vector("y")
    y.tag.test_value = np.r_[3.0, 4.0].astype(theano.config.floatX)

    w = tt.cosh(x**2 + y / 3.0)
    v = tt.cosh(x / 3.0 + y**2)

    fgraph = theano.gof.FunctionGraph([x, y], [w, v])

    compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs])
def __create_axons(self, w_init_range=5.0):
    I = T.vector('I')

    # Define W terms:
    self.V = theano.shared(np.zeros(self.num_neurons, dtype=theano.config.floatX), 'V')
    self.W = theano.shared(np.zeros(self.num_neurons, dtype=theano.config.floatX), 'W')

    # Neuron equations defined according to Morris-Lecar model DE
    # and approximated using Euler's method
    m_inf = 1/2.0 * (1 + T.tanh((self.V - self.a1) / self.a2))
    w_inf = 1/2.0 * (1 + T.tanh((self.V - self.a3) / self.a4))
    t_inf = 1 / T.cosh((self.V - self.a3) / (2.0 * self.a4))

    Wnew = self.phi * ((w_inf - self.W) / t_inf)
    Vnew = (-self.gC * m_inf * (self.V - self.eC)
            - self.gK * self.W * (self.V - self.eK)
            - self.gL * (self.V - self.eL) + I) / self.C

    w_update = [(self.W, self.W + self.del_t * Wnew)]
    Vout = self.V + self.del_t * Vnew
    v_update = [(self.V, Vout)]
    updates = w_update + v_update

    return theano.function(inputs=[I], outputs=[Vout, self.W],
                           updates=updates, allow_input_downcast=True)
def test_jax_basic():
    x = tt.matrix("x")
    y = tt.matrix("y")

    # `ScalarOp`
    z = tt.cosh(x**2 + y / 3.0)

    # `[Inc]Subtensor`
    out = tt.set_subtensor(z[0], -10.0)
    out = tt.inc_subtensor(out[0, 1], 2.0)
    out = out[:5, :3]
    out_fg = theano.gof.FunctionGraph([x, y], [out])

    test_input_vals = [
        np.tile(np.arange(10), (10, 1)).astype(tt.config.floatX),
        np.tile(np.arange(10, 20), (10, 1)).astype(tt.config.floatX),
    ]
    (jax_res,) = compare_jax_and_py(out_fg, test_input_vals)

    # Confirm that the `Subtensor` slice operations are correct
    assert jax_res.shape == (5, 3)

    # Confirm that the `IncSubtensor` operations are correct
    assert jax_res[0, 0] == -10.0
    assert jax_res[0, 1] == -8.0

    out = tt.clip(x, y, 5)
    out_fg = theano.gof.FunctionGraph([x, y], [out])
    (jax_res,) = compare_jax_and_py(out_fg, test_input_vals)
def test_reallocation():
    x = tensor.scalar('x')
    y = tensor.scalar('y')
    z = tensor.tanh(3 * x + y) + tensor.cosh(x + 5 * y)
    for l in ['vm_nogc', 'vm', 'vm_nogc', 'vm']:
        m = theano.compile.get_mode(theano.Mode(linker=l))
        m = m.excluding('fusion', 'inplace')

        f = theano.function([x, y], z, name="test_reduce_memory", mode=m)
        output = f(1, 2)
        assert output
        storage_map = f.fn.storage_map

        def check_storage(storage_map):
            from theano.tensor.var import TensorConstant
            for i in storage_map.keys():
                if not isinstance(i, TensorConstant):
                    keys_copy = list(storage_map.keys())
                    keys_copy.remove(i)
                    for o in keys_copy:
                        if (storage_map[i][0] and
                                storage_map[i][0] is storage_map[o][0]):
                            return [True, storage_map[o][0]]
            return [False, None]

        assert check_storage(storage_map)[0]
        assert len(set([id(v) for v in storage_map.values()])) < len(storage_map)
def get_celerite_matrices(self, x, diag):
    dt = self.delta
    ar, cr, a, b, c, d = self.term.coefficients

    # Real part
    cd = cr * dt
    delta_diag = 2 * tt.sum(ar * (cd - tt.sinh(cd)) / cd**2)

    # Complex part
    cd = c * dt
    dd = d * dt
    c2 = c**2
    d2 = d**2
    c2pd2 = c2 + d2
    C1 = a * (c2 - d2) + 2 * b * c * d
    C2 = b * (c2 - d2) - 2 * a * c * d
    norm = (dt * c2pd2)**2
    sinh = tt.sinh(cd)
    cosh = tt.cosh(cd)
    delta_diag += 2 * tt.sum(
        (C2 * cosh * tt.sin(dd) - C1 * sinh * tt.cos(dd)
         + (a * c + b * d) * dt * c2pd2) / norm)

    new_diag = diag + delta_diag
    # new_diag = diag
    return super(IntegratedTerm, self).get_celerite_matrices(x, new_diag)
def test_reallocation():
    x = tensor.scalar("x")
    y = tensor.scalar("y")
    z = tensor.tanh(3 * x + y) + tensor.cosh(x + 5 * y)
    # The functionality is currently implemented for the non-lazy, non-C VM only.
    for linker in [
        vm.VM_Linker(allow_gc=False, lazy=False, use_cloop=False),
        vm.VM_Linker(allow_gc=True, lazy=False, use_cloop=False),
    ]:
        m = theano.compile.get_mode(theano.Mode(linker=linker))
        m = m.excluding("fusion", "inplace")

        f = theano.function([x, y], z, name="test_reduce_memory", mode=m)
        output = f(1, 2)
        assert output
        storage_map = f.fn.storage_map

        def check_storage(storage_map):
            from theano.tensor.var import TensorConstant

            for i in storage_map:
                if not isinstance(i, TensorConstant):
                    keys_copy = list(storage_map.keys())[:]
                    keys_copy.remove(i)
                    for o in keys_copy:
                        if storage_map[i][0] and storage_map[i][0] is storage_map[o][0]:
                            return [True, storage_map[o][0]]
            return [False, None]

        assert check_storage(storage_map)[0]
        assert len(set(id(v) for v in storage_map.values())) < len(storage_map)
def test_reallocation():
    x = tensor.scalar('x')
    y = tensor.scalar('y')
    z = tensor.tanh(3 * x + y) + tensor.cosh(x + 5 * y)
    # The functionality is currently implemented for the non-lazy, non-C VM only.
    for l in [vm.VM_Linker(allow_gc=False, lazy=False, use_cloop=False),
              vm.VM_Linker(allow_gc=True, lazy=False, use_cloop=False)]:
        m = theano.compile.get_mode(theano.Mode(linker=l))
        m = m.excluding('fusion', 'inplace')

        f = theano.function([x, y], z, name="test_reduce_memory", mode=m)
        output = f(1, 2)
        assert output
        storage_map = f.fn.storage_map

        def check_storage(storage_map):
            from theano.tensor.var import TensorConstant
            for i in storage_map:
                if not isinstance(i, TensorConstant):
                    keys_copy = list(storage_map.keys())[:]
                    keys_copy.remove(i)
                    for o in keys_copy:
                        if (storage_map[i][0] and
                                storage_map[i][0] is storage_map[o][0]):
                            return [True, storage_map[o][0]]
            return [False, None]

        assert check_storage(storage_map)[0]
        assert len(set(id(v) for v in itervalues(storage_map))) < len(storage_map)
def value(self, tau0):
    dt = self.delta
    ar, cr, a, b, c, d = self.term.coefficients

    # Format the lags correctly
    tau0 = tt.abs_(tau0)
    tau = tt.reshape(tau0,
                     tt.concatenate([tau0.shape, [1]]),
                     ndim=tau0.ndim + 1)

    # Precompute some factors
    dpt = dt + tau
    dmt = dt - tau

    # Real parts:
    # tau > Delta
    crd = cr * dt
    norm = 1.0 / (crd)**2
    factor = (tt.exp(crd) + tt.exp(-crd) - 2) * norm
    K_large = tt.sum(ar * tt.exp(-cr * tau) * factor, axis=-1)

    # tau < Delta
    K_small = tt.sum((2 * cr * (dmt) + tt.exp(-cr * dmt)
                      + tt.exp(-cr * dpt) - 2 * tt.exp(-cr * tau)) * norm,
                     axis=-1)

    # Complex part
    cd = c * dt
    dd = d * dt
    c2 = c**2
    d2 = d**2
    c2pd2 = c2 + d2
    C1 = a * (c2 - d2) + 2 * b * c * d
    C2 = b * (c2 - d2) - 2 * a * c * d
    norm = 1.0 / (dt * c2pd2)**2
    k0 = tt.exp(-c * tau)
    cdt = tt.cos(d * tau)
    sdt = tt.sin(d * tau)

    # For tau > Delta
    cos_term = 2 * (tt.cosh(cd) * tt.cos(dd) - 1)
    sin_term = 2 * (tt.sinh(cd) * tt.sin(dd))
    factor = k0 * norm
    K_large += tt.sum((C1 * cos_term - C2 * sin_term) * factor * cdt,
                      axis=-1)
    K_large += tt.sum((C2 * cos_term + C1 * sin_term) * factor * sdt,
                      axis=-1)

    # For tau < Delta
    edmt = tt.exp(-c * dmt)
    edpt = tt.exp(-c * dpt)
    cos_term = edmt * tt.cos(d * dmt) + edpt * tt.cos(d * dpt) - 2 * k0 * cdt
    sin_term = edmt * tt.sin(d * dmt) + edpt * tt.sin(d * dpt) - 2 * k0 * sdt
    K_small += tt.sum(2 * (a * c + b * d) * c2pd2 * dmt * norm, axis=-1)
    K_small += tt.sum((C1 * cos_term + C2 * sin_term) * norm, axis=-1)

    return tt.switch(tt.le(tau0, dt), K_small, K_large)
def forward(self, x):
    # x: N x d
    pretanh = T.dot(x, self.wmked) + self.b  # N x d
    coshsqr = T.sqr(T.cosh(pretanh))  # N x d
    y = x + self.u * T.tanh(pretanh)
    logjaco = T.sum(T.log(T.abs_(1. + self.u / coshsqr * self.wdiag)), axis=1)
    return y, logjaco  # N x d, N
def changing_weight(self, v_sample):
    """Compute the spin-flip transition weights for ``v_sample``.

    The weight is T_{s's} = conj(psi(s', M)) / conj(psi(s, M)). For the
    transverse-field Ising model the flipping term in the Hamiltonian is
    hf = h/2 * (sp_i + sm_i), so flipping any single site contributes the
    same energy h/2, while T_{s's} differs from site to site; one can
    therefore sum T_{s's} over all s' reachable by one flip.

    :param v_sample: one sample of the visible layer
    :param Hamiltonian: Hamiltonian of the physical system of interest
        (mainly Hamiltonian.h is used)
    :pbc: periodic boundary condition, 1: periodic, 0: open
    """
    # self.W_real has shape (nvisible, nhidden) while v_sample is a vector of
    # length nvisible, so self.W_real needs to be transposed to make it
    # broadcastable against v_sample.
    exponent = -2 * v_sample * self.vbias + \
        T.sum(T.log(T.cosh(self.hbias - self.W_real * v_sample)), axis=1) - \
        T.sum(T.log(T.cosh(self.hbias + self.W_real * v_sample)), axis=1)
    return T.sum(T.exp(exponent))
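# A minimal NumPy sketch (not part of the original class) that mirrors the
# arithmetic of the Theano expression above, assuming hypothetical arrays
# v_sample (nvisible,), vbias (nvisible,), hbias (nhidden,) and
# W_real (nvisible, nhidden); the helper name flip_weights_numpy is made up
# purely for illustration.
import numpy as np

def flip_weights_numpy(v_sample, vbias, hbias, W_real):
    # One exponent per candidate single-spin flip, then summed over all flips.
    theta_flip = hbias[None, :] - W_real * v_sample[:, None]
    theta_orig = hbias[None, :] + W_real * v_sample[:, None]
    exponent = (-2.0 * v_sample * vbias
                + np.sum(np.log(np.cosh(theta_flip)), axis=1)
                - np.sum(np.log(np.cosh(theta_orig)), axis=1))
    return np.sum(np.exp(exponent))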
def forward(self, x):
    # x: B x N x d
    ys = list()
    logjacos = list()
    for id in range(self.batchsize):
        pretanh = T.dot(x[id], self.wmked[id]) + self.b[id]
        coshsqr = T.sqr(T.cosh(pretanh))
        y = x[id] + self.u[id] * T.tanh(pretanh)  # N x d
        logjaco = T.sum(T.log(
            T.abs_(1. + self.u[id] / coshsqr * self.wdiag[id])),
            axis=1)
        ys.append(y)
        logjacos.append(logjaco)
    outy = T.concatenate(ys).reshape(
        (self.batchsize, self.splsize, self.dim))  # B x N x d
    outlogjaco = T.concatenate(logjacos).reshape(
        (self.batchsize, self.splsize))  # B x N
    return outy, outlogjaco
def axon(V, I, num_neurons, del_t, w_init_range=5.0):
    # Define W terms:
    W = theano.shared((np.random.randn(num_neurons) - 0.5) * w_init_range, 'W')

    # Neuron equations defined according to Morris-Lecar model DE
    # and approximated using Euler's method
    m_inf = 1/2.0 * (1 + T.tanh((V - a1) / a2))
    w_inf = 1/2.0 * (1 + T.tanh((V - a3) / a4))
    t_inf = 1 / T.cosh((V - a3) / (2.0 * a4))

    Wnew = phi * ((w_inf - W) / t_inf)
    Vnew = (-gC * m_inf * (V - eC) - gK * W * (V - eK) - gL * (V - eL) + I) / C

    updates = [(W, W + del_t * Wnew)]
    Vout = V + del_t * Vnew

    return theano.function(inputs=[V, I], outputs=[Vout, W],
                           updates=updates, allow_input_downcast=True)
def smooth_huber_loss(y, pred, w):
    """Regression loss function, smooth version of Huber loss function.
    """
    return T.mean(w * T.log(T.cosh(y - pred)))
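# A minimal usage sketch for the log-cosh loss above (the symbolic names
# y_t, p_t and w_t are hypothetical; imports shown only for self-containment).
import numpy as np
import theano
import theano.tensor as T

y_t = T.vector("y")
p_t = T.vector("pred")
w_t = T.vector("w")
loss_fn = theano.function([y_t, p_t, w_t], smooth_huber_loss(y_t, p_t, w_t),
                          allow_input_downcast=True)
print(loss_fn(np.array([1.0, 2.0]), np.array([1.5, 1.0]), np.ones(2)))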
def coshApx(x, offset=1.5):
    out = T.switch(compAbs(x, offset),
                   .5 * T.exp(T.abs_(x)),
                   T.cosh(x))
    return out
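# Why the large-|x| branch above is a safe substitute (a quick NumPy check,
# independent of the Theano graph): cosh(x) = (exp(x) + exp(-x)) / 2 is
# dominated by exp(|x|) / 2 once |x| exceeds the offset, so switching to
# 0.5 * exp(|x|) avoids evaluating cosh where it could overflow in float32.
import numpy as np
print(np.cosh(1.5), 0.5 * np.exp(1.5))    # ~2.3524 vs ~2.2408
print(np.cosh(10.0), 0.5 * np.exp(10.0))  # agree to ~9 significant digits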
def logDetJacobian(self):
    grads = 1. / (T.sqr(T.cosh(self.x)) + ZERO)
    return T.mean(T.sum(T.log(T.abs_(grads) + ZERO), axis=1))
def logDetJacobian(self):
    dtrans = 1. / (T.cosh(self.active)**2)
    # dtrans = T.grad(T.sum(self.active), wrt=self.x)
    return T.mean(T.log(T.abs_(1. + T.dot(self.u, self.w) * dtrans)))
def logcosh(inpt):
    return T.log(T.cosh(inpt))
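# A numerically safer variant (a sketch, not part of the original): T.cosh
# overflows in float32 for large |x|, but log(cosh(x)) can be rewritten as
# |x| + log1p(exp(-2|x|)) - log(2), which gives the same value without ever
# forming cosh(x). The name logcosh_stable is hypothetical.
import numpy as np
import theano.tensor as T

def logcosh_stable(inpt):
    a = T.abs_(inpt)
    return a + T.log1p(T.exp(-2.0 * a)) - np.float32(np.log(2.0))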
def coshsqrApx(x, offset=1.5):
    out = T.switch(compAbs(x, offset),
                   .25 * T.exp(2 * T.abs_(x)),
                   T.sqr(T.cosh(x)))
    return out
def test_jax_basic():
    x = tt.matrix("x")
    y = tt.matrix("y")
    b = tt.vector("b")

    # `ScalarOp`
    z = tt.cosh(x**2 + y / 3.0)

    # `[Inc]Subtensor`
    out = tt.set_subtensor(z[0], -10.0)
    out = tt.inc_subtensor(out[0, 1], 2.0)
    out = out[:5, :3]
    out_fg = theano.gof.FunctionGraph([x, y], [out])

    test_input_vals = [
        np.tile(np.arange(10), (10, 1)).astype(theano.config.floatX),
        np.tile(np.arange(10, 20), (10, 1)).astype(theano.config.floatX),
    ]
    (jax_res,) = compare_jax_and_py(out_fg, test_input_vals)

    # Confirm that the `Subtensor` slice operations are correct
    assert jax_res.shape == (5, 3)

    # Confirm that the `IncSubtensor` operations are correct
    assert jax_res[0, 0] == -10.0
    assert jax_res[0, 1] == -8.0

    out = tt.clip(x, y, 5)
    out_fg = theano.gof.FunctionGraph([x, y], [out])
    compare_jax_and_py(out_fg, test_input_vals)

    out = tt.diagonal(x, 0)
    out_fg = theano.gof.FunctionGraph([x], [out])
    compare_jax_and_py(
        out_fg,
        [np.arange(10 * 10).reshape((10, 10)).astype(theano.config.floatX)])

    out = tt.slinalg.cholesky(x)
    out_fg = theano.gof.FunctionGraph([x], [out])
    compare_jax_and_py(
        out_fg,
        [(np.eye(10) + np.random.randn(10, 10) * 0.01).astype(
            theano.config.floatX)],
    )

    # not sure why this isn't working yet with lower=False
    out = tt.slinalg.Cholesky(lower=False)(x)
    out_fg = theano.gof.FunctionGraph([x], [out])
    compare_jax_and_py(
        out_fg,
        [(np.eye(10) + np.random.randn(10, 10) * 0.01).astype(
            theano.config.floatX)],
    )

    out = tt.slinalg.solve(x, b)
    out_fg = theano.gof.FunctionGraph([x, b], [out])
    compare_jax_and_py(
        out_fg,
        [
            np.eye(10).astype(theano.config.floatX),
            np.arange(10).astype(theano.config.floatX),
        ],
    )

    out = tt.nlinalg.alloc_diag(b)
    out_fg = theano.gof.FunctionGraph([b], [out])
    compare_jax_and_py(out_fg, [np.arange(10).astype(theano.config.floatX)])

    out = tt.nlinalg.det(x)
    out_fg = theano.gof.FunctionGraph([x], [out])
    compare_jax_and_py(
        out_fg,
        [np.arange(10 * 10).reshape((10, 10)).astype(theano.config.floatX)])

    out = tt.nlinalg.matrix_inverse(x)
    out_fg = theano.gof.FunctionGraph([x], [out])
    compare_jax_and_py(
        out_fg,
        [(np.eye(10) + np.random.randn(10, 10) * 0.01).astype(
            theano.config.floatX)],
    )
def logdet_dinv(self, y):
    return tt.sum(tt.log(tt.cosh(self.shift + self.scale * tt.arcsinh(y)))) \
        + y.shape[0].astype(th.config.floatX) * tt.log(self.scale) \
        - 0.5 * tt.sum(tt.log1p(y**2))
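# Where the expression above comes from (a sketch, assuming the inverse
# transform is z = sinh(shift + scale * arcsinh(y)) applied elementwise):
# dz/dy = cosh(shift + scale * arcsinh(y)) * scale / sqrt(1 + y**2), so
# summing log|dz/dy| over the n = y.shape[0] elements gives exactly
# sum(log(cosh(...))) + n * log(scale) - 0.5 * sum(log1p(y**2)).
# A quick finite-difference check with made-up shift/scale values:
import numpy as np
shift, scale, y, eps = 0.3, 1.7, np.array([0.5, -1.2]), 1e-6
g = lambda t: np.sinh(shift + scale * np.arcsinh(t))
analytic = (np.sum(np.log(np.cosh(shift + scale * np.arcsinh(y))))
            + y.shape[0] * np.log(scale) - 0.5 * np.sum(np.log1p(y**2)))
numeric = np.sum(np.log((g(y + eps) - g(y - eps)) / (2 * eps)))
print(analytic, numeric)  # should agree to several decimal places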
def __init__(self, W0, X, penalty=0.01):
    assert W0.shape[1] == X.shape[1], 'Dimensions do not match. W: (k,n), X: (m,n)'

    self.penalty = penalty
    self.W_init = W0.copy()

    print('Compiling ...')

    # build objective and gradient
    W = T.dmatrix('W')
    x = T.dvector('x')
    u = W.dot(x)
    obj = 0.5 * T.sum((W.T.dot(u) - x).ravel() ** 2) + self.penalty * T.sum(T.log(T.cosh(u)))
    self.f = theano.function([W, x], obj)
    self.df = theano.function([W, x], T.grad(obj, W))

    # initialize optimizer
    self.X = X
    self.data = [x for x in X]
    self.optimizer = SFO(self.f_df, W0, self.data)

    print('Done.')
def softabs(x):
    """Soft absolute value: log(2 * cosh(x)) - log(2), i.e. log(cosh(x))."""
    return T.log(np.float32(2) * T.cosh(x)) - T.log(np.float32(2))
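# Quick numeric sanity check (plain NumPy): softabs(x) equals log(cosh(x)),
# which is 0 at x = 0 and approaches |x| - log(2) for large |x|.
import numpy as np
for v in (0.0, 1.0, 5.0, 20.0):
    print(v, np.log(2 * np.cosh(v)) - np.log(2), abs(v) - np.log(2))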
def coshsqrinvApx(x, offset=1.5):
    out = T.switch(compAbs(x, offset),
                   4. * T.exp(-2. * T.abs_(x)),
                   T.sqr(T.cosh(x)**-1))
    return out
def __init__(self, in_dim, h_dim, out_dim, alpha, momentum):
    self.alpha = alpha
    self.momentum = momentum
    self.dims = [in_dim, h_dim, out_dim]

    _in = np.zeros((1, in_dim), dtype=np.float32)
    _h = np.zeros((1, h_dim), dtype=np.float32)
    _out = np.zeros((1, out_dim), dtype=np.float32)
    self._in = theano.shared(name='_in', value=_in.astype(theano.config.floatX), borrow=True)
    self._h = theano.shared(name='_h', value=_h.astype(theano.config.floatX), borrow=True)
    self._out = theano.shared(name='_out', value=_out.astype(theano.config.floatX), borrow=True)

    # in_h_Ws = uniform_f(in_dim, h_dim)
    # h_out_Ws = uniform_f(h_dim, out_dim)
    in_h_Ws = np.random.normal(0.5, np.sqrt(0.25), (in_dim, h_dim)).astype(np.float32)
    h_out_Ws = np.random.normal(0.5, np.sqrt(0.25), (h_dim, out_dim)).astype(np.float32)
    self.in_h_Ws = theano.shared(name='in_h_Ws', value=in_h_Ws.astype(theano.config.floatX), borrow=True)
    self.h_out_Ws = theano.shared(name='h_out_Ws', value=h_out_Ws.astype(theano.config.floatX), borrow=True)

    prev_dW1 = np.zeros_like(in_h_Ws, dtype=np.float32)
    prev_dW2 = np.zeros_like(h_out_Ws, dtype=np.float32)
    self.prev_delta_W_in_h = theano.shared(name='prev_delta_W_in_h', value=prev_dW1.astype(theano.config.floatX), borrow=True)
    self.prev_delta_W_h_out = theano.shared(name='prev_delta_W_h_out', value=prev_dW2.astype(theano.config.floatX), borrow=True)

    new_input = T.fmatrix()
    input_hidden_Ws = T.fmatrix()
    hidden_output_Ws = T.fmatrix()
    sum_h = new_input.dot(input_hidden_Ws)
    next_h = T.tanh(sum_h)
    sum_out = next_h.dot(hidden_output_Ws)
    next_out = T.tanh(sum_out)
    self.feed_forward = theano.function([new_input, input_hidden_Ws, hidden_output_Ws],
                                        updates=[(self._in, new_input), (self._h, next_h), (self._out, next_out)])
    self.set_output = theano.function([new_input], updates=[(self._out, new_input)])

    Ws_h_out = T.fmatrix()
    Ws_in_h = T.fmatrix()
    prev_delta_W_in_h = T.fmatrix()
    prev_delta_W_h_out = T.fmatrix()
    o_in = T.fmatrix()
    o_h = T.fmatrix()
    o_out = T.fmatrix()
    target_out = T.fmatrix()

    # L2 norm
    tmp = o_out - target_out
    error = tmp

    tmp_grad_h_out = np.ones_like(o_out, dtype=np.float32) / T.cosh(o_out)
    diracs_out = error * tmp_grad_h_out * tmp_grad_h_out
    delta_W_h_out = - self.alpha * o_h.T.dot(diracs_out) + self.momentum * prev_delta_W_h_out
    new_Ws_h_out = Ws_h_out + delta_W_h_out

    tmp_grad_in_h = np.ones_like(o_h, dtype=np.float32) / T.cosh(o_h)
    diracs_h_layer_terms = tmp_grad_in_h * tmp_grad_in_h
    diracs_h_chain = diracs_out.dot(Ws_h_out.T)
    diracs_h = diracs_h_chain * diracs_h_layer_terms
    delta_W_in_h = - self.alpha * o_in.T.dot(diracs_h) + self.momentum * prev_delta_W_in_h
    new_Ws_in_h = Ws_in_h + delta_W_in_h

    self.back_propagate = theano.function(
        [Ws_in_h, Ws_h_out, o_in, o_h, o_out, target_out, prev_delta_W_in_h, prev_delta_W_h_out],
        updates=[(self.h_out_Ws, new_Ws_h_out), (self.in_h_Ws, new_Ws_in_h),
                 (self.prev_delta_W_in_h, delta_W_in_h), (self.prev_delta_W_h_out, delta_W_h_out)])

    # self.set_input = theano.function([new_input], updates=[(self._in, new_input)])
    new_weights = T.fmatrix('new_weights')
    self.update_in_h_weights = theano.function([new_weights], updates=[(self.in_h_Ws, new_weights)])
    self.update_h_out_weights = theano.function([new_weights], updates=[(self.h_out_Ws, new_weights)])