def create_fns(input, in_signs, Ds):
    """Build the symbolic functions of a leaky-ReLU MLP with layer sizes Ds.

    f:     input        -> [output, A, B, region inequalities, sign pattern]
    g:     sign pattern  -> [A, B] of the corresponding affine region
    all_g: sign pattern  -> region inequalities
    h:     input         -> output
    """
    cumulative_units = np.concatenate([[0], np.cumsum(Ds[:-1])])

    Ws = [sj.initializers.he((j, i)) for j, i in zip(Ds[1:], Ds[:-1])]
    bs = [sj.initializers.he((j,)) for j in Ds[1:]]

    A_w = [T.eye(Ds[0])]
    B_w = [T.zeros(Ds[0])]

    A_q = [T.eye(Ds[0])]
    B_q = [T.zeros(Ds[0])]

    maps = [input]
    signs = []
    masks = [T.ones(Ds[0])]

    in_masks = T.where(T.concatenate([T.ones(Ds[0]), in_signs]) > 0, 1., 0.1)

    # forward pass, recording the leaky-ReLU masks and sign pattern
    for w, b in zip(Ws[:-1], bs[:-1]):
        pre_activation = T.matmul(w, maps[-1]) + b
        signs.append(T.sign(pre_activation))
        masks.append(T.where(pre_activation > 0, 1., 0.1))
        maps.append(pre_activation * masks[-1])

    maps.append(T.matmul(Ws[-1], maps[-1]) + bs[-1])

    # compute per region A and B
    for start, end, w, b, m in zip(cumulative_units[:-1], cumulative_units[1:],
                                   Ws, bs, masks):
        A_w.append(T.matmul(w * m, A_w[-1]))
        B_w.append(T.matmul(w * m, B_w[-1]) + b)

        A_q.append(T.matmul(w * in_masks[start:end], A_q[-1]))
        B_q.append(T.matmul(w * in_masks[start:end], B_q[-1]) + b)

    signs = T.concatenate(signs)

    ineq_b = T.concatenate(B_w[1:-1])
    ineq_A = T.vstack(A_w[1:-1])

    inequalities = T.hstack([ineq_b[:, None], ineq_A])
    inequalities = inequalities * signs[:, None] / T.linalg.norm(ineq_A, 2, 1,
                                                                 keepdims=True)

    inequalities_code = T.hstack([T.concatenate(B_q[1:-1])[:, None],
                                  T.vstack(A_q[1:-1])])
    inequalities_code = inequalities_code * in_signs[:, None]

    f = sj.function(input,
                    outputs=[maps[-1], A_w[-1], B_w[-1], inequalities, signs])
    g = sj.function(in_signs, outputs=[A_q[-1], B_q[-1]])
    all_g = sj.function(in_signs, outputs=inequalities_code)
    h = sj.function(input, outputs=maps[-1])

    return f, g, h, all_g
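# Example usage sketch (hypothetical sizes, not from the source): build the
# functions for a small leaky-ReLU network with 2 inputs, two hidden layers
# of 8 units each and 3 outputs. `in_signs` carries one sign per hidden unit,
# so its length is sum(Ds[1:-1]).
Ds = [2, 8, 8, 3]
point = T.Placeholder((Ds[0],), 'float32')
hidden_signs = T.Placeholder((sum(Ds[1:-1]),), 'float32')
f, g, h, all_g = create_fns(point, hidden_signs, Ds)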
def create_vae(batch_size, Ds, seed, leakiness=0.1, lr=0.0002, scaler=1):
    # relies on the module-level helpers `encoder`, `init_weights`, `relu_mask`
    # and on the prior covariance constants `cov_W`, `cov_b`
    x = T.Placeholder([batch_size, Ds[-1]], 'float32')

    # ENCODER
    enc = encoder(x, Ds[0])
    mu = enc[-1][:, :Ds[0]]
    logvar = enc[-1][:, Ds[0]:]
    var = T.exp(logvar)

    # reparametrization trick
    z = mu + T.exp(0.5 * logvar) * T.random.randn((batch_size, Ds[0]))
    z_ph = T.Placeholder((batch_size, Ds[0]), 'float32')

    # DECODER
    Ws, bs = init_weights(Ds, seed, scaler)
    Ws = [T.Variable(w) for w in Ws]
    bs = [T.Variable(b) for b in bs]

    logvar_x = T.Variable(T.zeros(1), name='logvar_x')
    var_x = T.exp(logvar_x)

    # decode both the sampled latent z and the placeholder z_ph
    h, h_ph = [z], [z_ph]
    for w, b in zip(Ws[:-1], bs[:-1]):
        h.append(T.matmul(h[-1], w.transpose()) + b)
        h.append(h[-1] * relu_mask(h[-1], leakiness))

        h_ph.append(T.matmul(h_ph[-1], w.transpose()) + b)
        h_ph.append(h_ph[-1] * relu_mask(h_ph[-1], leakiness))

    h.append(T.matmul(h[-1], Ws[-1].transpose()) + bs[-1])
    h_ph.append(T.matmul(h_ph[-1], Ws[-1].transpose()) + bs[-1])

    # Gaussian (weight-decay) prior on the decoder parameters
    prior = sum([T.mean(w**2) for w in Ws], 0.) / cov_W \
            + sum([T.mean(v**2) for v in bs[:-1]], 0.) / cov_b

    # kl is the *negative* KL divergence KL(q(z|x) || N(0, I))
    kl = 0.5 * (1 + logvar - var - mu ** 2).sum(1)
    px = -0.5 * (logvar_x + ((x - h[-1]) ** 2 / var_x)).sum(1)
    loss = -(px + kl).mean() + prior

    variables = Ws + bs + sj.layers.get_variables(enc) + [logvar_x]

    opti = sj.optimizers.Adam(loss, lr, params=variables)

    train = sj.function(x, outputs=loss, updates=opti.updates)
    g = sj.function(z_ph, outputs=h_ph[-1])
    params = sj.function(outputs=Ws + bs + [T.exp(logvar_x) * T.ones(Ds[-1])])
    get_varx = sj.function(outputs=var_x)

    output = {'train': train, 'g': g, 'params': params}
    output['model'] = 'VAE'
    output['varx'] = get_varx
    output['kwargs'] = {'batch_size': batch_size, 'Ds': Ds, 'seed': seed,
                        'leakiness': leakiness, 'lr': lr, 'scaler': scaler,
                        'prior': sj.function(outputs=prior)}

    def sample(n):
        samples = []
        for i in range(n // batch_size):
            samples.append(g(np.random.randn(batch_size, Ds[0])))
        return np.concatenate(samples)

    output['sample'] = sample
    return output
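# Minimal training sketch (an assumption, not from the source; requires the
# module-level helpers used above). Ds goes from latent to data dimension,
# so Ds[0] is the latent size and Ds[-1] the observation size.
vae = create_vae(batch_size=32, Ds=[2, 16, 16, 4], seed=0)
data = np.random.randn(32, 4).astype('float32')
for step in range(100):
    current_loss = vae['train'](data)
samples = vae['sample'](64)        # 64 decoder samples from the prior
decoder_params = vae['params']()   # decoder weights, biases and output variance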
def PiecewiseConstant(init, steps_and_values):
    with Scope("PiecewiseConstant"):
        all_steps = T.stack([0] + list(steps_and_values.keys()))
        all_values = T.stack([init] + list(steps_and_values.values()))

        step = T.Variable(
            T.zeros(1),
            trainable=False,
            name="step",
            dtype="float32",
        )

        # pick the value whose step threshold has most recently been passed
        value = all_values[(step >= all_steps).argmin() - 1]

        # advance the internal step counter through the graph updates
        current_graph().add({step: step + 1})

    return value
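# Hypothetical usage (not from the source): a schedule equal to 0.1 for the
# first 100 steps, 0.01 until step 200, and 0.001 afterwards; the internal
# `step` variable is incremented by the graph update registered above.
schedule = PiecewiseConstant(0.1, {100: 0.01, 200: 0.001})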
def ExponentialMovingAverage(value, alpha):
    with Scope("ExponentialMovingAverage"):
        first_step = T.Variable(True, trainable=False, name="first_step",
                                dtype="bool")
        var = T.Variable(T.zeros(value.shape), trainable=False,
                         dtype="float32", name="EMA")

        new_value = T.where(first_step, value,
                            var * alpha + (1 - alpha) * value)
        current_graph().add({var: new_value, first_step: False})

    return new_value, var
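# Hypothetical usage (not from the source): smooth a streamed scalar. On the
# first call the average is initialized to the raw value; afterwards it is
# the convex combination controlled by alpha.
stream = T.Placeholder((), 'float32')
smoothed, ema_state = ExponentialMovingAverage(stream, alpha=0.99)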
def create_fns(input, in_signs, Ds, x, m0, m1, m2, batch_in_signs, alpha=0.1,
               sigma=1, sigma_x=1, lr=0.0002):
    # same construction as above, but with trainable weights and a training
    # function built from the batched statistics m0, m1, m2
    cumulative_units = np.concatenate([[0], np.cumsum(Ds[:-1])])
    BS = batch_in_signs.shape[0]

    Ws = [T.Variable(sj.initializers.glorot((j, i)) * sigma)
          for j, i in zip(Ds[1:], Ds[:-1])]
    bs = [T.Variable(sj.initializers.he((j,)) * sigma) for j in Ds[1:-1]] \
         + [T.Variable(T.zeros((Ds[-1],)))]

    A_w = [T.eye(Ds[0])]
    B_w = [T.zeros(Ds[0])]

    A_q = [T.eye(Ds[0])]
    B_q = [T.zeros(Ds[0])]

    batch_A_q = [T.eye(Ds[0]) * T.ones((BS, 1, 1))]
    batch_B_q = [T.zeros((BS, Ds[0]))]

    maps = [input]
    signs = []
    masks = [T.ones(Ds[0])]

    in_masks = T.where(T.concatenate([T.ones(Ds[0]), in_signs]) > 0, 1., alpha)
    batch_in_masks = T.where(
        T.concatenate([T.ones((BS, Ds[0])), batch_in_signs], 1) > 0, 1., alpha)

    for w, b in zip(Ws[:-1], bs[:-1]):
        pre_activation = T.matmul(w, maps[-1]) + b
        signs.append(T.sign(pre_activation))
        masks.append(T.where(pre_activation > 0, 1., alpha))
        maps.append(pre_activation * masks[-1])

    maps.append(T.matmul(Ws[-1], maps[-1]) + bs[-1])

    # compute per region A and B
    for start, end, w, b, m in zip(cumulative_units[:-1], cumulative_units[1:],
                                   Ws, bs, masks):
        A_w.append(T.matmul(w * m, A_w[-1]))
        B_w.append(T.matmul(w * m, B_w[-1]) + b)

        A_q.append(T.matmul(w * in_masks[start:end], A_q[-1]))
        B_q.append(T.matmul(w * in_masks[start:end], B_q[-1]) + b)

        batch_A_q.append(
            T.matmul(w * batch_in_masks[:, None, start:end], batch_A_q[-1]))
        batch_B_q.append((w * batch_in_masks[:, None, start:end]
                          * batch_B_q[-1][:, None, :]).sum(2) + b)

    batch_B_q = batch_B_q[-1]
    batch_A_q = batch_A_q[-1]

    signs = T.concatenate(signs)

    inequalities = T.hstack(
        [T.concatenate(B_w[1:-1])[:, None], T.vstack(A_w[1:-1])]) * signs[:, None]

    inequalities_code = T.hstack(
        [T.concatenate(B_q[1:-1])[:, None],
         T.vstack(A_q[1:-1])]) * in_signs[:, None]

    #### loss
    log_sigma2 = T.Variable(sigma_x)
    sigma2 = T.exp(log_sigma2)

    Am1 = T.einsum('qds,nqs->nqd', batch_A_q, m1)
    Bm0 = T.einsum('qd,nq->nd', batch_B_q, m0)
    B2m0 = T.einsum('nq,qd->n', m0, batch_B_q**2)
    AAm2 = T.einsum('qds,qdu,nqup->nsp', batch_A_q, batch_A_q, m2)
    inner = -(x * (Am1.sum(1) + Bm0)).sum(1) + (Am1 * batch_B_q).sum((1, 2))

    loss_2 = (x**2).sum(1) + B2m0 + T.trace(AAm2, axis1=1, axis2=2).squeeze()
    loss_z = T.trace(m2.sum(1), axis1=1, axis2=2).squeeze()

    cst = 0.5 * (Ds[0] + Ds[-1]) * T.log(2 * np.pi)

    loss = cst + 0.5 * Ds[-1] * log_sigma2 + inner / sigma2 \
           + 0.5 * loss_2 / sigma2 + 0.5 * loss_z
    mean_loss = loss.mean()

    optimizer = sj.optimizers.NesterovMomentum(mean_loss, Ws + bs, lr, 0.9)

    train_f = sj.function(batch_in_signs, x, m0, m1, m2, outputs=mean_loss,
                          updates=optimizer.updates)
    f = sj.function(input,
                    outputs=[maps[-1], A_w[-1], B_w[-1], inequalities, signs])
    g = sj.function(in_signs, outputs=[A_q[-1], B_q[-1]])
    all_g = sj.function(in_signs, outputs=inequalities_code)
    h = sj.function(input, outputs=maps[-1])

    return f, g, h, all_g, train_f, sigma2
    def __init__(
        self,
        sequence,
        init_h,
        units,
        Wf=initializers.glorot_uniform,
        Uf=initializers.orthogonal,
        bf=T.zeros,
        Wi=initializers.glorot_uniform,
        Ui=initializers.orthogonal,
        bi=T.zeros,
        Wo=initializers.glorot_uniform,
        Uo=initializers.orthogonal,
        bo=T.zeros,
        Wc=initializers.glorot_uniform,
        Uc=initializers.orthogonal,
        bc=T.zeros,
        trainable_Wf=True,
        trainable_Uf=True,
        trainable_bf=True,
        trainable_Wi=True,
        trainable_Ui=True,
        trainable_bi=True,
        trainable_Wo=True,
        trainable_Uo=True,
        trainable_bo=True,
        trainable_Wc=True,
        trainable_Uc=True,
        trainable_bc=True,
        activation_g=nn.sigmoid,
        activation_c=T.tanh,
        activation_h=T.tanh,
        only_last=False,
        gate="minimal",
    ):
        self.create_variable("Wf", Wf, (sequence.shape[2], units),
                             trainable=trainable_Wf)
        self.create_variable("Uf", Uf, (units, units), trainable=trainable_Uf)
        self.create_variable("bf", bf, (units,), trainable=trainable_bf)

        self.create_variable("Wi", Wi, (sequence.shape[2], units),
                             trainable=trainable_Wi)
        self.create_variable("Ui", Ui, (units, units), trainable=trainable_Ui)
        self.create_variable("bi", bi, (units,), trainable=trainable_bi)

        self.create_variable("Wo", Wo, (sequence.shape[2], units),
                             trainable=trainable_Wo)
        self.create_variable("Uo", Uo, (units, units), trainable=trainable_Uo)
        self.create_variable("bo", bo, (units,), trainable=trainable_bo)

        self.create_variable("Wc", Wc, (sequence.shape[2], units),
                             trainable=trainable_Wc)
        self.create_variable("Uc", Uc, (units, units), trainable=trainable_Uc)
        self.create_variable("bc", bc, (units,), trainable=trainable_bc)

        def fn(*args):
            return self.gate(*args, activation_g, activation_c, activation_h)

        init = T.stack((init_h, T.zeros(init_h.shape, init_h.dtype)))
        last, output = T.scan(
            fn,
            init=init,
            sequences=[sequence.transpose((1, 0, 2))],
            non_sequences=[
                self.Wf, self.Uf, self.bf,
                self.Wi, self.Ui, self.bi,
                self.Wo, self.Uo, self.bo,
                self.Wc, self.Uc, self.bc,
            ],
        )

        if only_last:
            return last
        else:
            return output.transpose((1, 0, 2))
import sys
sys.path.insert(0, "../")

import symjax as sj
import symjax.tensor as T

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

###### 2D GAUSSIAN EXAMPLE

t = T.linspace(-5, 5, 5)
x, y = T.meshgrid(t, t)
X = T.stack([x.flatten(), y.flatten()], 1)
p = T.pdfs.multivariate_normal.pdf(X, T.zeros(2), T.eye(2))
p = p.reshape((5, 5)).round(2)

print(p)
# Tensor(Op=round_, shape=(5, 5), dtype=float32)
# lazy evaluation (not compiled nor optimized)

print(p.get())
# [[0.   0.   0.   0.   0.  ]
#  [0.   0.   0.01 0.   0.  ]
#  [0.   0.01 0.16 0.01 0.  ]
#  [0.   0.   0.01 0.   0.  ]
#  [0.   0.   0.   0.   0.  ]]

# create the function which internally compiles and optimizes the graph;
# the function does not take any arguments and only outputs the pdf grid
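# A plausible completion of the truncated example (an assumption), following
# the sj.function(outputs=...) pattern used in the other examples:
f = sj.function(outputs=p)
print(f())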
import symjax
import symjax.tensor as T

# map
xx = T.ones(10)
a = T.map(lambda a: a * 2, xx)
g = symjax.gradients(a.sum(), xx)[0]
f = symjax.function(outputs=[a, g])

# scan
xx = T.ones(10) * 2
a = T.scan(lambda c, x: (c * x, c * x), T.ones(1), xx)
g = symjax.gradients(a[1][-1], xx)[0]
f = symjax.function(outputs=[a, g])

# scan with updates
xx = T.range(5)
uu = T.ones((10, 2))
vvar = T.Variable(T.zeros((10, 2)))
vv = T.index_add(vvar, 1, 1)
a = T.scan(lambda c, x, p: (T.index_update(c, x, p[x]), 1), vv, xx, [vv])
#a = T.scan(lambda c, x: (c*x,c*x), T.ones(1), xx)
#a = T.scan(lambda c, x: (T.square(c),c[0]), uu, xx)
#g = symjax.gradients(a[1][-1],xx)
f = symjax.function(outputs=a[0], updates={vvar: vvar + 1})
print(f(), f(), f())

# fori loop
b = T.Placeholder((), 'int32')
xx = T.ones(1)
a = T.fori_loop(0, b, lambda i, x: i * x, xx)
f = symjax.function(b, outputs=a)
print(f(0), f(1), f(2), f(3))
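# For reference (arithmetic, not captured output): with body i * x and
# initial x = 1, the loop returns 1 for b = 0 (no iteration) and 0 for any
# b >= 1, since the first iteration multiplies by i = 0.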
import symjax
import symjax.tensor as T

g = symjax.Graph("model1")
with g:
    learning_rate = T.Variable(T.ones((1,)))
    with symjax.Graph("layer1"):
        W1 = T.Variable(T.zeros((1,)), name="W")
        b1 = T.Variable(T.zeros((1,)), name="b")
    with symjax.Graph("layer2"):
        W2 = T.Variable(T.zeros((1,)), name="W")
        b2 = T.Variable(T.zeros((1,)), name="b")

# define an irrelevant loss function involving the parameters
loss = (W1 + b1 + W2 + b2) * learning_rate

# and a train/update function
train = symjax.function(
    outputs=loss,
    updates={W1: W1 + 1, b1: b1 + 2, W2: W2 + 2, b2: b2 + 3},
)

# pretend we train for a while; each call adds 1 + 2 + 2 + 3 = 8 to the loss
for i in range(4):
    print(train())
# [0.]
# [8.]
# [16.]
# [24.]
import sys
sys.path.insert(0, "../")

import symjax as sj
import symjax.tensor as T
import numpy as np

__author__ = "Randall Balestriero"

# example of cumulative sum
def func(carry, x):
    return carry + 1, 0

output, _ = T.scan(func, T.zeros(1), T.ones(10), length=10)
f = sj.function(outputs=output)
print(f())
# [10.]

# example of simple RNN
w = T.Placeholder((3, 10), 'float32')
h = T.random.randn((3, 3))
b = T.random.randn((3,))

t_steps = 100
X = T.random.randn((t_steps, 10))

def rnn_cell(carry, x, w):
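    # The original snippet is truncated here; the following body and scan call
    # are an assumed minimal completion consistent with the shapes above.
    new_h = T.tanh(T.matmul(w, x) + T.matmul(h, carry) + b)
    return new_h, new_h

last_h, hiddens = T.scan(rnn_cell, T.zeros(3), X, [w])
f_rnn = sj.function(w, outputs=hiddens)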
import jax.numpy as jnp

import symjax as sj
import symjax.tensor as T

__author__ = "Randall Balestriero"


class product:
    def __init__(self, W, V=1):
        self.W = jnp.square(V * W * (W > 0).astype("float32"))
        self.ndim = self.compute_ndim()

    def feed(self, x):
        return jnp.dot(self.W, x)

    def compute_ndim(self):
        return self.W.shape[0] * self.W.shape[1]


wrapped = T.wrap_class(product, method_exceptions=["compute_ndim"])

a = wrapped(T.zeros((10, 10)), V=T.ones((10, 10)))
x = T.random.randn((10, 100))

print(a.W)
# (Tensor: name=function[0], shape=(10, 10), dtype=float32)
print(a.feed(x))
# Op(name=feed, shape=(10, 100), dtype=float32, scope=/)

f = sj.function(outputs=a.feed(x))
f()
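# Note: since W is initialized to zeros here, W > 0 is everywhere False and
# the wrapped feed returns a (10, 100) array of zeros when evaluated.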
import symjax
import symjax.tensor as T

# scope/graph naming and accessing

value1 = T.Variable(T.ones((1,)))
value2 = T.Variable(T.zeros((1,)))

g = symjax.Graph("special")
with g:
    value3 = T.Variable(T.zeros((1,)))
    value4 = T.Variable(T.zeros((1,)))
    result = value3 + value4

    h = symjax.Graph("inversion")
    with h:
        value5 = T.Variable(T.zeros((1,)))
        value6 = T.Variable(T.zeros((1,)))
        value7 = T.Variable(T.zeros((1,)), name="w")

print(g.variables)
# {'unnamed_variable': Variable(name=unnamed_variable, shape=(1,), dtype=float32, trainable=True, scope=/special/),
#  'unnamed_variable_1': Variable(name=unnamed_variable_1, shape=(1,), dtype=float32, trainable=True, scope=/special/)}

print(h.variables)
# {'unnamed_variable': Variable(name=unnamed_variable, shape=(1,), dtype=float32, trainable=True, scope=/special/inversion/),
#  'unnamed_variable_1': Variable(name=unnamed_variable_1, shape=(1,), dtype=float32, trainable=True, scope=/special/inversion/),
#  'w': Variable(name=w, shape=(1,), dtype=float32, trainable=True, scope=/special/inversion/)}

print(h.variable("w"))
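# expected to print the same repr as the 'w' entry listed above:
# Variable(name=w, shape=(1,), dtype=float32, trainable=True, scope=/special/inversion/)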