def __init__(self, obs_dim, ctrl_dim):
    cgt.set_precision('double')
    Serializable.__init__(self, obs_dim, ctrl_dim)
    self.obs_dim = obs_dim
    self.ctrl_dim = ctrl_dim

    # Symbolic inputs: observations, actions, advantages, and the old
    # policy's concatenated (mean, std) distribution parameters.
    o_no = cgt.matrix("o_no", fixed_shape=(None, obs_dim))
    a_na = cgt.matrix("a_na", fixed_shape=(None, ctrl_dim))
    adv_n = cgt.vector("adv_n")
    oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2 * ctrl_dim))

    self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)), name="std_1a")
    std_1a = cgt.exp(logstd_1a)

    # Here's where we apply the network: two tanh hidden layers producing the action mean.
    h0 = o_no
    nhid = 32
    h1 = cgt.tanh(nn.Affine(obs_dim, nhid, weight_init=nn.IIDGaussian(std=0.1))(h0))
    h2 = cgt.tanh(nn.Affine(nhid, nhid, weight_init=nn.IIDGaussian(std=0.1))(h1))
    mean_na = nn.Affine(nhid, ctrl_dim, weight_init=nn.IIDGaussian(std=0.01))(h2)

    b = cgt.size(o_no, 0)
    std_na = cgt.repeat(std_1a, b, axis=0)

    oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
    oldstd_na = oldpdist_np[:, self.ctrl_dim:2 * self.ctrl_dim]

    # Log-probabilities of the actions under the new and old diagonal Gaussian
    # policies (up to a shared constant), and the importance-sampled surrogate objective.
    logp_n = ((-.5) * cgt.square((a_na - mean_na) / std_na).sum(axis=1)) - logstd_1a.sum()
    oldlogp_n = ((-.5) * cgt.square((a_na - oldmean_na) / oldstd_na).sum(axis=1)) - cgt.log(oldstd_na).sum(axis=1)
    ratio_n = cgt.exp(logp_n - oldlogp_n)
    surr = (ratio_n * adv_n).mean()

    pdists_np = cgt.concatenate([mean_na, std_na], axis=1)

    params = nn.get_parameters(surr)

    # KL divergence between the old and new diagonal Gaussians, averaged over the batch.
    oldvar_na = cgt.square(oldstd_na)
    var_na = cgt.square(std_na)
    kl = (cgt.log(std_na / oldstd_na) + (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na) - .5).sum(axis=1).mean()

    lam = cgt.scalar()
    penobj = surr - lam * kl

    self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
    self._compute_grad_lagrangian = cgt.function(
        [lam, oldpdist_np, o_no, a_na, adv_n],
        cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
    self.f_pdist = cgt.function([o_no], pdists_np)
    self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
    self.pc = ParamCollection(params)

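# A minimal usage sketch for the policy constructed above (not from the original
# source): it assumes the enclosing class has been instantiated as `policy` with
# obs_dim=4 and ctrl_dim=2, and the random arrays below merely stand in for a
# batch of rollout data.
obs_dim, ctrl_dim, n = 4, 2, 16
o_no = np.random.randn(n, obs_dim)
a_na = np.random.randn(n, ctrl_dim)
adv_n = np.random.randn(n)
oldpdist_np = policy.f_pdist(o_no)  # concatenated (mean, std) of the current policy
surr, kl = policy._compute_surr_kl(oldpdist_np, o_no, a_na, adv_n)
flatgrad = policy._compute_grad_lagrangian(0.1, oldpdist_np, o_no, a_na, adv_n)
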
def __init__(self, xdim, args, dec="bernoulli"):
    self.xdim = xdim
    self.hdim = args.hdim
    self.zdim = args.zdim
    self.lmbda = args.lmbda  # weight decay coefficient * 2
    self.x = cgt.matrix("x", dtype=cgt.floatX)
    self.eps = cgt.matrix("eps", dtype=cgt.floatX)

    # Encoder q(z | x), reparameterized with the externally supplied noise eps.
    self.enc_mlp = GaussianMLP(self.x, self.xdim, self.hdim, self.zdim, nlayers=args.nlayers, eps=self.eps)

    # Decoder p(x | z).
    if dec == "bernoulli":
        # log p(x | z) defined as -CE(x, y) = dec_mlp.cost(y)
        self.dec_mlp = BernoulliMLP(self.enc_mlp.out, self.zdim, self.hdim, self.xdim, nlayers=args.nlayers, y=self.x)
    elif dec == "gaussian":
        self.dec_mlp = GaussianMLP(self.enc_mlp.out, self.zdim, self.hdim, self.xdim, nlayers=args.nlayers, y=self.x)
    else:
        raise RuntimeError("unrecognized decoder %s" % dec)

    # Negative variational lower bound, averaged over the minibatch.
    self.cost = (-cgt.sum(kld_unit_mvn(self.enc_mlp.mu, self.enc_mlp.var)) + self.dec_mlp.cost) / args.batch_size
    self.params = self.enc_mlp.params + self.dec_mlp.params
    # L2 regularization
    self.gparams = [cgt.grad(self.cost, [p])[0] + self.lmbda * p for p in self.params]
    self.gaccums = [cgt.shared(np.zeros(p.op.get_value().shape, dtype=cgt.floatX)) for p in self.params]

    # XXX replace w/ adagrad update from nn
    ADAGRAD_EPS = 1e-10  # for stability
    self.updates = [
        (param, param - args.lr * gparam / cgt.sqrt(gaccum + cgt.square(gparam) + ADAGRAD_EPS))
        for param, gparam, gaccum in zip(self.params, self.gparams, self.gaccums)
    ]
    self.updates += [
        (gaccum, gaccum + cgt.square(gparam))
        for gaccum, gparam in zip(self.gaccums, self.gparams)
    ]

    self.train = cgt.function(
        [self.x, self.eps],
        self.cost,
        updates=self.updates
    )
    self.test = cgt.function(
        [self.x, self.eps],
        self.cost,
        updates=None
    )
    # can be used for semi-supervised learning for example
    self.encode = cgt.function(
        [self.x, self.eps],
        self.enc_mlp.out
    )

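# A minimal training-loop sketch for the VAE defined above (illustrative only):
# it assumes the enclosing class is called `VAE`, that `args` provides
# hdim/zdim/nlayers/lmbda/lr/batch_size, and that `X_batches` yields
# (batch_size, xdim) float arrays; only .train and the constructor signature
# come from the code above.
model = VAE(xdim=784, args=args, dec="bernoulli")
for epoch in range(10):
    for x_batch in X_batches:
        # Draw fresh reparameterization noise for every minibatch.
        eps = np.random.randn(x_batch.shape[0], args.zdim).astype(cgt.floatX)
        cost = model.train(x_batch.astype(cgt.floatX), eps)
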
def test_flatvec():
    cgt.reset_config()
    cgt.set_precision('double')
    cgt.core.update_config(backend="python")  # XXX
    N = 10
    K = 3
    Xval = np.random.randn(N, K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)
    X_nk = cgt.shared(Xval, "X")
    y_n = cgt.shared(yval, "y")
    w_k = cgt.shared(wval, "w")
    b = cgt.shared(bval, name="b")
    ypred = cgt.dot(X_nk, w_k) + b
    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])
    g = core.simplify(g)
    pars = [w_k, b]
    flatx = nn.setup_contiguous_storage(pars)
    f = cgt.function([], [err, cgt.flatcat(g)])

def test_devices():
    N = 10
    K = 3
    compile_info = cgt.compilation.get_compile_info()
    cuda_enabled = compile_info["CGT_ENABLE_CUDA"]
    if not cuda_enabled:
        raise SkipTest("cuda disabled")
    Xval = np.random.randn(N, K).astype(cgt.floatX)
    wval = np.random.randn(K).astype(cgt.floatX)
    bval = np.asarray(np.random.randn()).astype(cgt.floatX)
    yval = np.random.randn(N).astype(cgt.floatX)
    with cgt.scoped_update_config(default_device=cgt.Device(devtype="gpu")):
        X_nk = cgt.shared(Xval, "X", device=cgt.Device(devtype='gpu'))
        y_n = cgt.shared(yval, "y")
        w_k = cgt.shared(wval, "w")
        b = cgt.shared(bval, name="b")
        print "bval", bval
        ypred = cgt.dot(cgt.square(X_nk), w_k) + b
        err = cgt.sum(cgt.sin(ypred - y_n))
        g = cgt.grad(err, [w_k, b])
        outputs = [err] + g
        f = cgt.function([], [err] + g)
        results = f()
        print results
        assert np.allclose(results[0], np.sin(np.square(Xval).dot(wval) + bval - yval).sum())

def runTest(self):
    cgt.set_precision('double')
    x = cgt.vector()
    y = cgt.square(x)
    eg = cgt.execution.compilation_pipeline([x], [y + y], [])
    pprint.pprint(eg.to_json())
    import cycgt
    interp = cycgt.cInterpreter(eg)
    print interp(np.array([3, 4, 5, 6], 'f8'))

def rmsprop_updates(cost, params, stepsize=0.001, rho=0.9, epsilon=1e-6):
    grads = cgt.grad(cost, params)
    updates = []
    for p, g in zip(params, grads):
        # Exponential moving average of squared gradients.
        acc = cgt.shared(p.op.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * cgt.square(g)
        # Scale each gradient by the root-mean-square of its recent history.
        gradient_scaling = cgt.sqrt(acc_new + epsilon)
        g = g / gradient_scaling
        updates.append((acc, acc_new))
        updates.append((p, p - stepsize * g))
    return updates

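# A minimal usage sketch for rmsprop_updates (the names below are made up for
# illustration): build a squared-error cost over a cgt shared parameter, collect
# the RMSProp update pairs, and compile a training step with cgt.function.
X = cgt.matrix("X")
y = cgt.vector("y")
w = cgt.shared(np.zeros(5, dtype=cgt.floatX), name="w")
cost = cgt.sum(cgt.square(cgt.dot(X, w) - y))
updates = rmsprop_updates(cost, [w], stepsize=0.01)
train_step = cgt.function([X, y], cost, updates=updates)
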
def test_linreg():
    cgt.reset_config()
    cgt.set_precision('double')
    N = 10
    K = 3
    Xval = np.random.randn(N, K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)
    X_nk = cgt.matrix("X")
    y_n = cgt.vector("y")
    w_k = cgt.vector("w")
    b = cgt.scalar(name="b")
    ypred = cgt.dot(X_nk, w_k) + b
    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])
    g_simple, an, _ = cgt.core.simplify_and_analyze(g)
    print "Loss function:"
    cgt.print_tree([err])
    print "Gradient:"
    cgt.print_tree(g)
    print "Gradient simplified"
    cgt.print_tree(
        g_simple,
        nodefn=lambda node, o: o.write(" " + an["node2hash"][node][:5]))
    print "-------"
    d = {X_nk: Xval, w_k: wval, b: bval, y_n: yval}
    np.testing.assert_allclose(cgt.numeric_eval(err, d),
                               np.linalg.norm(Xval.dot(wval) + bval - yval)**2)
    np.testing.assert_allclose(cgt.numeric_eval(g[0], d),
                               2 * Xval.T.dot(Xval.dot(wval) + bval - yval))
    np.testing.assert_allclose(cgt.numeric_eval(g[1], d),
                               2 * np.sum(Xval.dot(wval) + bval - yval, 0))

def test_linreg():
    N = 10
    K = 3
    Xval = np.random.randn(N, K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)
    X_nk = cgt.matrix("X")
    y_n = cgt.vector("y")
    w_k = cgt.vector("w")
    b = cgt.scalar(name="b")
    ypred = cgt.dot(X_nk, w_k) + b
    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])
    g_simple, an, _ = cgt.core.simplify_and_analyze(g)
    print "Loss function:"
    cgt.print_tree([err])
    print "Gradient:"
    cgt.print_tree(g)
    print "Gradient simplified"
    cgt.print_tree(
        g_simple,
        nodefn=lambda node, o: o.write(" " + an["node2hash"][node][:5]))
    print "-------"
    d = {X_nk: Xval, w_k: wval, b: bval, y_n: yval}
    atol = {"single": 1e-3, "double": 1e-6}[cgt.get_precision()]
    np.testing.assert_allclose(cgt.numeric_eval(err, d),
                               np.linalg.norm(Xval.dot(wval) + bval - yval)**2,
                               atol=atol)
    np.testing.assert_allclose(cgt.numeric_eval(g[0], d),
                               2 * Xval.T.dot(Xval.dot(wval) + bval - yval),
                               atol=atol)
    np.testing.assert_allclose(cgt.numeric_eval(g[1], d),
                               2 * np.sum(Xval.dot(wval) + bval - yval, 0),
                               atol=atol)

# Be careful when setting alpha! If it's too large
# here the cost will blow up.
alpha = 1e-7
epochs = 100

# Linear regression model
np.random.seed(0)
X = cgt.matrix("X", fixed_shape=(None, nfeats))
Y = cgt.vector("Y")
w = cgt.shared(np.random.randn(nfeats) * 0.01)

# prediction
ypred = cgt.dot(X, w)
# cost
cost = cgt.square(Y - ypred).mean()
# derivative with respect to w
dw = cgt.grad(cost=cost, wrt=w)
updates = [(w, w - dw * alpha)]

# training function
trainf = cgt.function(inputs=[X, Y], outputs=[], updates=updates)
# cost function, no updates
costf = cgt.function(inputs=[X, Y], outputs=cost)

for i in xrange(epochs):
    trainf(X_train, Y_train)
    C = costf(X_test, Y_test)
    print("epoch {} cost = {}".format(i + 1, C))

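# The example above assumes nfeats, X_train, Y_train, X_test, Y_test already exist;
# defining a throwaway synthetic dataset such as the following (purely illustrative)
# before the snippet makes it self-contained.
nfeats = 10
X_all = np.random.randn(200, nfeats).astype(cgt.floatX)
Y_all = X_all.dot(np.random.randn(nfeats)).astype(cgt.floatX)
X_train, Y_train = X_all[:150], Y_all[:150]
X_test, Y_test = X_all[150:], Y_all[150:]
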
def square(x):
    return cgt.square(x)

def kld_unit_mvn(mu, var):
    # Negative KL divergence between N(mu, diag(var)) and N(0, I), i.e. the KL term
    # of the variational lower bound: -KL = 0.5 * sum(1 + log(var) - mu^2 - var).
    return (mu.shape[1] + cgt.sum(cgt.log(var), axis=1) -
            cgt.sum(cgt.square(mu), axis=1) - cgt.sum(var, axis=1)) / 2.0

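# A quick sanity-check sketch (variable names made up): kld_unit_mvn should agree
# with the closed form -KL(N(mu, diag(var)) || N(0, I)) = 0.5 * sum(1 + log var - mu^2 - var),
# evaluated here through cgt.numeric_eval as in the tests above.
mu = cgt.matrix("mu")
var = cgt.matrix("var")
mu_val = np.random.randn(5, 3).astype(cgt.floatX)
var_val = (np.random.rand(5, 3) + 0.1).astype(cgt.floatX)
expected = 0.5 * np.sum(1 + np.log(var_val) - mu_val ** 2 - var_val, axis=1)
actual = cgt.numeric_eval(kld_unit_mvn(mu, var), {mu: mu_val, var: var_val})
assert np.allclose(actual, expected)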