def __init__(self, n_actions): Serializable.__init__(self, n_actions) cgt.set_precision('double') n_in = 128 o_no = cgt.matrix("o_no",fixed_shape=(None,n_in)) a_n = cgt.vector("a_n",dtype='i8') q_n = cgt.vector("q_n") oldpdist_np = cgt.matrix("oldpdists") h0 = (o_no - 128.0)/128.0 nhid = 64 h1 = cgt.tanh(nn.Affine(128,nhid,weight_init=nn.IIDGaussian(std=.1))(h0)) probs_na = nn.softmax(nn.Affine(nhid,n_actions,weight_init=nn.IIDGaussian(std=0.01))(h1)) logprobs_na = cgt.log(probs_na) b = cgt.size(o_no, 0) logps_n = logprobs_na[cgt.arange(b), a_n] surr = (logps_n*q_n).mean() kl = (oldpdist_np * cgt.log(oldpdist_np/probs_na)).sum(axis=1).mean() params = nn.get_parameters(surr) gradsurr = cgt.grad(surr, params) flatgrad = cgt.concatenate([p.flatten() for p in gradsurr]) lam = cgt.scalar() penobj = surr - lam * kl self._f_grad_lagrangian = cgt.function([lam, oldpdist_np, o_no, a_n, q_n], cgt.concatenate([p.flatten() for p in cgt.grad(penobj,params)])) self.f_pdist = cgt.function([o_no], probs_na) self.f_probs = cgt.function([o_no], probs_na) self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n], [surr, kl]) self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad) self.pc = ParamCollection(params)
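# For reference: the `kl` term in the constructor above is the discrete KL
# divergence between the old and new action distributions, averaged over the
# batch. A plain-numpy restatement of that quantity (array names are illustrative):
import numpy as np

def discrete_kl_sketch(old_probs_na, new_probs_na):
    # Matches (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1).mean() above.
    return (old_probs_na * np.log(old_probs_na / new_probs_na)).sum(axis=1).mean()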
def test_cudnn():
    compile_info = get_compile_info()
    if not (compile_info["CGT_ENABLE_CUDNN"] and compile_info["CGT_ENABLE_CUDA"]):
        raise SkipTest("CUDNN not enabled. Skipping this test")
    Xval = nr.randn(2, 3, 19, 18)
    Wval = nr.randn(5, 3, 3, 3)
    bval = nr.randn(1, 5, 1, 1)
    X = cgt.tensor4("X", fixed_shape=Xval.shape)
    W = cgt.tensor4("W", fixed_shape=Wval.shape)
    b = cgt.tensor4("b", fixed_shape=bval.shape)
    Y = cgt.core.Result(cudnn_ops.CudnnConvForward(1, 1, 1, 1), [X, W, b])
    Y2 = nr.randn(*cgt.core.infer_shape(Y))
    fY = cgt.function([X, W, b], Y)
    Yval = fY(Xval, Wval, bval)
    cost = (Y * Y2).sum()
    fcost = cgt.function([X, W, b], cost)
    fgrad = cgt.function([X, W, b], cgt.grad(cost, [X, W, b]))
    angrads = fgrad(Xval, Wval, bval)
    nugrads = numeric_grad_multi(fcost, [Xval, Wval, bval], eps=1e-3)
    for (nugrad, angrad) in zip(nugrads, angrads):
        assert np.allclose(nugrad, angrad, rtol=9e-3, atol=1e-7)
def test_cudnn(): if not get_compile_info()["CGT_ENABLE_CUDNN"]: raise SkipTest("CUDNN not enabled. Skipping this test") Xval = nr.randn(2, 3, 19, 18) Wval = nr.randn(5, 3, 3, 3) bval = nr.randn(1, 5, 1, 1) X = cgt.tensor4("X", fixed_shape=Xval.shape) W = cgt.tensor4("W", fixed_shape=Wval.shape) b = cgt.tensor4("b", fixed_shape=bval.shape) Y = cgt.core.Result(cudnn_ops.CudnnConvForward(1, 1, 1, 1), [X, W, b]) Y2 = nr.randn(*cgt.core.infer_shape(Y)) fY = cgt.function([X, W, b], Y) Yval = fY(Xval, Wval, bval) cost = (Y * Y2).sum() fcost = cgt.function([X, W, b], cost) fgrad = cgt.function([X, W, b], cgt.grad(cost, [X, W, b])) angrads = fgrad(Xval, Wval, bval) nugrads = numeric_grad_multi(fcost, [Xval, Wval, bval], eps=1e-3) for (nugrad, angrad) in zip(nugrads, angrads): assert np.allclose(nugrad, angrad)
def test_cudnn(): with cgt.scoped_update_config(precision="double",backend="native"): if not get_compile_info()["CGT_ENABLE_CUDNN"]: raise SkipTest("CUDNN not enabled. Skipping this test") Xval = nr.randn(2,3,19,18) Wval = nr.randn(5,3,3,3) bval = nr.randn(1,5,1,1) X = cgt.tensor4("X", fixed_shape=Xval.shape) W = cgt.tensor4("W", fixed_shape=Wval.shape) b = cgt.tensor4("b", fixed_shape=bval.shape) Y = cgt.core.Result(cudnn_ops.CudnnConvForward(1,1,1,1),[X, W, b]) Y2 = nr.randn(*cgt.core.infer_shape(Y)) fY = cgt.function([X,W,b],Y) Yval = fY(Xval,Wval,bval) cost = (Y*Y2).sum() fcost = cgt.function([X,W,b],cost) fgrad = cgt.function([X,W,b],cgt.grad(cost, [X,W,b])) angrads = fgrad(Xval,Wval,bval) nugrads = numeric_grad_multi(fcost, [Xval, Wval, bval],eps=1e-3) for (nugrad,angrad) in zip(nugrads,angrads): assert np.allclose(nugrad, angrad)
def gradcheck_model(cost, params, extravars=(), extravals=(), atol=1e-8, eps=1e-9):
    precision = cgt.get_precision()
    if precision == "single":
        cgt.utils.warn("You're doing a gradient check with %s precision. Use double or better yet quad for best results" % (precision))
    assert all(param.is_input() for param in params)
    assert len(extravars) == len(extravals)
    # Convert to Argument nodes
    param_args = [cgt.core.Argument(typ=s.typ, name=s.name) if s.is_data() else s for s in params]
    # Get new cost in terms of the Argument nodes
    cost = cgt.core.clone(cost, replace=dict(zip(params, param_args)))
    grads = cgt.grad(cost, param_args)
    paramvals = [param.op.get_value() for param in params]
    fcost = cgt.function(param_args, cost, givens=zip(extravars, extravals))
    fgrad = cgt.function(param_args, grads, givens=zip(extravars, extravals))
    angrads = fgrad(*paramvals)
    nugrads = numeric_grad_multi(fcost, paramvals, eps=eps)
    for (angrad, nugrad) in zip(angrads, nugrads):
        assert np.allclose(angrad, nugrad, atol=atol)
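# `numeric_grad_multi` is assumed here to compute central finite differences of
# a scalar-valued function with respect to each input array. A minimal numpy
# sketch of that assumption (not the library's actual implementation):
import numpy as np

def numeric_grad_multi_sketch(f, xs, eps=1e-9):
    grads = []
    for i in range(len(xs)):
        x = np.array(xs[i], dtype=float)      # local copy we can perturb
        g = np.zeros_like(x)
        flat_x, flat_g = x.ravel(), g.ravel()
        args = list(xs)
        args[i] = x
        for j in range(flat_x.size):
            orig = flat_x[j]
            flat_x[j] = orig + eps
            fplus = f(*args)
            flat_x[j] = orig - eps
            fminus = f(*args)
            flat_x[j] = orig
            flat_g[j] = (fplus - fminus) / (2.0 * eps)
        grads.append(g)
    return grads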
def check_scalar_grads(precision, backend): cgt.reset_config() np.random.seed(0) cgt.set_precision(precision) cgt.core.update_config(backend=backend) x = cgt.scalar('x') y = cgt.scalar('y') z = cgt.scalar('z') vars = [x,y,z] #pylint: disable=W0622 vals = nr.rand(len(vars))+1 PROB2RESULT = {} for ((key,_), cls) in it.chain( it.izip(core.UNARY_INFO.items(),it.repeat(core.ElwiseUnary)), it.izip(core.BINARY_INFO.items(),it.repeat(core.ElwiseBinary)) ): if key == "conj": print "skipping conj" continue utils.colorprint(utils.Color.YELLOW, "Testing %s\n"%key) if cls == core.ElwiseUnary: n_in = 1 op = cls(key) else: n_in = 2 op = cls(key, (True,True)) inputvars = vars[0:n_in] inputvals = vals[0:n_in] out = core.Result(op, inputvars) f = cgt.function(inputvars, out) try: grads = cgt.grad(out, inputvars) except core.NonDifferentiable: print "nondiff" continue if DISPLAY: print "Function:" cgt.print_tree(out) print "Gradient original:" cgt.print_tree(grads) print "Gradient simplified:" grads_simple = core.simplify(grads) if DISPLAY: cgt.print_tree(grads_simple) gradf = cgt.function(inputvars, grads) eps = {"single":1e-4,"double":1e-9}[precision] nugrad = numeric_grad(lambda li: f(*li), inputvals,eps=eps) #pylint: disable=W0640 cgtgrad = gradf(*inputvals) np.testing.assert_almost_equal(nugrad,cgtgrad,decimal={"single":3,"double":6}[precision]) grad_count = core.count_nodes(grads_simple) PROB2RESULT[key] = {} PROB2RESULT[key]["grad"] = grad_count if DISPLAY: from thirdparty.tabulate import tabulate print tabulate([[key,val["grad"]] for (key,val) in PROB2RESULT.iteritems()],headers=["funcname","gradcount"])
def __init__(self, obs_dim, ctrl_dim):
    cgt.set_precision('double')
    Serializable.__init__(self, obs_dim, ctrl_dim)
    self.obs_dim = obs_dim
    self.ctrl_dim = ctrl_dim

    o_no = cgt.matrix("o_no", fixed_shape=(None, obs_dim))
    a_na = cgt.matrix("a_na", fixed_shape=(None, ctrl_dim))
    adv_n = cgt.vector("adv_n")
    oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2*ctrl_dim))
    self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)), name="std_1a")
    std_1a = cgt.exp(logstd_1a)

    # Here's where we apply the network
    h0 = o_no
    nhid = 32
    h1 = cgt.tanh(nn.Affine(obs_dim, nhid, weight_init=nn.IIDGaussian(std=0.1))(h0))
    h2 = cgt.tanh(nn.Affine(nhid, nhid, weight_init=nn.IIDGaussian(std=0.1))(h1))
    mean_na = nn.Affine(nhid, ctrl_dim, weight_init=nn.IIDGaussian(std=0.01))(h2)

    b = cgt.size(o_no, 0)
    std_na = cgt.repeat(std_1a, b, axis=0)

    oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
    oldstd_na = oldpdist_np[:, self.ctrl_dim:2*self.ctrl_dim]

    logp_n = ((-.5) * cgt.square((a_na - mean_na) / std_na).sum(axis=1)) - logstd_1a.sum()
    oldlogp_n = ((-.5) * cgt.square((a_na - oldmean_na) / oldstd_na).sum(axis=1)) - cgt.log(oldstd_na).sum(axis=1)

    ratio_n = cgt.exp(logp_n - oldlogp_n)
    surr = (ratio_n * adv_n).mean()
    pdists_np = cgt.concatenate([mean_na, std_na], axis=1)
    # kl = cgt.log(sigafter/)

    params = nn.get_parameters(surr)

    oldvar_na = cgt.square(oldstd_na)
    var_na = cgt.square(std_na)
    kl = (cgt.log(std_na / oldstd_na) + (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na) - .5).sum(axis=1).mean()

    lam = cgt.scalar()
    penobj = surr - lam * kl
    self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
    self._compute_grad_lagrangian = cgt.function([lam, oldpdist_np, o_no, a_na, adv_n],
        cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
    self.f_pdist = cgt.function([o_no], pdists_np)
    self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
    self.pc = ParamCollection(params)
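# The `kl` above is the closed-form KL divergence between two diagonal
# Gaussians, summed over action dimensions and averaged over the batch.
# A plain-numpy restatement for reference (array names are illustrative):
import numpy as np

def diag_gaussian_kl_sketch(old_mean, old_std, new_mean, new_std):
    old_var, new_var = np.square(old_std), np.square(new_std)
    kl_terms = (np.log(new_std / old_std)
                + (old_var + np.square(old_mean - new_mean)) / (2.0 * new_var)
                - 0.5)
    return kl_terms.sum(axis=1).mean()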
def test_scalars(): np.random.seed(0) x = cgt.scalar('x') y = cgt.scalar('y') z = cgt.scalar('z') vars = [x,y,z] #pylint: disable=W0622 vals = nr.rand(len(vars))+1 PROB2RESULT = {} for ((key,_), cls) in it.chain( it.izip(core.UNARY_INFO.items(),it.repeat(core.ElwiseUnary)), it.izip(core.BINARY_INFO.items(),it.repeat(core.ElwiseBinary)) ): if key == "conj": print "skipping conj" continue utils.colorprint(utils.Color.YELLOW, "Testing %s\n"%key) if cls == core.ElwiseUnary: n_in = 1 op = cls(key) else: n_in = 2 op = cls(key, (True,True)) inputvars = vars[0:n_in] inputvals = vals[0:n_in] out = core.Result(op, inputvars) f = cgt.function(inputvars, out) try: grads = cgt.grad(out, inputvars) except core.NonDifferentiable: print "nondiff" continue if DISPLAY: print "Function:" cgt.print_tree(out) print "Gradient original:" cgt.print_tree(grads) print "Gradient simplified:" grads_simple = core.simplify(grads) if DISPLAY: cgt.print_tree(grads_simple) gradf = cgt.function(inputvars, grads) eps = {"single":1e-4,"double":1e-9}[cgt.get_precision()] nugrad = numeric_grad(lambda li: f(*li), inputvals,eps=eps) #pylint: disable=W0640 cgtgrad = gradf(*inputvals) np.testing.assert_almost_equal(nugrad,cgtgrad,decimal={"single":3,"double":6}[cgt.get_precision()]) grad_count = core.count_nodes(grads_simple) PROB2RESULT[key] = {} PROB2RESULT[key]["grad"] = grad_count if DISPLAY: from thirdparty.tabulate import tabulate print tabulate([[key,val["grad"]] for (key,val) in PROB2RESULT.iteritems()],headers=["funcname","gradcount"])
def check_affine(f, *nu_inputs): types = ",".join(["{%s,%s}" % (x.dtype, x.ndim) for x in nu_inputs]) cgt.utils.colorprint(cgt.utils.Color.YELLOW, "Testing %s(%s)\n" % (f.__name__, types)) sy_inputs = map(tensor_like, nu_inputs) for (i, sy) in enumerate(sy_inputs): sy.name = "x%i" % i sy_result = f(*sy_inputs) def maybeprint(msg): if DISPLAY: print msg maybeprint("Function:") if DISPLAY: cgt.print_tree([sy_result]) f_cgt = cgt.function(sy_inputs, sy_result) sy_grads = cgt.grad(sy_result, sy_inputs) gradf_cgt = cgt.function(sy_inputs, sy_grads) sy_result_simple = core.simplify([sy_result]) sy_grads_simple = core.simplify(sy_grads) maybeprint("Gradient:") if DISPLAY: cgt.print_tree(sy_grads) maybeprint("Gradient after simplification:") if DISPLAY: cgt.print_tree(sy_grads_simple) out_true = f(*nu_inputs) out_cgt = f_cgt(*nu_inputs) grads_true = gradients_affine(f_cgt, nu_inputs, h=1e-4 if "max" in f.__name__ else 1e-1) grads_cgt = gradf_cgt(*nu_inputs) rtol = {"single": 1e-3, "double": 1e-5}[cgt.get_precision()] np.testing.assert_allclose(out_cgt, out_true, rtol=rtol) for (g_cgt, g_true) in zip(grads_cgt, grads_true): np.testing.assert_allclose(g_cgt, g_true, rtol=rtol) result_count = cgt.count_nodes(sy_result_simple) grad_count = cgt.count_nodes(sy_grads_simple) maybeprint("Result before: %i. after: %i" % (cgt.count_nodes([sy_result]), result_count)) maybeprint("Grad before: %i. after: %i" % (cgt.count_nodes(sy_grads), grad_count)) PROB2RESULT[f.__name__] = {} PROB2RESULT[f.__name__]["fn"] = result_count PROB2RESULT[f.__name__]["grad"] = grad_count
def __init__(self, xdim, args, dec="bernoulli"):
    self.xdim = xdim
    self.hdim = args.hdim
    self.zdim = args.zdim
    self.lmbda = args.lmbda  # weight decay coefficient * 2
    self.x = cgt.matrix("x", dtype=cgt.floatX)
    self.eps = cgt.matrix("eps", dtype=cgt.floatX)

    self.enc_mlp = GaussianMLP(self.x, self.xdim, self.hdim, self.zdim, nlayers=args.nlayers, eps=self.eps)
    if dec == "bernoulli":
        # log p(x | z) defined as -CE(x, y) = dec_mlp.cost(y)
        self.dec_mlp = BernoulliMLP(self.enc_mlp.out, self.zdim, self.hdim, self.xdim, nlayers=args.nlayers, y=self.x)
    elif dec == "gaussian":
        self.dec_mlp = GaussianMLP(self.enc_mlp.out, self.zdim, self.hdim, self.xdim, nlayers=args.nlayers, y=self.x)
    else:
        raise RuntimeError("unrecognized decoder %s" % dec)

    self.cost = (-cgt.sum(kld_unit_mvn(self.enc_mlp.mu, self.enc_mlp.var)) + self.dec_mlp.cost) / args.batch_size
    self.params = self.enc_mlp.params + self.dec_mlp.params
    # L2 regularization
    self.gparams = [cgt.grad(self.cost, [p])[0] + self.lmbda * p for p in self.params]
    self.gaccums = [cgt.shared(np.zeros(p.op.get_value().shape, dtype=cgt.floatX)) for p in self.params]

    # XXX replace w/ adagrad update from nn
    ADAGRAD_EPS = 1e-10  # for stability
    self.updates = [
        (param, param - args.lr * gparam / cgt.sqrt(gaccum + cgt.square(gparam) + ADAGRAD_EPS))
        for param, gparam, gaccum in zip(self.params, self.gparams, self.gaccums)
    ]
    self.updates += [
        (gaccum, gaccum + cgt.square(gparam))
        for gaccum, gparam in zip(self.gaccums, self.gparams)
    ]

    self.train = cgt.function([self.x, self.eps], self.cost, updates=self.updates)
    self.test = cgt.function([self.x, self.eps], self.cost, updates=None)
    # can be used for semi-supervised learning for example
    self.encode = cgt.function([self.x, self.eps], self.enc_mlp.out)
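# The symbolic updates above implement Adagrad on L2-regularized gradients.
# A minimal numpy restatement of one step of that rule (illustrative only;
# lr, lmbda and the epsilon mirror the constructor above):
import numpy as np

def adagrad_step_sketch(param, grad_accum, grad, lr, lmbda, eps=1e-10):
    g = grad + lmbda * param                                   # L2-regularized gradient
    new_param = param - lr * g / np.sqrt(grad_accum + np.square(g) + eps)
    new_accum = grad_accum + np.square(g)                      # running sum of squared grads
    return new_param, new_accum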
def test_array_wrapper():
    xval = np.zeros(10)
    x = cgt.shared(xval)
    f = cgt.function([], [], updates=[(x, x + 1)])
    f()
    g = cgt.function([], x.sum())
    assert np.allclose(x.op.get_value(), xval + 1)
    xval2 = np.arange(10)
    x.op.set_value(xval2)
    print x.op.get_value()
    assert np.allclose(x.op.get_value(), xval2)
    assert g() == xval2.sum()
    f()
    assert np.allclose(x.op.get_value(), xval2 + 1)
    assert g() == (xval2 + 1).sum()
def test_im2col():
    for settings in [((4, 4), (0, 0), (1, 1)), ((3, 3), (1, 1), (2, 2)), ((3, 3), (1, 1), (3, 3))]:
        xval = np.arange(2 * 1 * 28 * 28).reshape(2, 1, 28, 28).astype(cgt.floatX)
        x = cgt.tensor4("x", fixed_shape=xval.shape)
        y = im2col(x, *settings)
        h = cgt.constant(np.random.randn(*cgt.infer_shape(y)))
        cost = (y * h).sum()
        fcost = cgt.function([x], cost)
        fgrad = cgt.function([x], cgt.grad(cost, [x])[0])
        from cgt.numeric_diff import numeric_grad
        gnum = numeric_grad(fcost, xval, eps=1e-5)
        gana = fgrad(xval)
        assert np.allclose(gnum, gana)
def test_array_wrapper(): xval = np.zeros(10) x = cgt.shared(xval) f = cgt.function([],[],updates=[(x,x+1)]) f() g = cgt.function([],x.sum()) assert np.allclose(x.op.get_value(), xval+1) xval2 = np.arange(10) x.op.set_value(xval2) print x.op.get_value() assert np.allclose(x.op.get_value(), xval2) assert g() == xval2.sum() f() assert np.allclose(x.op.get_value(), xval2+1) assert g() == (xval2+1).sum()
def test_get_decoder_state(): batch_size = 32 feat_t_steps = 20 feat_num_features = 42 num_out_classes = 28 num_out_classes_true = num_out_classes + 2 # Start, end, are added decoder_size = 50 tau = np.reshape(np.random.normal(0.1, 0.2, batch_size*feat_t_steps*feat_num_features), (batch_size, feat_t_steps, feat_num_features)) tau2 = np.reshape(np.random.normal(0.1, 0.2, batch_size*feat_num_features), (batch_size, feat_num_features)) tau3 = np.reshape(np.random.normal(0.1, 0.2, batch_size*num_out_classes_true), (batch_size, num_out_classes_true)) feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features)) s = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes, decoder_size=decoder_size, feature_size=feat_num_features) context_bf = cgt.matrix(fixed_shape=(batch_size, feat_num_features)) prev_out_bc = cgt.matrix(fixed_shape=(batch_size, num_out_classes_true)) state_i_bf = nn.parameter(nn.init_array(nn.IIDGaussian(0.1), (batch_size, decoder_size)), name="decoder_init") decoder_out = s.get_decoder_state(context_bf, prev_out_bc, state_i_bf) decode_fun = cgt.function([feats, context_bf, prev_out_bc], [decoder_out]) m = decode_fun(tau, tau2, tau3)[0] assert m.shape == (batch_size, decoder_size) assert np.mean(m) < 1.0
def test_conv(): try: import scipy.signal except ImportError: raise SkipTest("skipping because we don't have ndimage") np.random.seed(0) x = np.random.randn(2,2,5,17) filt = np.random.randn(3,2,4,7) filtrows = filt.shape[2] filtcols = filt.shape[3] batchsize = x.shape[0] outchans = filt.shape[0] out = np.zeros((batchsize,outchans,x.shape[2]+filtrows-1,x.shape[3]+filtcols-1)) for b in xrange(x.shape[0]): for inchan in xrange(x.shape[1]): for outchan in xrange(outchans): out[b,outchan] += scipy.signal.convolve2d(x[b,inchan],filt[outchan,inchan][::-1,::-1],mode='full') f = cgt.function([], nn.conv2d(cgt.constant(x), cgt.constant(filt), kernelshape=(filtrows,filtcols), pad=(filtrows-1, filtcols-1))) out1 = f() # out1 = cgt.numeric_eval1(nn.conv2d(cgt.constant(x), cgt.constant(f), kersize=(filtrows,filtcols)), {}) np.testing.assert_allclose(out, out1, atol={"single":1e-3,"double":1e-6}[cgt.get_precision()])
def test_setting_weights(): X = cgt.matrix("X", fixed_shape=(None, 28*28)) model = build_model(X, 0.0) nnbuilder.set_all_weights(model, 'mnist.p') y = cgt.vector("y", dtype='i8') cost = -cgt.mean(categorical.loglik(y, model)) selected_number = cgt.argmax(model, axis=1) err_nodrop = cgt.cast(cgt.not_equal(selected_number, y), cgt.floatX).mean() computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost]) Xdata, ydata = load_data() Xtrain = Xdata[0:60000] ytrain = ydata[0:60000] Xtest = Xdata[60000:70000] ytest = ydata[60000:70000] sortinds = np.random.permutation(60000) Xtrain = Xtrain[sortinds] ytrain = ytrain[sortinds] print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"]) for i_epoch in xrange(3): tstart = time.time() elapsed = time.time() - tstart trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)]) testerr, testloss = computeloss(Xtest, ytest) print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
def check_conv(precision):
    cgt.reset_config()
    cgt.set_precision(precision)
    f = cgt.function([], nn.conv2d(cgt.constant(x), cgt.constant(filt),
        kernelshape=(filtrows, filtcols), pad=(filtrows-1, filtcols-1)))
    out1 = f()
    # out1 = cgt.numeric_eval1(nn.conv2d(cgt.constant(x), cgt.constant(f), kersize=(filtrows,filtcols)), {})
    np.testing.assert_allclose(out, out1, atol={"single": 1e-3, "double": 1e-6}[precision])
def runtest(backend, precision): with cgt.scoped_update_config(backend='native', precision=precision): xval = np.zeros(10) x = cgt.shared(xval) f = cgt.function([], [], updates=[(x, x + 1)]) f() g = cgt.function([], x.sum()) assert np.allclose(x.op.get_value(), xval + 1) xval2 = np.arange(10) x.op.set_value(xval2) print x.op.get_value() assert np.allclose(x.op.get_value(), xval2) assert g() == xval2.sum() f() assert np.allclose(x.op.get_value(), xval2 + 1) assert g() == (xval2 + 1).sum()
def test_get_character_distribution(): batch_size = 32 feat_t_steps = 20 feat_num_features = 42 num_out_classes = 28 # This is the index of the start token. num_out_classes_true = num_out_classes + 2 # Add start and end tokens automatically. decoder_size = 50 tau = np.reshape(np.random.normal(0.1, 0.2, batch_size*feat_t_steps*feat_num_features), (batch_size, feat_t_steps, feat_num_features)) tau2 = np.reshape(np.random.normal(0.1, 0.2, batch_size*feat_num_features), (batch_size, feat_num_features)) tau3 = np.reshape(np.random.normal(0.1, 0.2, batch_size*decoder_size), (batch_size, decoder_size)) feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features)) s = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes, decoder_size=decoder_size, feature_size=feat_num_features) context_bf = cgt.matrix(fixed_shape=(batch_size, feat_num_features)) state_bf = cgt.matrix(fixed_shape=(batch_size, decoder_size)) m_out = s.get_character_distribution(state_bf, context_bf) out_fun = cgt.function([feats, context_bf, state_bf], [m_out]) m = out_fun(tau, tau2, tau3)[0] assert m.shape == (batch_size, num_out_classes_true)
def test_devices(): N = 10 K = 3 compile_info = cgt.compilation.get_compile_info() cuda_enabled = compile_info["CGT_ENABLE_CUDA"] if not cuda_enabled: raise SkipTest("cuda disabled") Xval = np.random.randn(N, K).astype(cgt.floatX) wval = np.random.randn(K).astype(cgt.floatX) bval = np.asarray(np.random.randn()).astype(cgt.floatX) yval = np.random.randn(N).astype(cgt.floatX) with cgt.scoped_update_config(default_device=cgt.Device(devtype="gpu")): X_nk = cgt.shared(Xval, "X", device=cgt.Device(devtype='gpu')) y_n = cgt.shared(yval, "y") w_k = cgt.shared(wval, "w") b = cgt.shared(bval, name="b") print "bval", bval ypred = cgt.dot(cgt.square(X_nk), w_k) + b err = cgt.sum(cgt.sin(ypred - y_n)) g = cgt.grad(err, [w_k, b]) outputs = [err] + g f = cgt.function([], [err] + g) results = f() print results assert np.allclose( results[0], np.sin(np.square(Xval).dot(wval) + bval - yval).sum())
def test_matmuls():
    with cgt.scoped_update_config(parallel=True, backend="native"):
        m = 8
        d = 1000

        # build graph
        X = cgt.matrix("X")
        Y = cgt.matrix("Y")
        loss = 0
        for k in xrange(m):
            # loss = loss+cgt.sin(X*Y+k).sum()
            loss = loss + (X.dot(Y + k)).sum()
        f = cgt.function([X, Y], loss)

        # test things out!
        seed(0)
        X_val = randn(d, d)
        Y_val = randn(d, d)
        vals = [X_val, Y_val]
        tic = time.time()
        out = f(*vals)
        toc = time.time()
        print toc - tic
def test_shape_err():
    with CaptureStderr():
        with cgt.scoped_update_config(debug=True, backend="python"):
            x = cgt.vector()
            y = cgt.vector()
            f = cgt.function([x, y], x + y)
            f(np.zeros(3), np.zeros(4))
def function(inputs, outputs, updates=None, givens=None, allow_input_downcast=None, on_unused_input=None):
    if is_theano():
        allow_input_downcast = allow_input_downcast or False
        on_unused_input = on_unused_input or 'raise'
        return theano.function(inputs, outputs, updates=updates, givens=givens,
            allow_input_downcast=allow_input_downcast, on_unused_input=on_unused_input)
    elif is_cgt():
        return cgt.function(inputs, outputs, updates=updates, givens=givens)
    elif is_tf():
        return TfFunctionWrapper(inputs=inputs, outputs=outputs, updates=updates, givens=givens)
    elif is_mxnet():
        return MxFunctionWrapper(inputs=inputs, outputs=outputs, updates=updates, givens=givens)
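# A minimal usage sketch of the backend dispatcher above, assuming the CGT
# backend is active (so the call reduces to cgt.function). The expression and
# values are illustrative, not from the original code.
x = cgt.scalar("x")
double_fn = function([x], 2 * x)
assert np.allclose(double_fn(3.0), 6.0)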
def test_flatvec():
    cgt.reset_config()
    cgt.set_precision('double')
    cgt.core.update_config(backend="python")  # XXX
    N = 10
    K = 3
    Xval = np.random.randn(N, K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)
    X_nk = cgt.shared(Xval, "X")
    y_n = cgt.shared(yval, "y")
    w_k = cgt.shared(wval, "w")
    b = cgt.shared(bval, name="b")
    ypred = cgt.dot(X_nk, w_k) + b
    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])
    g = core.simplify(g)
    pars = [w_k, b]
    flatx = nn.setup_contiguous_storage(pars)
    f = cgt.function([], [err, cgt.flatcat(g)])
def test_matmuls(): with cgt.scoped_update_config(parallel=True): m = 8 d = 1000 # build graph X = cgt.matrix("X") Y = cgt.matrix("Y") loss = 0 for k in xrange(m): # loss = loss+cgt.sin(X*Y+k).sum() loss = loss + (X.dot(Y + k)).sum() f = cgt.function([X, Y], loss) # test things out! seed(0) X_val = randn(d, d) Y_val = randn(d, d) vals = [X_val, Y_val] tic = time.time() out = f(*vals) toc = time.time() print toc - tic
def runtest(backend, precision): with cgt.scoped_update_config(backend='native',precision=precision): xval = np.zeros(10) x = cgt.shared(xval) f = cgt.function([],[],updates=[(x,x+1)]) f() g = cgt.function([],x.sum()) assert np.allclose(x.op.get_value(), xval+1) xval2 = np.arange(10) x.op.set_value(xval2) print x.op.get_value() assert np.allclose(x.op.get_value(), xval2) assert g() == xval2.sum() f() assert np.allclose(x.op.get_value(), xval2+1) assert g() == (xval2+1).sum()
def test_flatvec():
    cgt.reset_config()
    cgt.set_precision('double')
    cgt.core.update_config(backend="python")  # XXX
    N = 10
    K = 3
    Xval = np.random.randn(N, K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)
    X_nk = cgt.shared(Xval, "X")
    y_n = cgt.shared(yval, "y")
    w_k = cgt.shared(wval, "w")
    b = cgt.shared(bval, name="b")
    ypred = cgt.dot(X_nk, w_k) + b
    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])
    g = core.simplify(g)
    pars = [w_k, b]
    flatx = nn.setup_contiguous_storage(pars)
    f = cgt.function([], [err, cgt.flatcat(g)])
def make_funcs(config, dbg_out={}): net_in, net_out = hybrid_network(config['num_inputs'], config['num_outputs'], config['num_units'], config['num_sto'], dbg_out=dbg_out) if not config['dbg_out_full']: dbg_out = {} # def f_sample(_inputs, num_samples=1, flatten=False): # _mean, _var = f_step(_inputs) # _samples = [] # for _m, _v in zip(_mean, _var): # _s = np.random.multivariate_normal(_m, np.diag(np.sqrt(_v)), num_samples) # if flatten: _samples.extend(_s) # else: _samples.append(_s) # return np.array(_samples) Y_gt = cgt.matrix("Y") Y_prec = cgt.tensor3('V', fixed_shape=(None, config['num_inputs'], config['num_inputs'])) params = nn.get_parameters(net_out) size_batch, size_out = net_out.shape inputs, outputs = [net_in], [net_out] if config['no_bias']: print "Excluding bias" params = [p for p in params if not p.name.endswith(".b")] loss_vec = dist.gaussian.logprob(Y_gt, net_out, Y_prec) if config['weight_decay'] > 0.: print "Applying penalty on parameter norm" params_flat = cgt.concatenate([p.flatten() for p in params]) loss_param = config['weight_decay'] * cgt.sum(params_flat ** 2) loss_vec -= loss_param # / size_batch loss = cgt.sum(loss_vec) / size_batch # TODO_TZ f_step seems not to fail if X has wrong dim f_step = cgt.function(inputs, outputs) f_surr = get_surrogate_func(inputs + [Y_prec, Y_gt], outputs, [loss_vec], params, _dbg_out=dbg_out) return params, f_step, None, None, None, f_surr
def test_devices(): N = 10 K = 3 compile_info = cgt.compilation.get_compile_info() cuda_enabled = compile_info["CGT_ENABLE_CUDA"] if not cuda_enabled: raise SkipTest("cuda disabled") Xval = np.random.randn(N,K).astype(cgt.floatX) wval = np.random.randn(K).astype(cgt.floatX) bval = np.asarray(np.random.randn()).astype(cgt.floatX) yval = np.random.randn(N).astype(cgt.floatX) with cgt.scoped_update_config(default_device=cgt.Device(devtype="gpu")): X_nk = cgt.shared(Xval, "X", device=cgt.Device(devtype='gpu')) y_n = cgt.shared(yval, "y") w_k = cgt.shared(wval, "w") b = cgt.shared(bval, name="b") print "bval",bval ypred = cgt.dot(cgt.square(X_nk), w_k) + b err = cgt.sum(cgt.sin(ypred - y_n)) g = cgt.grad(err, [w_k, b]) outputs = [err]+g f = cgt.function([], [err]+g) results = f() print results assert np.allclose(results[0] , np.sin(np.square(Xval).dot(wval)+bval-yval).sum())
def make_loss_and_grad_and_step(arch, size_input, size_output, size_mem, size_batch, n_layers, n_unroll, k_in, k_h): # symbolic variables x_tnk = cgt.tensor3() targ_tnk = cgt.tensor3() #make_network = make_deep_lstm if arch=="lstm" else make_deep_gru make_network = make_deep_rrnn_rot_relu network = make_network(size_input, size_mem, n_layers, size_output, size_batch, k_in, k_h) init_hiddens = [ cgt.matrix() for _ in xrange(get_num_hiddens(arch, n_layers)) ] # TODO fixed sizes cur_hiddens = init_hiddens loss = 0 for t in xrange(n_unroll): outputs = network([x_tnk[t]] + cur_hiddens) cur_hiddens, prediction_logprobs = outputs[:-1], outputs[-1] # loss = loss + nn.categorical_negloglik(prediction_probs, targ_tnk[t]).sum() loss = loss - (prediction_logprobs * targ_tnk[t]).sum() cur_hiddens = outputs[:-1] final_hiddens = cur_hiddens loss = loss / (n_unroll * size_batch) params = network.get_parameters() gradloss = cgt.grad(loss, params) flatgrad = flatcat(gradloss) with utils.Message("compiling loss+grad"): f_loss_and_grad = cgt.function([x_tnk, targ_tnk] + init_hiddens, [loss, flatgrad] + final_hiddens) f_loss = cgt.function([x_tnk, targ_tnk] + init_hiddens, loss) assert len(init_hiddens) == len(final_hiddens) x_nk = cgt.matrix('x') outputs = network([x_nk] + init_hiddens) f_step = cgt.function([x_nk] + init_hiddens, outputs) # print "node count", cgt.count_nodes(flatgrad) return network, f_loss, f_loss_and_grad, f_step
def test_cpu_pool(**kwargs):
    np.random.seed(0)
    x = cgt.tensor4("x", fixed_shape=(2, 3, 5, 7))
    y = max_pool_2d(x, (4, 4), (0, 0), (1, 1))
    xval = np.random.randn(2, 3, 5, 7)
    hval = np.random.randn(*cgt.infer_shape(y))
    h = cgt.constant(hval)
    cost = (y * h).sum()
    fcost = cgt.function([x], cost)
    fgrad = cgt.function([x], cgt.grad(cost, [x])[0])
    from cgt.numeric_diff import numeric_grad
    gnum = numeric_grad(fcost, xval)
    gana = fgrad(xval)
    assert np.allclose(gnum, gana)
def test_im2col(): for settings in [((4, 4), (0, 0), (1, 1)), ((3, 3), (1, 1), (2, 2)), ((3, 3), (1, 1), (3, 3))]: xval = np.arange(2 * 1 * 28 * 28).reshape(2, 1, 28, 28).astype(cgt.floatX) x = cgt.tensor4("x", fixed_shape=xval.shape) y = im2col(x, *settings) h = cgt.constant(np.random.randn(*cgt.infer_shape(y))) cost = (y * h).sum() fcost = cgt.function([x], cost) fgrad = cgt.function([x], cgt.grad(cost, [x])[0]) from cgt.numeric_diff import numeric_grad gnum = numeric_grad(fcost, xval, eps=1e-5) gana = fgrad(xval) assert np.allclose(gnum, gana)
def test_pool(**kwargs):
    np.random.seed(0)
    x = cgt.tensor4("x", fixed_shape=(2, 3, 5, 7))
    y = max_pool_2d(x, (4, 4), (0, 0), (1, 1))
    xval = np.random.randn(2, 3, 5, 7)
    hval = np.random.randn(*cgt.infer_shape(y))
    h = cgt.constant(hval)
    cost = (y * h).sum()
    fcost = cgt.function([x], cost)
    fgrad = cgt.function([x], cgt.grad(cost, [x])[0])
    from cgt.numeric_diff import numeric_grad
    gnum = numeric_grad(fcost, xval)
    gana = fgrad(xval)
    assert np.allclose(gnum, gana)
def main(): parser = argparse.ArgumentParser() parser.add_argument("--profile",action="store_true") parser.add_argument("--unittest",action="store_true") parser.add_argument("--epochs",type=int,default=10) args = parser.parse_args() batchsize = 64 Xshape = (batchsize, 3, 32, 32) X = cgt.tensor4("X", fixed_shape = Xshape) y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4') conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), weight_init=nn.IIDGaussian(std=1e-4))(X) relu1 = nn.rectify(conv1) pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2)) conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2), weight_init=nn.IIDGaussian(std=0.01))(relu1) relu2 = nn.rectify(conv2) pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2)) conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), weight_init=nn.IIDGaussian(std=0.01))(pool2) pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2)) relu3 = nn.rectify(pool3) d0,d1,d2,d3 = relu3.shape flatlayer = relu3.reshape([d0,d1*d2*d3]) nfeats = cgt.infer_shape(flatlayer)[1] ip1 = nn.Affine(nfeats, 10)(flatlayer) logprobs = nn.logsoftmax(ip1) loss = -logprobs[cgt.arange(batchsize), y].mean() params = nn.get_parameters(loss) updates = rmsprop_updates(loss, params, stepsize=1e-3) train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates) if args.profile: cgt.profiler.start() data = np.load("/Users/joschu/Data/cifar-10-batches-py/cifar10.npz") Xtrain = data["X_train"] ytrain = data["y_train"] print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"]) for i_epoch in xrange(args.epochs): for start in xrange(0, Xtrain.shape[0], batchsize): tstart = time.time() end = start+batchsize print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart if start > batchsize*5: break # elapsed = time.time() - tstart # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)]) # testerr, testloss = computeloss(Xtest, ytest) # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed]) if args.profile: cgt.profiler.print_stats() return if args.unittest: break
def main(): parser = argparse.ArgumentParser() parser.add_argument("--profile",action="store_true") parser.add_argument("--unittest",action="store_true") parser.add_argument("--epochs",type=int,default=10) args = parser.parse_args() batchsize = 64 Xshape = (batchsize, 3, 32, 32) X = cgt.tensor4("X", fixed_shape = Xshape) y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4') conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), weight_init=nn.IIDGaussian(std=1e-4))(X) relu1 = nn.rectify(conv1) pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2)) conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2), weight_init=nn.IIDGaussian(std=0.01))(pool1) relu2 = nn.rectify(conv2) pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2)) conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), weight_init=nn.IIDGaussian(std=0.01))(pool2) pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2)) relu3 = nn.rectify(pool3) d0,d1,d2,d3 = relu3.shape flatlayer = relu3.reshape([d0,d1*d2*d3]) nfeats = cgt.infer_shape(flatlayer)[1] ip1 = nn.Affine(nfeats, 10)(flatlayer) logprobs = nn.logsoftmax(ip1) loss = -logprobs[cgt.arange(batchsize), y].mean() params = nn.get_parameters(loss) updates = rmsprop_updates(loss, params, stepsize=1e-3) train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates) if args.profile: cgt.profiler.start() data = fetch_dataset("http://rll.berkeley.edu/cgt-data/cifar10.npz") Xtrain = data["X_train"] ytrain = data["y_train"] print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"]) for i_epoch in xrange(args.epochs): for start in xrange(0, Xtrain.shape[0], batchsize): tstart = time.time() end = start+batchsize print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart if start > batchsize*5: break # elapsed = time.time() - tstart # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)]) # testerr, testloss = computeloss(Xtest, ytest) # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed]) if args.profile: cgt.profiler.print_stats() return if args.unittest: break
def test_update():
    with cgt.scoped_update_config(parallel=True, backend="native"):
        xval = np.array(1.5)
        x = cgt.shared(xval)
        f = cgt.function([], x.sum(), updates=[(x, x + 1)])
        before = x.op.get_value().copy()
        f()
        after = x.op.get_value()
        assert np.allclose(after, before + 1)
def make_updater_fc():
    X = cgt.matrix("X", fixed_shape=(None, 28 * 28))
    y = cgt.vector("y", dtype='i8')
    stepsize = cgt.scalar("stepsize")
    loss = build_fc_return_loss(X, y)
    params = nn.get_parameters(loss)
    gparams = cgt.grad(loss, params)
    updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
    return cgt.function([X, y, stepsize], loss, updates=updates)
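# Illustrative usage of the updater returned above: one SGD step per call.
# `load_data`, the batch size, and the stepsize are assumptions, not part of
# the original function.
updater = make_updater_fc()
Xdata, ydata = load_data()
batch_size, stepsize = 128, 0.1
for start in xrange(0, Xdata.shape[0], batch_size):
    end = start + batch_size
    loss = updater(Xdata[start:end], ydata[start:end], stepsize)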
def test_update(): with cgt.scoped_update_config(parallel=True): xval = np.array(1.5) x = cgt.shared(xval) f = cgt.function([], x.sum(), updates=[(x, x + 1)]) before = x.op.get_value().copy() f() after = x.op.get_value() assert np.allclose(after, before + 1)
def make_updater_fc(): X = cgt.matrix("X", fixed_shape=(None, 28 * 28)) y = cgt.vector("y", dtype="i8") stepsize = cgt.scalar("stepsize") loss = build_fc_return_loss(X, y) params = nn.get_parameters(loss) gparams = cgt.grad(loss, params) updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)] return cgt.function([X, y, stepsize], loss, updates=updates)
def main(num_epochs=NUM_EPOCHS): #cgt.set_precision('half') print("Building network ...") # Recurrent layers expect input of shape # (batch size, max sequence length, number of features) X = cgt.tensor3(name='X', fixed_shape=(N_BATCH, MAX_LENGTH, 2)) l_forward = nnbuilder.recurrentLayer(nn_input=X, num_units=N_HIDDEN) l_backward = nnbuilder.recurrentLayer(nn_input=X, num_units=N_HIDDEN, backwards=True) #l_forward = nnbuilder.LSTMLayer(nn_input=X, num_units=N_HIDDEN, activation=cgt.sigmoid) #l_backward = nnbuilder.LSTMLayer(nn_input=X, num_units=N_HIDDEN, activation=cgt.sigmoid, backwards=True) #l_forward = nnbuilder.GRULayer(nn_input=X, num_units=N_HIDDEN, activation=nn.rectify) #l_backward = nnbuilder.GRULayer(nn_input=X, num_units=N_HIDDEN, activation=nn.rectify, backwards=True) l_forward_slice = l_forward[:, MAX_LENGTH-1, :] # Take the last element in the forward slice time dimension l_backward_slice = l_backward[:, 0, :] # And the first element in the backward slice time dimension l_sum = cgt.concatenate([l_forward_slice, l_backward_slice], axis=1) l_out = nnbuilder.denseLayer(l_sum, num_units=1, activation=cgt.tanh) target_values = cgt.vector('target_output') predicted_values = l_out[:, 0] # For this task we only need the last value cost = cgt.mean((predicted_values - target_values)**2) # Compute SGD updates for training print("Computing updates ...") updates = nn.rmsprop(cost, nn.get_parameters(l_out), LEARNING_RATE) #updates = nn.nesterov_momentum(cost, nn.get_parameters(l_out), 0.05) # cgt functions for training and computing cost print("Compiling functions ...") train = cgt.function([X, target_values], cost, updates=updates) compute_cost = cgt.function([X, target_values], cost) # We'll use this "validation set" to periodically check progress X_val, y_val, mask_val = gen_data() print("Training ...") time_start = time.time() try: for epoch in range(num_epochs): for _ in range(EPOCH_SIZE): X, y, m = gen_data() train(X, y) cost_val = compute_cost(X_val, y_val) print("Epoch {} validation cost = {}".format(epoch+1, cost_val)) print ('Epoch took ' + str(time.time() - time_start)) time_start = time.time() except KeyboardInterrupt: pass
def main(): print("Loading data...") X = cgt.matrix("X", fixed_shape=(None, 28*28)) y = cgt.vector("y", dtype='i8') model = build_model(X, 0.0) loss = -cgt.mean(categorical.loglik(y, model)) updates = nn.rmsprop(loss, nn.get_parameters(loss), 0.01) train = cgt.function(inputs=[X, y], outputs=[], updates=updates) y_nodrop = cgt.argmax(model, axis=1) cost_nodrop = -cgt.mean(categorical.loglik(y, model)) err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean() computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost_nodrop]) batch_size=128 Xdata, ydata = load_data() Xtrain = Xdata[0:60000] ytrain = ydata[0:60000] Xtest = Xdata[60000:70000] ytest = ydata[60000:70000] sortinds = np.random.permutation(60000) Xtrain = Xtrain[sortinds] ytrain = ytrain[sortinds] print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"]) for i_epoch in xrange(3): tstart = time.time() for start in xrange(0, Xtrain.shape[0], batch_size): end = start+batch_size train(Xtrain[start:end], ytrain[start:end]) elapsed = time.time() - tstart trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)]) testerr, testloss = computeloss(Xtest, ytest) print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed]) nnbuilder.save_weights(model, 'mnist')
def test_multi_output():
    for x in (cgt.scalar('x'), cgt.vector('x'), cgt.matrix('x')):
        for cls in (SinCos, SinCos2):
            y, z = core.unpack(core.Result(cls(), [x]))
            xnum = np.ones((3,) * x.ndim, cgt.floatX)
            correct = (np.sin(xnum), np.cos(xnum))
            yznum = cgt.numeric_eval([y, z], {x: xnum})
            np.testing.assert_allclose(yznum, correct)
            f = cgt.function([x], [y, z])
            np.testing.assert_allclose(f(xnum), correct)
def __init__(self, num_features=None, num_hidden=100):
    stepsize = 0.01
    # with shape (batchsize, ncols)
    X = cgt.matrix("X", fixed_shape=(1, num_features))
    # y: a symbolic variable representing the rewards, which are integers
    y = cgt.scalar("y", dtype='float64')
    hid1 = nn.rectify(
        nn.Affine(num_features, num_hidden, weight_init=nn.IIDGaussian(std=.1), bias_init=nn.Constant(1))(X)
    )
    # One final fully-connected layer, and then a linear activation output for reward
    output = nn.Affine(num_hidden, 1, weight_init=nn.IIDGaussian(std=.1), bias_init=nn.Constant(1))(hid1)
    abs_deviation = cgt.abs(output - y).mean()
    params = nn.get_parameters(abs_deviation)
    gparams = cgt.grad(abs_deviation, params)
    updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
    self.predictor = cgt.function([X], output)
    self.updater = cgt.function([X, y], abs_deviation, updates=updates)
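# Hypothetical usage of the reward model above; the class name `RewardModel`,
# the feature count, and the target reward are illustrative assumptions.
model = RewardModel(num_features=10)
phi = np.random.randn(1, 10).astype(cgt.floatX)
loss = model.updater(phi, 3.0)       # one gradient step toward reward 3.0
reward_hat = model.predictor(phi)    # predicted reward for the same features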
def test_shape_err():
    try:
        with CaptureStderr() as s:
            with cgt.scoped_update_config(debug=True):
                x = cgt.vector()
                y = cgt.vector()
                f = cgt.function([x,y],x+y)
                f(np.zeros(3),np.zeros(4))
    except Exception as e:
        assert "f = cgt.function([x,y],x+y)" in s.getvalue()
def CGT_vLJ_Optimize(x):
    N = len(x)
    #cgt.set_precision('double')
    xt = cgt.vector('xt')
    vLJt = 0
    for j in range(1, N):
        for i in range(j):
            rho = ((xt[i*D:i*D+D] - xt[j*D:j*D+D])**2).sum()
            vLJt += rho**(-6.0) - (rho**(-3.0))
    f = cgt.function([xt], 4*vLJt)
    dvLJc = cgt.grad(4*vLJt, xt)
    df = cgt.function([xt], dvLJc)
    CGT_BFGSres = optimize.minimize(f, np.ravel(x),
                                    method='L-BFGS-B',
                                    jac=df,
                                    options={'disp': False})
    return np.reshape(CGT_BFGSres.x, (N, D))
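# The symbolic objective above is the Lennard-Jones potential in reduced units
# (epsilon = sigma = 1), written in terms of rho = r**2 so that
# rho**-6 - rho**-3 = r**-12 - r**-6. A plain-numpy restatement of the energy,
# assuming `x` is an (N, D) array of particle positions:
import numpy as np

def lj_energy_sketch(x):
    N = len(x)
    v = 0.0
    for j in range(1, N):
        for i in range(j):
            rho = np.sum((x[i] - x[j])**2)   # squared pair distance
            v += rho**(-6.0) - rho**(-3.0)
    return 4.0 * v                           # matches 4*vLJt above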
def make_updater_convnet():
    X = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28))  # so shapes can be inferred
    y = cgt.vector("y", dtype="i8")
    stepsize = cgt.scalar("stepsize")
    loss = build_convnet_return_loss(X, y)
    params = nn.get_parameters(loss)
    gparams = cgt.grad(loss, params)
    updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
    return cgt.function([X, y, stepsize], loss, updates=updates)
def make_funcs(config, dbg_out=None): params, Xs, Ys, C_0, H_0, C_T, H_T, C_1, H_1 = lstm_network( config['rnn_steps'], config['num_inputs'], config['num_outputs'], config['num_units'], config['num_mems']) # basic size_batch = Xs[0].shape[0] dY = Ys[0].shape[-1] Ys_gt = [ cgt.matrix(fixed_shape=(size_batch, dY), name='Y%d' % t) for t in range(len(Ys)) ] Ys_var = [cgt.tensor3(fixed_shape=(size_batch, dY, dY)) for _ in Ys] net_inputs, net_outputs = Xs + C_0 + H_0 + Ys_var, Ys + C_T + H_T # calculate loss loss_vec = [] for i in range(len(Ys)): # if i == 0: continue _l = dist.gaussian.logprob(Ys_gt[i], Ys[i], Ys_var[i]) loss_vec.append(_l) loss_vec = cgt.add_multi(loss_vec) if config['weight_decay'] > 0.: params_flat = cgt.concatenate([p.flatten() for p in params]) loss_param = config['weight_decay'] * cgt.sum(params_flat**2) loss_vec -= loss_param # / size_batch loss = cgt.sum(loss_vec) / config['rnn_steps'] / size_batch grad = cgt.grad(loss, params) # functions def f_init(size_batch): c_0, h_0 = [], [] for _n_m in config['num_mems']: if _n_m > 0: c_0.append(np.zeros((size_batch, _n_m))) h_0.append(np.zeros((size_batch, _n_m))) return c_0, h_0 f_step = cgt.function([Xs[0]] + C_0 + H_0, [Ys[0]] + C_1 + H_1) f_loss = cgt.function(net_inputs + Ys_gt, loss) f_grad = cgt.function(net_inputs + Ys_gt, grad) f_surr = cgt.function(net_inputs + Ys_gt, [loss] + net_outputs + grad) return params, f_step, f_loss, f_grad, f_init, f_surr
def runTest(self):
    f1 = cgt.function1([], ())
    assert f1() == ()
    x = cgt.vector()
    xval = np.random.randn(1)
    f2 = cgt.function([x], [(x, x), (x,), ()])
    ytrue = [(xval, xval), (xval,), ()]
    y = f2(xval)
    assert y == ytrue
def test_scalar_input():
    x = cgt.scalar()
    f = cgt.function([x], x**2)
    xval = 2
    yval = 4
    assert np.allclose(f(2), 4)
    assert np.allclose(f(2.0), 4)
    assert np.allclose(f(np.array(2)), 4)
    assert np.allclose(f(np.array(2.0)), 4)
    assert np.allclose(f(np.array([2])[0]), 4)
    assert np.allclose(f(np.array([2.0])[0]), 4)
def make_updater_convnet(): X = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28)) # so shapes can be inferred y = cgt.vector("y", dtype='i8') stepsize = cgt.scalar("stepsize") loss = build_convnet_return_loss(X, y) params = nn.get_parameters(loss) gparams = cgt.grad(loss, params) updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)] return cgt.function([X, y, stepsize], loss, updates=updates)
def CGT_dvLJ(x):
    N = len(x)
    xt = cgt.vector('xt')
    vLJt = 0
    for j in range(1, N):
        for i in range(j):
            rho = ((xt[i*D:i*D+D] - xt[j*D:j*D+D])**2).sum()
            vLJt += rho**(-6.0) - (rho**(-3.0))
    dvLJc = cgt.grad(4*vLJt, xt)
    df = cgt.function([xt], dvLJc)
    return df(np.ravel(x))
def make_funcs(opt, ntm, total_time, loss_timesteps): x_tbk = cgt.tensor3("x", fixed_shape=(total_time, opt.b, opt.k)) y_tbp = cgt.tensor3("y", fixed_shape=(total_time, opt.b, opt.p)) loss_timesteps = set(loss_timesteps) initial_states = make_ntm_initial_states(opt) params = ntm.get_parameters() + get_parameters(initial_states) # params = ntm.get_parameters() lossCE = 0 loss01 = 0 state_arrs = initial_states for t in xrange(total_time): tmp = ntm([x_tbk[t]] + state_arrs) raw_pred = tmp[0] state_arrs = tmp[1:4] if t in loss_timesteps: p_pred = cgt.sigmoid(raw_pred) ce = bernoulli_crossentropy( y_tbp[t], p_pred).sum() # cross-entropy of bernoulli distribution lossCE = lossCE + ce loss01 = loss01 + cgt.cast(cgt.equal(y_tbp[t], round01(p_pred)), cgt.floatX).sum() lossCE = lossCE / (len(loss_timesteps) * opt.p * opt.b) / np.log(2) loss01 = loss01 / (len(loss_timesteps) * opt.p * opt.b) gradloss = cgt.grad(lossCE, params) flatgrad = flatcat(gradloss) f_loss = cgt.function([x_tbk, y_tbp], lossCE) f_loss_and_grad = cgt.function([x_tbk, y_tbp], [lossCE, loss01, flatgrad]) print "number of nodes in computation graph:", core.count_nodes( [lossCE, loss01, flatgrad]) return f_loss, f_loss_and_grad, params
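# `bernoulli_crossentropy` above is assumed to be the usual elementwise binary
# cross-entropy; a minimal numpy sketch of that assumption (not the library's
# implementation). The NTM loss sums this over bits and timesteps.
import numpy as np

def bernoulli_crossentropy_sketch(y, p, eps=1e-12):
    p = np.clip(p, eps, 1.0 - eps)           # avoid log(0)
    return -(y * np.log(p) + (1.0 - y) * np.log(1.0 - p))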
def test_sleeps():
    with cgt.scoped_update_config(parallel=True):
        x = cgt.scalar('x')
        y1 = sleepfor(x, .1)
        y2 = sleepfor(x, .1)
        z = y1 + y2
        fpar = cgt.function([x], z)
        tstart = time.time()
        fpar(0)
        elapsed = time.time() - tstart
        assert elapsed < .11
def __init__(self, n_actions): Serializable.__init__(self, n_actions) cgt.set_precision('double') n_in = 128 o_no = cgt.matrix("o_no", fixed_shape=(None, n_in)) a_n = cgt.vector("a_n", dtype='i8') q_n = cgt.vector("q_n") oldpdist_np = cgt.matrix("oldpdists") h0 = (o_no - 128.0) / 128.0 nhid = 64 h1 = cgt.tanh( nn.Affine(128, nhid, weight_init=nn.IIDGaussian(std=.1))(h0)) probs_na = nn.softmax( nn.Affine(nhid, n_actions, weight_init=nn.IIDGaussian(std=0.01))(h1)) logprobs_na = cgt.log(probs_na) b = cgt.size(o_no, 0) logps_n = logprobs_na[cgt.arange(b), a_n] surr = (logps_n * q_n).mean() kl = (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1).mean() params = nn.get_parameters(surr) gradsurr = cgt.grad(surr, params) flatgrad = cgt.concatenate([p.flatten() for p in gradsurr]) lam = cgt.scalar() penobj = surr - lam * kl self._f_grad_lagrangian = cgt.function( [lam, oldpdist_np, o_no, a_n, q_n], cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)])) self.f_pdist = cgt.function([o_no], probs_na) self.f_probs = cgt.function([o_no], probs_na) self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n], [surr, kl]) self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad) self.pc = ParamCollection(params)
def test_cpu_pool():
    with cgt.scoped_update_config(precision="quad", backend="native"):
        print cgt.get_precision()
        ci = get_compile_info()
        np.random.seed(0)
        x = cgt.tensor4("x", fixed_shape=(2, 3, 5, 7))
        y = max_pool_2d(x, (4, 4), (0, 0), (1, 1))
        xval = np.random.randn(2, 3, 5, 7)
        hval = np.random.randn(*cgt.infer_shape(y))
        h = cgt.constant(hval)
        cost = (y * h).sum()
        fcost = cgt.function([x], cost)
        fgrad = cgt.function([x], cgt.grad(cost, [x])[0])
        from cgt.numeric_diff import numeric_grad
        gnum = numeric_grad(fcost, xval)
        gana = fgrad(xval)
        assert np.allclose(gnum, gana)