Example No. 1
    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no",fixed_shape=(None,n_in))
        a_n = cgt.vector("a_n",dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        h0 = (o_no - 128.0)/128.0 
        nhid = 64
        h1 = cgt.tanh(nn.Affine(128,nhid,weight_init=nn.IIDGaussian(std=.1))(h0))
        probs_na = nn.softmax(nn.Affine(nhid,n_actions,weight_init=nn.IIDGaussian(std=0.01))(h1))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n*q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np/probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function([lam, oldpdist_np, o_no, a_n, q_n], 
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj,params)]))
        self.f_pdist = cgt.function([o_no], probs_na)

        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n], [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)
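The constructor above wires one graph into several compiled functions. As a quick orientation, here is a minimal, self-contained sketch of the same softmax-policy surrogate pattern; the sizes and random data are made up for the demo, and only calls that already appear in these examples are used:

import numpy as np
import cgt
from cgt import nn

# tiny softmax policy: 128-dim observations, 4 actions
o_no = cgt.matrix("o_no", fixed_shape=(None, 128))
a_n = cgt.vector("a_n", dtype='i8')   # taken actions
q_n = cgt.vector("q_n")               # returns / advantages
probs_na = nn.softmax(nn.Affine(128, 4, weight_init=nn.IIDGaussian(std=0.01))(o_no))
# log pi(a|o) for the taken actions, indexed as in the snippet above
logps_n = cgt.log(probs_na)[cgt.arange(cgt.size(o_no, 0)), a_n]
surr = (logps_n * q_n).mean()         # policy-gradient surrogate
f_surr = cgt.function([o_no, a_n, q_n], surr)
print(f_surr(np.random.randn(5, 128), np.arange(5) % 4, np.ones(5)))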
Example No. 2
def test_cudnn():
    compile_info = get_compile_info()
    if not (compile_info["CGT_ENABLE_CUDNN"] and compile_info["CGT_ENABLE_CUDA"]):
        raise SkipTest("CUDNN not enabled. Skipping this test")

    Xval = nr.randn(2,3,19,18)
    Wval = nr.randn(5,3,3,3)
    bval = nr.randn(1,5,1,1)

    X = cgt.tensor4("X", fixed_shape=Xval.shape)
    W = cgt.tensor4("W", fixed_shape=Wval.shape)
    b = cgt.tensor4("b", fixed_shape=bval.shape)


    Y = cgt.core.Result(cudnn_ops.CudnnConvForward(1,1,1,1),[X, W, b])

    Y2 = nr.randn(*cgt.core.infer_shape(Y))

    fY = cgt.function([X,W,b],Y)
    Yval = fY(Xval,Wval,bval)
    cost = (Y*Y2).sum()
    fcost = cgt.function([X,W,b],cost)
    fgrad = cgt.function([X,W,b],cgt.grad(cost, [X,W,b]))
    angrads = fgrad(Xval,Wval,bval)
    nugrads = numeric_grad_multi(fcost, [Xval, Wval, bval],eps=1e-3)
    for (nugrad,angrad) in zip(nugrads,angrads):
        assert np.allclose(nugrad, angrad, rtol=9e-3, atol=1e-7) 
Example No. 3
def test_cudnn():
    if not get_compile_info()["CGT_ENABLE_CUDNN"]:
        raise SkipTest("CUDNN not enabled. Skipping this test")

    Xval = nr.randn(2, 3, 19, 18)
    Wval = nr.randn(5, 3, 3, 3)
    bval = nr.randn(1, 5, 1, 1)

    X = cgt.tensor4("X", fixed_shape=Xval.shape)
    W = cgt.tensor4("W", fixed_shape=Wval.shape)
    b = cgt.tensor4("b", fixed_shape=bval.shape)

    Y = cgt.core.Result(cudnn_ops.CudnnConvForward(1, 1, 1, 1), [X, W, b])

    Y2 = nr.randn(*cgt.core.infer_shape(Y))

    fY = cgt.function([X, W, b], Y)
    Yval = fY(Xval, Wval, bval)
    cost = (Y * Y2).sum()
    fcost = cgt.function([X, W, b], cost)
    fgrad = cgt.function([X, W, b], cgt.grad(cost, [X, W, b]))
    angrads = fgrad(Xval, Wval, bval)
    nugrads = numeric_grad_multi(fcost, [Xval, Wval, bval], eps=1e-3)
    for (nugrad, angrad) in zip(nugrads, angrads):
        assert np.allclose(nugrad, angrad)
Example No. 4
def test_cudnn():
    with cgt.scoped_update_config(precision="double",backend="native"):
        if not get_compile_info()["CGT_ENABLE_CUDNN"]:
            raise SkipTest("CUDNN not enabled. Skipping this test")

        Xval = nr.randn(2,3,19,18)
        Wval = nr.randn(5,3,3,3)
        bval = nr.randn(1,5,1,1)

        X = cgt.tensor4("X", fixed_shape=Xval.shape)
        W = cgt.tensor4("W", fixed_shape=Wval.shape)
        b = cgt.tensor4("b", fixed_shape=bval.shape)


        Y = cgt.core.Result(cudnn_ops.CudnnConvForward(1,1,1,1),[X, W, b])

        Y2 = nr.randn(*cgt.core.infer_shape(Y))

        fY = cgt.function([X,W,b],Y)
        Yval = fY(Xval,Wval,bval)
        cost = (Y*Y2).sum()
        fcost = cgt.function([X,W,b],cost)
        fgrad = cgt.function([X,W,b],cgt.grad(cost, [X,W,b]))
        angrads = fgrad(Xval,Wval,bval)
        nugrads = numeric_grad_multi(fcost, [Xval, Wval, bval],eps=1e-3)
        for (nugrad,angrad) in zip(nugrads,angrads):
            assert np.allclose(nugrad, angrad)
Example No. 5
File: __init__.py Project: zxie/cgt
def gradcheck_model(cost,
                    params,
                    extravars=(),
                    extravals=(),
                    atol=1e-8,
                    eps=1e-9):
    precision = cgt.get_precision()
    if precision == "single":
        cgt.utils.warn(
            "You're doing a gradient check with %s precision. Use double or better yet quad for best results"
            % (precision))
    assert all(param.is_input() for param in params)
    assert len(extravars) == len(extravals)

    # Convert to Argument nodes
    param_args = [
        cgt.core.Argument(typ=s.typ, name=s.name) if s.is_data() else s
        for s in params
    ]

    # Get new cost in terms of arguments
    cost = cgt.core.clone(cost, replace=dict(zip(params, param_args)))

    grads = cgt.grad(cost, param_args)
    paramvals = [param.op.get_value() for param in params]
    fcost = cgt.function(param_args, cost, givens=zip(extravars, extravals))
    fgrad = cgt.function(param_args, grads, givens=zip(extravars, extravals))

    angrads = fgrad(*paramvals)
    nugrads = numeric_grad_multi(fcost, paramvals, eps=eps)

    for (angrad, nugrad) in zip(angrads, nugrads):
        assert np.allclose(angrad, nugrad, atol=atol)
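gradcheck_model automates the comparison of cgt.grad against numeric differentiation. The core of that check, written out by hand with a central difference for a single scalar input (eps and tolerance are arbitrary demo choices):

import numpy as np
import cgt

cgt.set_precision('double')                         # single precision is too coarse here
x = cgt.scalar('x')
y = cgt.square(x)                                   # toy cost: dy/dx = 2x
f = cgt.function([x], y)
fgrad = cgt.function([x], cgt.grad(y, [x])[0])
x0, eps = 1.5, 1e-6
numgrad = (f(x0 + eps) - f(x0 - eps)) / (2 * eps)   # central difference
assert np.allclose(numgrad, fgrad(x0), atol=1e-6)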
Example No. 6
def check_scalar_grads(precision, backend):
    cgt.reset_config()
    np.random.seed(0)
    cgt.set_precision(precision)
    cgt.core.update_config(backend=backend)
    x = cgt.scalar('x')
    y = cgt.scalar('y')
    z = cgt.scalar('z')
    vars = [x,y,z] #pylint: disable=W0622
    vals = nr.rand(len(vars))+1

    PROB2RESULT = {}

    for ((key,_), cls) in it.chain(
            it.izip(core.UNARY_INFO.items(),it.repeat(core.ElwiseUnary)),
            it.izip(core.BINARY_INFO.items(),it.repeat(core.ElwiseBinary))
            ):
        if key == "conj":
            print "skipping conj"
            continue
        utils.colorprint(utils.Color.YELLOW, "Testing %s\n"%key)
        if cls == core.ElwiseUnary:
            n_in = 1
            op = cls(key)
        else:
            n_in = 2
            op = cls(key, (True,True))
        inputvars = vars[0:n_in]
        inputvals = vals[0:n_in]
        out = core.Result(op, inputvars)
        f = cgt.function(inputvars, out)
        try:
            grads = cgt.grad(out, inputvars)
        except core.NonDifferentiable:
            print "nondiff"
            continue
        if DISPLAY:
            print "Function:"
            cgt.print_tree(out)
            print "Gradient original:"
            cgt.print_tree(grads)
            print "Gradient simplified:"
        grads_simple = core.simplify(grads)
        if DISPLAY: cgt.print_tree(grads_simple)
        gradf = cgt.function(inputvars, grads)
        eps = {"single":1e-4,"double":1e-9}[precision]
        nugrad = numeric_grad(lambda li: f(*li), inputvals,eps=eps) #pylint: disable=W0640
        cgtgrad = gradf(*inputvals)
        np.testing.assert_almost_equal(nugrad,cgtgrad,decimal={"single":3,"double":6}[precision])

        grad_count = core.count_nodes(grads_simple)
        PROB2RESULT[key] = {}
        PROB2RESULT[key]["grad"] = grad_count

    if DISPLAY:
        from thirdparty.tabulate import tabulate
        print tabulate([[key,val["grad"]] for (key,val) in PROB2RESULT.iteritems()],headers=["funcname","gradcount"])    
Example No. 7
    def __init__(self, obs_dim, ctrl_dim):

        cgt.set_precision('double')
        Serializable.__init__(self, obs_dim, ctrl_dim)

        self.obs_dim = obs_dim
        self.ctrl_dim = ctrl_dim

        o_no = cgt.matrix("o_no",fixed_shape=(None,obs_dim))
        a_na = cgt.matrix("a_na",fixed_shape = (None, ctrl_dim))
        adv_n = cgt.vector("adv_n")
        oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2*ctrl_dim))
        self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)), name="std_1a")
        std_1a = cgt.exp(logstd_1a)

        # Here's where we apply the network
        h0 = o_no
        nhid = 32
        h1 = cgt.tanh(nn.Affine(obs_dim,nhid,weight_init=nn.IIDGaussian(std=0.1))(h0))
        h2 = cgt.tanh(nn.Affine(nhid,nhid,weight_init=nn.IIDGaussian(std=0.1))(h1))
        mean_na = nn.Affine(nhid,ctrl_dim,weight_init=nn.IIDGaussian(std=0.01))(h2)

        b = cgt.size(o_no, 0)
        std_na = cgt.repeat(std_1a, b, axis=0)

        oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
        oldstd_na = oldpdist_np[:, self.ctrl_dim:2*self.ctrl_dim]

        logp_n = ((-.5) * cgt.square( (a_na - mean_na) / std_na ).sum(axis=1)) - logstd_1a.sum()
        oldlogp_n = ((-.5) * cgt.square( (a_na - oldmean_na) / oldstd_na ).sum(axis=1)) - cgt.log(oldstd_na).sum(axis=1)

        ratio_n = cgt.exp(logp_n - oldlogp_n)

        surr = (ratio_n*adv_n).mean()

        pdists_np = cgt.concatenate([mean_na, std_na], axis=1)
        # kl = cgt.log(sigafter/)

        params = nn.get_parameters(surr)

        oldvar_na = cgt.square(oldstd_na)
        var_na = cgt.square(std_na)
        kl = (cgt.log(std_na / oldstd_na) + (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na) - .5).sum(axis=1).mean()


        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
        self._compute_grad_lagrangian = cgt.function([lam, oldpdist_np, o_no, a_na, adv_n], 
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj,params)]))
        self.f_pdist = cgt.function([o_no], pdists_np)

        self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])

        self.pc = ParamCollection(params)
Example No. 8
def test_scalars():
    np.random.seed(0)
    x = cgt.scalar('x')
    y = cgt.scalar('y')
    z = cgt.scalar('z')
    vars = [x,y,z] #pylint: disable=W0622
    vals = nr.rand(len(vars))+1

    PROB2RESULT = {}

    for ((key,_), cls) in it.chain(
            it.izip(core.UNARY_INFO.items(),it.repeat(core.ElwiseUnary)),
            it.izip(core.BINARY_INFO.items(),it.repeat(core.ElwiseBinary))
            ):
        if key == "conj":
            print "skipping conj"
            continue
        utils.colorprint(utils.Color.YELLOW, "Testing %s\n"%key)
        if cls == core.ElwiseUnary:
            n_in = 1
            op = cls(key)
        else:
            n_in = 2
            op = cls(key, (True,True))
        inputvars = vars[0:n_in]
        inputvals = vals[0:n_in]
        out = core.Result(op, inputvars)
        f = cgt.function(inputvars, out)
        try:
            grads = cgt.grad(out, inputvars)
        except core.NonDifferentiable:
            print "nondiff"
            continue
        if DISPLAY:
            print "Function:"
            cgt.print_tree(out)
            print "Gradient original:"
            cgt.print_tree(grads)
            print "Gradient simplified:"
        grads_simple = core.simplify(grads)
        if DISPLAY: cgt.print_tree(grads_simple)
        gradf = cgt.function(inputvars, grads)
        eps = {"single":1e-4,"double":1e-9}[cgt.get_precision()]
        nugrad = numeric_grad(lambda li: f(*li), inputvals,eps=eps) #pylint: disable=W0640
        cgtgrad = gradf(*inputvals)
        np.testing.assert_almost_equal(nugrad,cgtgrad,decimal={"single":3,"double":6}[cgt.get_precision()])

        grad_count = core.count_nodes(grads_simple)
        PROB2RESULT[key] = {}
        PROB2RESULT[key]["grad"] = grad_count

    if DISPLAY:
        from thirdparty.tabulate import tabulate
        print tabulate([[key,val["grad"]] for (key,val) in PROB2RESULT.iteritems()],headers=["funcname","gradcount"])    
Example No. 9
def check_affine(f, *nu_inputs):
    types = ",".join(["{%s,%s}" % (x.dtype, x.ndim) for x in nu_inputs])
    cgt.utils.colorprint(cgt.utils.Color.YELLOW,
                         "Testing %s(%s)\n" % (f.__name__, types))
    sy_inputs = map(tensor_like, nu_inputs)
    for (i, sy) in enumerate(sy_inputs):
        sy.name = "x%i" % i

    sy_result = f(*sy_inputs)

    def maybeprint(msg):
        if DISPLAY: print msg

    maybeprint("Function:")
    if DISPLAY: cgt.print_tree([sy_result])

    f_cgt = cgt.function(sy_inputs, sy_result)
    sy_grads = cgt.grad(sy_result, sy_inputs)
    gradf_cgt = cgt.function(sy_inputs, sy_grads)

    sy_result_simple = core.simplify([sy_result])
    sy_grads_simple = core.simplify(sy_grads)

    maybeprint("Gradient:")
    if DISPLAY: cgt.print_tree(sy_grads)

    maybeprint("Gradient after simplification:")
    if DISPLAY: cgt.print_tree(sy_grads_simple)

    out_true = f(*nu_inputs)
    out_cgt = f_cgt(*nu_inputs)

    grads_true = gradients_affine(f_cgt,
                                  nu_inputs,
                                  h=1e-4 if "max" in f.__name__ else 1e-1)
    grads_cgt = gradf_cgt(*nu_inputs)

    rtol = {"single": 1e-3, "double": 1e-5}[cgt.get_precision()]
    np.testing.assert_allclose(out_cgt, out_true, rtol=rtol)

    for (g_cgt, g_true) in zip(grads_cgt, grads_true):
        np.testing.assert_allclose(g_cgt, g_true, rtol=rtol)

    result_count = cgt.count_nodes(sy_result_simple)
    grad_count = cgt.count_nodes(sy_grads_simple)
    maybeprint("Result before: %i. after: %i" %
               (cgt.count_nodes([sy_result]), result_count))
    maybeprint("Grad before: %i. after: %i" %
               (cgt.count_nodes(sy_grads), grad_count))

    PROB2RESULT[f.__name__] = {}
    PROB2RESULT[f.__name__]["fn"] = result_count
    PROB2RESULT[f.__name__]["grad"] = grad_count
Example No. 10
def check_affine(f, *nu_inputs):
    types = ",".join(["{%s,%s}" % (x.dtype, x.ndim) for x in nu_inputs])
    cgt.utils.colorprint(cgt.utils.Color.YELLOW, "Testing %s(%s)\n" % (f.__name__, types))
    sy_inputs = map(tensor_like, nu_inputs)
    for (i, sy) in enumerate(sy_inputs):
        sy.name = "x%i" % i

    sy_result = f(*sy_inputs)

    def maybeprint(msg):
        if DISPLAY:
            print msg

    maybeprint("Function:")
    if DISPLAY:
        cgt.print_tree([sy_result])

    f_cgt = cgt.function(sy_inputs, sy_result)
    sy_grads = cgt.grad(sy_result, sy_inputs)
    gradf_cgt = cgt.function(sy_inputs, sy_grads)

    sy_result_simple = core.simplify([sy_result])
    sy_grads_simple = core.simplify(sy_grads)

    maybeprint("Gradient:")
    if DISPLAY:
        cgt.print_tree(sy_grads)

    maybeprint("Gradient after simplification:")
    if DISPLAY:
        cgt.print_tree(sy_grads_simple)

    out_true = f(*nu_inputs)
    out_cgt = f_cgt(*nu_inputs)

    grads_true = gradients_affine(f_cgt, nu_inputs, h=1e-4 if "max" in f.__name__ else 1e-1)
    grads_cgt = gradf_cgt(*nu_inputs)

    rtol = {"single": 1e-3, "double": 1e-5}[cgt.get_precision()]
    np.testing.assert_allclose(out_cgt, out_true, rtol=rtol)

    for (g_cgt, g_true) in zip(grads_cgt, grads_true):
        np.testing.assert_allclose(g_cgt, g_true, rtol=rtol)

    result_count = cgt.count_nodes(sy_result_simple)
    grad_count = cgt.count_nodes(sy_grads_simple)
    maybeprint("Result before: %i. after: %i" % (cgt.count_nodes([sy_result]), result_count))
    maybeprint("Grad before: %i. after: %i" % (cgt.count_nodes(sy_grads), grad_count))

    PROB2RESULT[f.__name__] = {}
    PROB2RESULT[f.__name__]["fn"] = result_count
    PROB2RESULT[f.__name__]["grad"] = grad_count
Example No. 11
    def __init__(self, xdim, args, dec="bernoulli"):
        self.xdim = xdim
        self.hdim = args.hdim
        self.zdim = args.zdim
        self.lmbda = args.lmbda  # weight decay coefficient * 2
        self.x = cgt.matrix("x", dtype=cgt.floatX)
        self.eps = cgt.matrix("eps", dtype=cgt.floatX)

        self.enc_mlp = GaussianMLP(self.x, self.xdim, self.hdim, self.zdim, nlayers=args.nlayers, eps=self.eps)
        if dec == "bernoulli":
            # log p(x | z) defined as -CE(x, y) = dec_mlp.cost(y)
            self.dec_mlp = BernoulliMLP(self.enc_mlp.out, self.zdim, self.hdim, self.xdim, nlayers=args.nlayers, y=self.x)
        elif dec == "gaussian":
            self.dec_mlp = GaussianMLP(self.enc_mlp.out, self.zdim, self.hdim, self.xdim, nlayers=args.nlayers, y=self.x)
        else:
            raise RuntimeError("unrecognized decoder %s" % dec)

        self.cost = (-cgt.sum(kld_unit_mvn(self.enc_mlp.mu, self.enc_mlp.var)) + self.dec_mlp.cost) / args.batch_size
        self.params = self.enc_mlp.params + self.dec_mlp.params
        # L2 regularization
        self.gparams = [cgt.grad(self.cost, [p])[0] + self.lmbda * p for p in self.params]
        self.gaccums = [cgt.shared(np.zeros(p.op.get_value().shape, dtype=cgt.floatX)) for p in self.params]

        # XXX replace w/ adagrad update from nn
        ADAGRAD_EPS = 1e-10  # for stability
        self.updates = [
            (param, param - args.lr * gparam / cgt.sqrt(gaccum + cgt.square(gparam) + ADAGRAD_EPS))
            for param, gparam, gaccum in zip(self.params, self.gparams, self.gaccums)
        ]
        self.updates += [
            (gaccum, gaccum + cgt.square(gparam))
            for gaccum, gparam in zip(self.gaccums, self.gparams)
        ]

        self.train = cgt.function(
            [self.x, self.eps],
            self.cost,
            updates=self.updates
        )
        self.test = cgt.function(
            [self.x, self.eps],
            self.cost,
            updates=None
        )
        # can be used for semi-supervised learning for example
        self.encode = cgt.function(
            [self.x, self.eps],
            self.enc_mlp.out
        )
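The update rule assembled above is plain Adagrad: every parameter carries an accumulator of squared gradients that damps its effective step size over time. A minimal numpy sketch of that rule on a toy quadratic (all constants are demo choices):

import numpy as np

ADAGRAD_EPS = 1e-10                   # for stability, as in the snippet
lr = 0.5
param, gaccum = np.zeros(3), np.zeros(3)
for _ in range(200):
    gparam = 2.0 * (param - 1.0)      # gradient of sum((param - 1)**2)
    param = param - lr * gparam / np.sqrt(gaccum + gparam**2 + ADAGRAD_EPS)
    gaccum = gaccum + gparam**2
print(param)                          # approaches [1. 1. 1.]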
Example No. 12
def test_array_wrapper():
    xval = np.zeros(10)
    x = cgt.shared(xval)
    f = cgt.function([], [], updates=[(x, x + 1)])
    f()
    g = cgt.function([], x.sum())
    assert np.allclose(x.op.get_value(), xval + 1)
    xval2 = np.arange(10)
    x.op.set_value(xval2)
    print x.op.get_value()
    assert np.allclose(x.op.get_value(), xval2)
    assert g() == xval2.sum()
    f()
    assert np.allclose(x.op.get_value(), xval2 + 1)
    assert g() == (xval2 + 1).sum()
Example No. 13
def test_im2col():
    for settings in [ ((4,4),(0,0),(1,1)), ((3,3),(1,1),(2,2)), ((3,3),(1,1),(3,3)) ]:
        xval = np.arange(2*1*28*28).reshape(2,1,28,28).astype(cgt.floatX)
        x = cgt.tensor4("x", fixed_shape=xval.shape)
        y = im2col(x, *settings)
        h = cgt.constant(np.random.randn(*cgt.infer_shape(y)))
        cost = (y*h).sum()

        fcost = cgt.function([x],cost)
        fgrad = cgt.function([x], cgt.grad(cost, [x])[0])

        from cgt.numeric_diff import numeric_grad
        gnum = numeric_grad(fcost, xval,eps=1e-5)
        gana = fgrad(xval)
        assert np.allclose(gnum, gana)
Example No. 14
def test_array_wrapper():
    xval = np.zeros(10)
    x = cgt.shared(xval)
    f = cgt.function([],[],updates=[(x,x+1)])
    f()
    g = cgt.function([],x.sum())
    assert np.allclose(x.op.get_value(), xval+1)
    xval2 = np.arange(10)
    x.op.set_value(xval2)
    print x.op.get_value()
    assert np.allclose(x.op.get_value(), xval2)
    assert g() == xval2.sum()
    f()
    assert np.allclose(x.op.get_value(), xval2+1)
    assert g() == (xval2+1).sum()
Example No. 15
def test_get_decoder_state():
    batch_size = 32
    feat_t_steps = 20
    feat_num_features = 42
    num_out_classes = 28
    num_out_classes_true = num_out_classes + 2  # start and end tokens are added
    decoder_size = 50

    tau = np.reshape(np.random.normal(0.1, 0.2, batch_size*feat_t_steps*feat_num_features), (batch_size, feat_t_steps, feat_num_features))
    tau2 = np.reshape(np.random.normal(0.1, 0.2, batch_size*feat_num_features), (batch_size, feat_num_features))
    tau3 = np.reshape(np.random.normal(0.1, 0.2, batch_size*num_out_classes_true), (batch_size, num_out_classes_true))

    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    s = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes,
                          decoder_size=decoder_size, feature_size=feat_num_features)

    context_bf = cgt.matrix(fixed_shape=(batch_size, feat_num_features))
    prev_out_bc = cgt.matrix(fixed_shape=(batch_size, num_out_classes_true))
    state_i_bf = nn.parameter(nn.init_array(nn.IIDGaussian(0.1), (batch_size, decoder_size)), name="decoder_init")
    decoder_out = s.get_decoder_state(context_bf, prev_out_bc, state_i_bf)
    decode_fun = cgt.function([feats, context_bf, prev_out_bc], [decoder_out])

    m = decode_fun(tau, tau2, tau3)[0]
    assert m.shape == (batch_size, decoder_size)
    assert np.mean(m) < 1.0
Example No. 16
def test_conv():
    try:
        import scipy.signal
    except ImportError:
        raise SkipTest("skipping because we don't have ndimage")

    np.random.seed(0)
    x = np.random.randn(2,2,5,17)
    filt = np.random.randn(3,2,4,7)

    filtrows = filt.shape[2]
    filtcols = filt.shape[3]

    batchsize = x.shape[0]
    outchans = filt.shape[0]

    out = np.zeros((batchsize,outchans,x.shape[2]+filtrows-1,x.shape[3]+filtcols-1))
    for b in xrange(x.shape[0]):
        for inchan in xrange(x.shape[1]):
            for outchan in xrange(outchans):
                out[b,outchan] += scipy.signal.convolve2d(x[b,inchan],filt[outchan,inchan][::-1,::-1],mode='full')

    f = cgt.function([], nn.conv2d(cgt.constant(x), cgt.constant(filt), kernelshape=(filtrows,filtcols), pad=(filtrows-1, filtcols-1)))
    out1 = f()
    # out1 = cgt.numeric_eval1(nn.conv2d(cgt.constant(x), cgt.constant(f), kersize=(filtrows,filtcols)), {})
    np.testing.assert_allclose(out, out1, atol={"single":1e-3,"double":1e-6}[cgt.get_precision()])
Example No. 17
def test_setting_weights():
    X = cgt.matrix("X", fixed_shape=(None, 28*28))
    model = build_model(X, 0.0)
    nnbuilder.set_all_weights(model, 'mnist.p')
    y = cgt.vector("y", dtype='i8')
    cost = -cgt.mean(categorical.loglik(y, model))
    selected_number = cgt.argmax(model, axis=1)
    err_nodrop = cgt.cast(cgt.not_equal(selected_number, y), cgt.floatX).mean()
    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost])

    Xdata, ydata = load_data()

    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]

    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]

    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(3):
        tstart = time.time()
        elapsed = time.time() - tstart
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
Example No. 18
def check_conv(precision):
    cgt.reset_config()
    cgt.set_precision(precision)
    f = cgt.function([], nn.conv2d(cgt.constant(x), cgt.constant(filt), kernelshape=(filtrows,filtcols), pad=(filtrows-1, filtcols-1)))
    out1 = f()
    # out1 = cgt.numeric_eval1(nn.conv2d(cgt.constant(x), cgt.constant(f), kersize=(filtrows,filtcols)), {})
    np.testing.assert_allclose(out, out1, atol={"single":1e-3,"double":1e-6}[precision])
Example No. 19
def runtest(backend, precision):
    with cgt.scoped_update_config(backend='native', precision=precision):
        xval = np.zeros(10)
        x = cgt.shared(xval)
        f = cgt.function([], [], updates=[(x, x + 1)])
        f()
        g = cgt.function([], x.sum())
        assert np.allclose(x.op.get_value(), xval + 1)
        xval2 = np.arange(10)
        x.op.set_value(xval2)
        print x.op.get_value()
        assert np.allclose(x.op.get_value(), xval2)
        assert g() == xval2.sum()
        f()
        assert np.allclose(x.op.get_value(), xval2 + 1)
        assert g() == (xval2 + 1).sum()
Example No. 20
def test_get_character_distribution():
    batch_size = 32
    feat_t_steps = 20
    feat_num_features = 42
    num_out_classes = 28  # This is the index of the start token.
    num_out_classes_true = num_out_classes + 2  # Add start and end tokens automatically.
    decoder_size = 50

    tau = np.reshape(np.random.normal(0.1, 0.2, batch_size*feat_t_steps*feat_num_features), (batch_size, feat_t_steps, feat_num_features))
    tau2 = np.reshape(np.random.normal(0.1, 0.2, batch_size*feat_num_features), (batch_size, feat_num_features))
    tau3 = np.reshape(np.random.normal(0.1, 0.2, batch_size*decoder_size), (batch_size, decoder_size))

    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    s = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes,
                          decoder_size=decoder_size, feature_size=feat_num_features)

    context_bf = cgt.matrix(fixed_shape=(batch_size, feat_num_features))
    state_bf = cgt.matrix(fixed_shape=(batch_size, decoder_size))

    m_out = s.get_character_distribution(state_bf, context_bf)

    out_fun = cgt.function([feats, context_bf, state_bf], [m_out])
    m = out_fun(tau, tau2, tau3)[0]

    assert m.shape == (batch_size, num_out_classes_true)
Example No. 21
def test_devices():
    N = 10
    K = 3

    compile_info = cgt.compilation.get_compile_info()
    cuda_enabled = compile_info["CGT_ENABLE_CUDA"]
    if not cuda_enabled:
        raise SkipTest("cuda disabled")

    Xval = np.random.randn(N, K).astype(cgt.floatX)
    wval = np.random.randn(K).astype(cgt.floatX)
    bval = np.asarray(np.random.randn()).astype(cgt.floatX)
    yval = np.random.randn(N).astype(cgt.floatX)

    with cgt.scoped_update_config(default_device=cgt.Device(devtype="gpu")):

        X_nk = cgt.shared(Xval, "X", device=cgt.Device(devtype='gpu'))
        y_n = cgt.shared(yval, "y")
        w_k = cgt.shared(wval, "w")
        b = cgt.shared(bval, name="b")

        print "bval", bval

        ypred = cgt.dot(cgt.square(X_nk), w_k) + b

        err = cgt.sum(cgt.sin(ypred - y_n))
        g = cgt.grad(err, [w_k, b])
        outputs = [err] + g
        f = cgt.function([], [err] + g)
        results = f()
        print results
        assert np.allclose(
            results[0],
            np.sin(np.square(Xval).dot(wval) + bval - yval).sum())
Example No. 22
def test_matmuls():
    with cgt.scoped_update_config(parallel = True, backend="native"):

        m = 8
        d = 1000

        # build graph

        X = cgt.matrix("X")
        Y = cgt.matrix("Y")
        loss=0
        for k in xrange(m):
            # loss = loss+cgt.sin(X*Y+k).sum()
            loss = loss+(X.dot(Y+k)).sum()

            f = cgt.function([X,Y], loss)

        # test things out!

        seed(0)

        X_val = randn(d, d)
        Y_val = randn(d, d)
        vals = [X_val, Y_val]

        tic = time.time()
        out = f(*vals)
        toc = time.time()

        print toc-tic
Example No. 23
def test_shape_err():
    with CaptureStderr():
        with cgt.scoped_update_config(debug=True, backend="python"):
            x = cgt.vector()
            y = cgt.vector()
            f = cgt.function([x,y],x+y)
            f(np.zeros(3),np.zeros(4))
Example No. 24
def function(inputs,
             outputs,
             updates=None,
             givens=None,
             allow_input_downcast=None,
             on_unused_input=None):
    if is_theano():
        allow_input_downcast = allow_input_downcast or False
        on_unused_input = on_unused_input or 'raise'
        return theano.function(inputs,
                               outputs,
                               updates=updates,
                               givens=givens,
                               allow_input_downcast=allow_input_downcast,
                               on_unused_input=on_unused_input)
    elif is_cgt():
        return cgt.function(inputs, outputs, updates=updates, givens=givens)
    elif is_tf():
        return TfFunctionWrapper(inputs=inputs,
                                 outputs=outputs,
                                 updates=updates,
                                 givens=givens)
    elif is_mxnet():
        return MxFunctionWrapper(inputs=inputs,
                                 outputs=outputs,
                                 updates=updates,
                                 givens=givens)
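This wrapper provides a single compile entry point across Theano, CGT, TensorFlow, and MXNet, dispatching on whichever backend the surrounding module reports active. A hedged usage sketch, assuming the cgt branch is taken (function and is_cgt come from the snippet's module):

import numpy as np
import cgt

x = cgt.vector('x')
y = cgt.vector('y')
# with the cgt backend selected, this forwards straight to cgt.function
f = function([x, y], cgt.sum(x * y), updates=None, givens=None)
print(f(np.arange(3.0), np.ones(3)))  # -> 3.0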
Example No. 25
def test_flatvec():
    cgt.reset_config()
    cgt.set_precision('double')
    cgt.core.update_config(backend="python") # XXX

    N = 10
    K = 3

    Xval = np.random.randn(N,K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)

    X_nk = cgt.shared(Xval, "X")
    y_n = cgt.shared(yval, "y")
    w_k = cgt.shared(wval, "w")
    b = cgt.shared(bval, name="b")

    ypred = cgt.dot(X_nk, w_k) + b

    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])
    g = core.simplify(g)

    pars = [w_k, b]
    flatx = nn.setup_contiguous_storage(pars)
    f = cgt.function([], [err,cgt.flatcat(g)])
Example No. 26
def test_matmuls():
    with cgt.scoped_update_config(parallel=True):

        m = 8
        d = 1000

        # build graph

        X = cgt.matrix("X")
        Y = cgt.matrix("Y")
        loss = 0
        for k in xrange(m):
            # loss = loss+cgt.sin(X*Y+k).sum()
            loss = loss + (X.dot(Y + k)).sum()

            f = cgt.function([X, Y], loss)

        # test things out!

        seed(0)

        X_val = randn(d, d)
        Y_val = randn(d, d)
        vals = [X_val, Y_val]

        tic = time.time()
        out = f(*vals)
        toc = time.time()

        print toc - tic
Example No. 27
def runtest(backend, precision):
    with cgt.scoped_update_config(backend='native',precision=precision):
        xval = np.zeros(10)
        x = cgt.shared(xval)
        f = cgt.function([],[],updates=[(x,x+1)])
        f()
        g = cgt.function([],x.sum())
        assert np.allclose(x.op.get_value(), xval+1)
        xval2 = np.arange(10)
        x.op.set_value(xval2)
        print x.op.get_value()
        assert np.allclose(x.op.get_value(), xval2)
        assert g() == xval2.sum()
        f()
        assert np.allclose(x.op.get_value(), xval2+1)
        assert g() == (xval2+1).sum()
Example No. 28
def test_flatvec():
    cgt.reset_config()
    cgt.set_precision('double')
    cgt.core.update_config(backend="python")  # XXX

    N = 10
    K = 3

    Xval = np.random.randn(N, K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)

    X_nk = cgt.shared(Xval, "X")
    y_n = cgt.shared(yval, "y")
    w_k = cgt.shared(wval, "w")
    b = cgt.shared(bval, name="b")

    ypred = cgt.dot(X_nk, w_k) + b

    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])
    g = core.simplify(g)

    pars = [w_k, b]
    flatx = nn.setup_contiguous_storage(pars)
    f = cgt.function([], [err, cgt.flatcat(g)])
Example No. 29
File: sfnn.py Project: TZ2016/snn
def make_funcs(config, dbg_out={}):
    net_in, net_out = hybrid_network(config['num_inputs'], config['num_outputs'],
                                     config['num_units'], config['num_sto'],
                                     dbg_out=dbg_out)
    if not config['dbg_out_full']: dbg_out = {}
    # def f_sample(_inputs, num_samples=1, flatten=False):
    #     _mean, _var = f_step(_inputs)
    #     _samples = []
    #     for _m, _v in zip(_mean, _var):
    #         _s = np.random.multivariate_normal(_m, np.diag(np.sqrt(_v)), num_samples)
    #         if flatten: _samples.extend(_s)
    #         else: _samples.append(_s)
    #     return np.array(_samples)
    Y_gt = cgt.matrix("Y")
    Y_prec = cgt.tensor3('V', fixed_shape=(None, config['num_inputs'], config['num_inputs']))
    params = nn.get_parameters(net_out)
    size_batch, size_out = net_out.shape
    inputs, outputs = [net_in], [net_out]
    if config['no_bias']:
        print "Excluding bias"
        params = [p for p in params if not p.name.endswith(".b")]
    loss_vec = dist.gaussian.logprob(Y_gt, net_out, Y_prec)
    if config['weight_decay'] > 0.:
        print "Applying penalty on parameter norm"
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = config['weight_decay'] * cgt.sum(params_flat ** 2)
        loss_vec -= loss_param # / size_batch
    loss = cgt.sum(loss_vec) / size_batch

    # TODO_TZ f_step seems not to fail if X has wrong dim
    f_step = cgt.function(inputs, outputs)
    f_surr = get_surrogate_func(inputs + [Y_prec, Y_gt], outputs,
                                [loss_vec], params, _dbg_out=dbg_out)

    return params, f_step, None, None, None, f_surr
Example No. 30
def test_devices():
    N = 10
    K = 3

    compile_info = cgt.compilation.get_compile_info()
    cuda_enabled = compile_info["CGT_ENABLE_CUDA"]
    if not cuda_enabled:
        raise SkipTest("cuda disabled")

    Xval = np.random.randn(N,K).astype(cgt.floatX)
    wval = np.random.randn(K).astype(cgt.floatX)
    bval = np.asarray(np.random.randn()).astype(cgt.floatX)
    yval = np.random.randn(N).astype(cgt.floatX)

    with cgt.scoped_update_config(default_device=cgt.Device(devtype="gpu")):

        X_nk = cgt.shared(Xval, "X", device=cgt.Device(devtype='gpu'))
        y_n = cgt.shared(yval, "y")
        w_k = cgt.shared(wval, "w")
        b = cgt.shared(bval, name="b")

        print "bval",bval

        ypred = cgt.dot(cgt.square(X_nk), w_k) + b

        err = cgt.sum(cgt.sin(ypred - y_n))
        g = cgt.grad(err, [w_k, b])
        outputs = [err]+g
        f = cgt.function([], [err]+g)
        results = f()
        print results
        assert np.allclose(results[0] , np.sin(np.square(Xval).dot(wval)+bval-yval).sum())
Example No. 31
File: rrnn.py Project: zoemcc/rrnn
def make_loss_and_grad_and_step(arch, size_input, size_output, size_mem,
                                size_batch, n_layers, n_unroll, k_in, k_h):
    # symbolic variables

    x_tnk = cgt.tensor3()
    targ_tnk = cgt.tensor3()
    #make_network = make_deep_lstm if arch=="lstm" else make_deep_gru
    make_network = make_deep_rrnn_rot_relu
    network = make_network(size_input, size_mem, n_layers, size_output,
                           size_batch, k_in, k_h)
    init_hiddens = [
        cgt.matrix() for _ in xrange(get_num_hiddens(arch, n_layers))
    ]
    # TODO fixed sizes

    cur_hiddens = init_hiddens
    loss = 0
    for t in xrange(n_unroll):
        outputs = network([x_tnk[t]] + cur_hiddens)
        cur_hiddens, prediction_logprobs = outputs[:-1], outputs[-1]
        # loss = loss + nn.categorical_negloglik(prediction_probs, targ_tnk[t]).sum()
        loss = loss - (prediction_logprobs * targ_tnk[t]).sum()
        cur_hiddens = outputs[:-1]

    final_hiddens = cur_hiddens

    loss = loss / (n_unroll * size_batch)

    params = network.get_parameters()
    gradloss = cgt.grad(loss, params)

    flatgrad = flatcat(gradloss)

    with utils.Message("compiling loss+grad"):
        f_loss_and_grad = cgt.function([x_tnk, targ_tnk] + init_hiddens,
                                       [loss, flatgrad] + final_hiddens)
    f_loss = cgt.function([x_tnk, targ_tnk] + init_hiddens, loss)

    assert len(init_hiddens) == len(final_hiddens)

    x_nk = cgt.matrix('x')
    outputs = network([x_nk] + init_hiddens)

    f_step = cgt.function([x_nk] + init_hiddens, outputs)

    # print "node count", cgt.count_nodes(flatgrad)
    return network, f_loss, f_loss_and_grad, f_step
Example No. 32
def test_cpu_pool(**kwargs):
    np.random.seed(0)
    x = cgt.tensor4("x", fixed_shape=(2, 3, 5, 7))
    y = max_pool_2d(x, (4, 4), (0, 0), (1, 1))
    xval = np.random.randn(2, 3, 5, 7)
    hval = np.random.randn(*cgt.infer_shape(y))
    h = cgt.constant(hval)

    cost = (y * h).sum()

    fcost = cgt.function([x], cost)
    fgrad = cgt.function([x], cgt.grad(cost, [x])[0])

    from cgt.numeric_diff import numeric_grad
    gnum = numeric_grad(fcost, xval)
    gana = fgrad(xval)
    assert np.allclose(gnum, gana)
Example No. 33
def test_im2col():
    for settings in [((4, 4), (0, 0), (1, 1)), ((3, 3), (1, 1), (2, 2)),
                     ((3, 3), (1, 1), (3, 3))]:
        xval = np.arange(2 * 1 * 28 * 28).reshape(2, 1, 28,
                                                  28).astype(cgt.floatX)
        x = cgt.tensor4("x", fixed_shape=xval.shape)
        y = im2col(x, *settings)
        h = cgt.constant(np.random.randn(*cgt.infer_shape(y)))
        cost = (y * h).sum()

        fcost = cgt.function([x], cost)
        fgrad = cgt.function([x], cgt.grad(cost, [x])[0])

        from cgt.numeric_diff import numeric_grad
        gnum = numeric_grad(fcost, xval, eps=1e-5)
        gana = fgrad(xval)
        assert np.allclose(gnum, gana)
Example No. 34
def test_pool(**kwargs):
    np.random.seed(0)
    x = cgt.tensor4("x", fixed_shape=(2,3,5,7))
    y = max_pool_2d(x, (4,4),(0,0),(1,1))
    xval = np.random.randn(2,3,5,7)
    hval = np.random.randn(*cgt.infer_shape(y))
    h = cgt.constant(hval)

    cost = (y*h).sum()

    fcost = cgt.function([x], cost)
    fgrad = cgt.function([x], cgt.grad(cost, [x])[0])

    from cgt.numeric_diff import numeric_grad
    gnum = numeric_grad(fcost, xval)
    gana = fgrad(xval)
    assert np.allclose(gnum,gana)
Example No. 35
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--epochs",type=int,default=10)
    args = parser.parse_args()

    batchsize = 64
    Xshape = (batchsize, 3, 32, 32)
    X = cgt.tensor4("X", fixed_shape = Xshape)
    y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4')

    conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=1e-4))(X)
    relu1 = nn.rectify(conv1)
    pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2))
    conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(relu1)
    relu2 = nn.rectify(conv2)
    pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2))
    conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool2)
    pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2))
    relu3 = nn.rectify(pool3)
    d0,d1,d2,d3 = relu3.shape
    flatlayer = relu3.reshape([d0,d1*d2*d3])
    nfeats = cgt.infer_shape(flatlayer)[1]
    ip1 = nn.Affine(nfeats, 10)(flatlayer)
    logprobs = nn.logsoftmax(ip1)
    loss = -logprobs[cgt.arange(batchsize), y].mean()

    params = nn.get_parameters(loss)
    updates = rmsprop_updates(loss, params, stepsize=1e-3)
    
    train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates)

    if args.profile: cgt.profiler.start()

    data = np.load("/Users/joschu/Data/cifar-10-batches-py/cifar10.npz")
    Xtrain = data["X_train"]
    ytrain = data["y_train"]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(args.epochs):
        for start in xrange(0, Xtrain.shape[0], batchsize):
            tstart = time.time()
            end = start+batchsize
            print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart
            if start > batchsize*5: break
        # elapsed = time.time() - tstart
        # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        # testerr, testloss = computeloss(Xtest, ytest)
        # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
        if args.profile: 
            cgt.profiler.print_stats()
            return
        if args.unittest:
            break
Example No. 36
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--epochs",type=int,default=10)
    args = parser.parse_args()

    batchsize = 64
    Xshape = (batchsize, 3, 32, 32)
    X = cgt.tensor4("X", fixed_shape = Xshape)
    y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4')

    conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=1e-4))(X)
    relu1 = nn.rectify(conv1)
    pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2))
    conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool1)
    relu2 = nn.rectify(conv2)
    pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2))
    conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool2)
    pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2))
    relu3 = nn.rectify(pool3)
    d0,d1,d2,d3 = relu3.shape
    flatlayer = relu3.reshape([d0,d1*d2*d3])
    nfeats = cgt.infer_shape(flatlayer)[1]
    ip1 = nn.Affine(nfeats, 10)(flatlayer)
    logprobs = nn.logsoftmax(ip1)
    loss = -logprobs[cgt.arange(batchsize), y].mean()

    params = nn.get_parameters(loss)
    updates = rmsprop_updates(loss, params, stepsize=1e-3)
    
    train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates)

    if args.profile: cgt.profiler.start()

    data = fetch_dataset("http://rll.berkeley.edu/cgt-data/cifar10.npz")
    Xtrain = data["X_train"]
    ytrain = data["y_train"]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(args.epochs):
        for start in xrange(0, Xtrain.shape[0], batchsize):
            tstart = time.time()
            end = start+batchsize
            print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart
            if start > batchsize*5: break
        # elapsed = time.time() - tstart
        # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        # testerr, testloss = computeloss(Xtest, ytest)
        # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
        if args.profile: 
            cgt.profiler.print_stats()
            return
        if args.unittest:
            break
Example No. 37
def test_update():
    with cgt.scoped_update_config(parallel = True, backend="native"):
        xval = np.array(1.5)
        x = cgt.shared(xval)
        f = cgt.function([], x.sum(), updates=[(x,x+1)])
        before = x.op.get_value().copy()
        f()
        after = x.op.get_value()
        assert np.allclose(after , before+1)
Example No. 38
def make_updater_fc():
    X = cgt.matrix("X", fixed_shape=(None, 28 * 28))
    y = cgt.vector("y", dtype='i8')
    stepsize = cgt.scalar("stepsize")
    loss = build_fc_return_loss(X, y)
    params = nn.get_parameters(loss)
    gparams = cgt.grad(loss, params)
    updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
    return cgt.function([X, y, stepsize], loss, updates=updates)
Example No. 39
def test_update():
    with cgt.scoped_update_config(parallel=True):
        xval = np.array(1.5)
        x = cgt.shared(xval)
        f = cgt.function([], x.sum(), updates=[(x, x + 1)])
        before = x.op.get_value().copy()
        f()
        after = x.op.get_value()
        assert np.allclose(after, before + 1)
Example No. 40
def make_updater_fc():
    X = cgt.matrix("X", fixed_shape=(None, 28 * 28))
    y = cgt.vector("y", dtype="i8")
    stepsize = cgt.scalar("stepsize")
    loss = build_fc_return_loss(X, y)
    params = nn.get_parameters(loss)
    gparams = cgt.grad(loss, params)
    updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
    return cgt.function([X, y, stepsize], loss, updates=updates)
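make_updater_fc compiles a single function that returns the loss and applies an SGD step through updates in one call. A self-contained variant with the project-specific build_fc_return_loss replaced by an inline softmax classifier (sizes and random data are illustrative):

import numpy as np
import cgt
from cgt import nn

X = cgt.matrix("X", fixed_shape=(None, 28 * 28))
y = cgt.vector("y", dtype='i8')
stepsize = cgt.scalar("stepsize")
probs = nn.softmax(nn.Affine(28 * 28, 10, weight_init=nn.IIDGaussian(std=0.01))(X))
loss = -cgt.log(probs[cgt.arange(cgt.size(X, 0)), y]).mean()
params = nn.get_parameters(loss)
updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, cgt.grad(loss, params))]
update_fn = cgt.function([X, y, stepsize], loss, updates=updates)
print(update_fn(np.random.rand(64, 28 * 28), np.random.randint(0, 10, 64), 1e-3))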
Example No. 41
def main(num_epochs=NUM_EPOCHS):
    #cgt.set_precision('half')
    print("Building network ...")
    # Recurrent layers expect input of shape
    # (batch size, max sequence length, number of features)
    X = cgt.tensor3(name='X', fixed_shape=(N_BATCH, MAX_LENGTH, 2))
    l_forward = nnbuilder.recurrentLayer(nn_input=X, num_units=N_HIDDEN)
    l_backward = nnbuilder.recurrentLayer(nn_input=X, num_units=N_HIDDEN, backwards=True)
    #l_forward = nnbuilder.LSTMLayer(nn_input=X, num_units=N_HIDDEN, activation=cgt.sigmoid)
    #l_backward = nnbuilder.LSTMLayer(nn_input=X, num_units=N_HIDDEN, activation=cgt.sigmoid, backwards=True)
    #l_forward = nnbuilder.GRULayer(nn_input=X, num_units=N_HIDDEN, activation=nn.rectify)
    #l_backward = nnbuilder.GRULayer(nn_input=X, num_units=N_HIDDEN, activation=nn.rectify, backwards=True)
    l_forward_slice = l_forward[:, MAX_LENGTH-1, :]  # Take the last element in the forward slice time dimension
    l_backward_slice = l_backward[:, 0, :]  # And the first element in the backward slice time dimension
    l_sum = cgt.concatenate([l_forward_slice, l_backward_slice], axis=1)
    l_out = nnbuilder.denseLayer(l_sum, num_units=1, activation=cgt.tanh)
    target_values = cgt.vector('target_output')
    predicted_values = l_out[:, 0]  # For this task we only need the last value
    cost = cgt.mean((predicted_values - target_values)**2)
    # Compute SGD updates for training
    print("Computing updates ...")
    updates = nn.rmsprop(cost, nn.get_parameters(l_out), LEARNING_RATE)
    #updates = nn.nesterov_momentum(cost, nn.get_parameters(l_out), 0.05)
    # cgt functions for training and computing cost
    print("Compiling functions ...")
    train = cgt.function([X, target_values], cost, updates=updates)
    compute_cost = cgt.function([X, target_values], cost)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val = gen_data()

    print("Training ...")
    time_start = time.time()
    try:
        for epoch in range(num_epochs):
            for _ in range(EPOCH_SIZE):
                X, y, m = gen_data()
                train(X, y)
            cost_val = compute_cost(X_val, y_val)
            print("Epoch {} validation cost = {}".format(epoch+1, cost_val))
            print ('Epoch took ' + str(time.time() - time_start))
            time_start = time.time()
    except KeyboardInterrupt:
        pass
Example No. 42
def main():
    print("Loading data...")
    X = cgt.matrix("X", fixed_shape=(None, 28*28))
    y = cgt.vector("y", dtype='i8')

    model = build_model(X, 0.0)
    loss = -cgt.mean(categorical.loglik(y, model))

    updates = nn.rmsprop(loss, nn.get_parameters(loss), 0.01)
    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)

    y_nodrop = cgt.argmax(model, axis=1)

    cost_nodrop = -cgt.mean(categorical.loglik(y, model))
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()

    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost_nodrop])


    batch_size=128
    Xdata, ydata = load_data()

    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]

    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]

    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(3):
        tstart = time.time()
        for start in xrange(0, Xtrain.shape[0], batch_size):
            end = start+batch_size
            train(Xtrain[start:end], ytrain[start:end])
        elapsed = time.time() - tstart
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])

    nnbuilder.save_weights(model, 'mnist')
Example No. 43
def test_multi_output():
    for x in (cgt.scalar('x'), cgt.vector('x'), cgt.matrix('x')):
        for cls in (SinCos, SinCos2):
            y, z = core.unpack(core.Result(cls(), [x]))
            xnum = np.ones((3, ) * x.ndim, cgt.floatX)
            correct = (np.sin(xnum), np.cos(xnum))
            yznum = cgt.numeric_eval([y, z], {x: xnum})
            np.testing.assert_allclose(yznum, correct)
            f = cgt.function([x], [y, z])
            np.testing.assert_allclose(f(xnum), correct)
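cgt.numeric_eval, used above, evaluates graph outputs straight from a {variable: value} feed without compiling a function first, which is handy for one-off checks. A minimal sketch:

import numpy as np
import cgt

x = cgt.vector('x')
y = cgt.sum(cgt.square(x))
# no cgt.function needed; the dict maps symbolic inputs to arrays
print(cgt.numeric_eval([y], {x: np.arange(3.0)}))  # -> [5.0]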
Example No. 44
    def __init__(self, num_features=None, num_hidden=100):
        stepsize = 0.01
        # with shape (batchsize, ncols)
        X = cgt.matrix("X", fixed_shape=(1, num_features))
        # y: a symbolic variable representing the rewards, which are integers
        y = cgt.scalar("y", dtype='float64')

        hid1 = nn.rectify(
            nn.Affine(num_features, num_hidden, weight_init=nn.IIDGaussian(std=.1), bias_init=nn.Constant(1))(X)
        )
        # One final fully-connected layer, and then a linear activation output for reward
        output = nn.Affine(num_hidden, 1, weight_init=nn.IIDGaussian(std=.1), bias_init=nn.Constant(1))(hid1)
        abs_deviation = cgt.abs(output - y).mean()
        params = nn.get_parameters(abs_deviation)
        gparams = cgt.grad(abs_deviation, params)

        updates = [(p, p-stepsize*gp) for (p, gp) in zip(params, gparams)]
        self.predictor = cgt.function([X], output)
        self.updater = cgt.function([X, y], abs_deviation, updates=updates)
Example No. 45
def test_shape_err():
    try:
        with CaptureStderr() as s:
            with cgt.scoped_update_config(debug=True):
                x = cgt.vector()
                y = cgt.vector()
                f = cgt.function([x,y],x+y)
                f(np.zeros(3),np.zeros(4))
    except Exception as e:
        assert "f = cgt.function([x,y],x+y)" in s.getvalue()
Example No. 46
def CGT_vLJ_Optimize(x):
    N = len(x)
    #cgt.set_precision('double')
    xt = cgt.vector('xt')
    vLJt = 0
    for j in range(1,N):
        for i in range(j):
            rho = ((xt[i*D:i*D+D] - xt[j*D:j*D+D])**2).sum()
            vLJt += rho**(-6.0)-(rho**(-3.0))
    
    f = cgt.function([xt],4*vLJt)
    dvLJc = cgt.grad(4*vLJt, xt)    
    df = cgt.function([xt],dvLJc)
    
    CGT_BFGSres = optimize.minimize(f, np.ravel(x), \
                                  method='L-BFGS-B',        \
                                  jac = df,     \
                                  options={'disp': False})
    return np.reshape(CGT_BFGSres.x, (N,D))
Example No. 47
    def make_updater_convnet():
        X = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28))  # so shapes can be inferred
        y = cgt.vector("y", dtype="i8")
        stepsize = cgt.scalar("stepsize")
        loss = build_convnet_return_loss(X, y)

        params = nn.get_parameters(loss)
        gparams = cgt.grad(loss, params)
        updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
        return cgt.function([X, y, stepsize], loss, updates=updates)
Example No. 48
def make_funcs(config, dbg_out=None):
    params, Xs, Ys, C_0, H_0, C_T, H_T, C_1, H_1 = lstm_network(
        config['rnn_steps'], config['num_inputs'], config['num_outputs'],
        config['num_units'], config['num_mems'])

    # basic
    size_batch = Xs[0].shape[0]
    dY = Ys[0].shape[-1]
    Ys_gt = [
        cgt.matrix(fixed_shape=(size_batch, dY), name='Y%d' % t)
        for t in range(len(Ys))
    ]
    Ys_var = [cgt.tensor3(fixed_shape=(size_batch, dY, dY)) for _ in Ys]
    net_inputs, net_outputs = Xs + C_0 + H_0 + Ys_var, Ys + C_T + H_T

    # calculate loss
    loss_vec = []
    for i in range(len(Ys)):
        #     if i == 0: continue
        _l = dist.gaussian.logprob(Ys_gt[i], Ys[i], Ys_var[i])
        loss_vec.append(_l)
    loss_vec = cgt.add_multi(loss_vec)
    if config['weight_decay'] > 0.:
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = config['weight_decay'] * cgt.sum(params_flat**2)
        loss_vec -= loss_param  # / size_batch
    loss = cgt.sum(loss_vec) / config['rnn_steps'] / size_batch
    grad = cgt.grad(loss, params)

    # functions
    def f_init(size_batch):
        c_0, h_0 = [], []
        for _n_m in config['num_mems']:
            if _n_m > 0:
                c_0.append(np.zeros((size_batch, _n_m)))
                h_0.append(np.zeros((size_batch, _n_m)))
        return c_0, h_0

    f_step = cgt.function([Xs[0]] + C_0 + H_0, [Ys[0]] + C_1 + H_1)
    f_loss = cgt.function(net_inputs + Ys_gt, loss)
    f_grad = cgt.function(net_inputs + Ys_gt, grad)
    f_surr = cgt.function(net_inputs + Ys_gt, [loss] + net_outputs + grad)
    return params, f_step, f_loss, f_grad, f_init, f_surr
Example No. 49
    def runTest(self):
        f1 = cgt.function1([], ())
        assert f1() == ()

        x = cgt.vector()
        xval = np.random.randn(1)
        f2 = cgt.function([x], [(x,x),(x,),()])
        ytrue = [(xval,xval),(xval,),()]
        y = f2(xval)
        assert y==ytrue
Example No. 50
def test_scalar_input():
    x = cgt.scalar()
    f = cgt.function([x], x**2)
    xval = 2
    yval = 4
    assert np.allclose(f(2), 4)
    assert np.allclose(f(2.0), 4)    
    assert np.allclose(f(np.array(2)), 4)      
    assert np.allclose(f(np.array(2.0)), 4)    
    assert np.allclose(f(np.array([2])[0]), 4)        
    assert np.allclose(f(np.array([2.0])[0]), 4)        
Example No. 51
    def make_updater_convnet():
        X = cgt.tensor4("X", fixed_shape=(None, 1, 28,
                                          28))  # so shapes can be inferred
        y = cgt.vector("y", dtype='i8')
        stepsize = cgt.scalar("stepsize")
        loss = build_convnet_return_loss(X, y)

        params = nn.get_parameters(loss)
        gparams = cgt.grad(loss, params)
        updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
        return cgt.function([X, y, stepsize], loss, updates=updates)
Example No. 52
def CGT_dvLJ(x):
    N = len(x)
    xt = cgt.vector('xt')
    vLJt = 0
    for j in range(1,N):
        for i in range(j):
            rho = ((xt[i*D:i*D+D] - xt[j*D:j*D+D])**2).sum()
            vLJt += rho**(-6.0)-(rho**(-3.0))
    
    dvLJc = cgt.grad(4*vLJt, xt)    
    df = cgt.function([xt],dvLJc)
    return df(np.ravel(x))
Example No. 53
def make_funcs(opt, ntm, total_time, loss_timesteps):
    x_tbk = cgt.tensor3("x", fixed_shape=(total_time, opt.b, opt.k))
    y_tbp = cgt.tensor3("y", fixed_shape=(total_time, opt.b, opt.p))
    loss_timesteps = set(loss_timesteps)

    initial_states = make_ntm_initial_states(opt)
    params = ntm.get_parameters() + get_parameters(initial_states)
    # params = ntm.get_parameters()

    lossCE = 0
    loss01 = 0

    state_arrs = initial_states
    for t in xrange(total_time):
        tmp = ntm([x_tbk[t]] + state_arrs)
        raw_pred = tmp[0]
        state_arrs = tmp[1:4]

        if t in loss_timesteps:
            p_pred = cgt.sigmoid(raw_pred)
            ce = bernoulli_crossentropy(
                y_tbp[t],
                p_pred).sum()  # cross-entropy of bernoulli distribution
            lossCE = lossCE + ce
            loss01 = loss01 + cgt.cast(cgt.equal(y_tbp[t], round01(p_pred)),
                                       cgt.floatX).sum()

    lossCE = lossCE / (len(loss_timesteps) * opt.p * opt.b) / np.log(2)
    loss01 = loss01 / (len(loss_timesteps) * opt.p * opt.b)
    gradloss = cgt.grad(lossCE, params)

    flatgrad = flatcat(gradloss)

    f_loss = cgt.function([x_tbk, y_tbp], lossCE)
    f_loss_and_grad = cgt.function([x_tbk, y_tbp], [lossCE, loss01, flatgrad])

    print "number of nodes in computation graph:", core.count_nodes(
        [lossCE, loss01, flatgrad])

    return f_loss, f_loss_and_grad, params
Example No. 54
def test_sleeps():
    with cgt.scoped_update_config(parallel=True):
        x = cgt.scalar('x')
        y1 = sleepfor(x, .1)
        y2 = sleepfor(x, .1)

        z = y1 + y2
        fpar = cgt.function([x], z)

        tstart = time.time()
        fpar(0)
        elapsed = time.time() - tstart
        assert elapsed < .11
Example No. 55
    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no", fixed_shape=(None, n_in))
        a_n = cgt.vector("a_n", dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        h0 = (o_no - 128.0) / 128.0
        nhid = 64
        h1 = cgt.tanh(
            nn.Affine(128, nhid, weight_init=nn.IIDGaussian(std=.1))(h0))
        probs_na = nn.softmax(
            nn.Affine(nhid, n_actions,
                      weight_init=nn.IIDGaussian(std=0.01))(h1))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n * q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_n, q_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
        self.f_pdist = cgt.function([o_no], probs_na)

        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n],
                                      [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)
Example No. 56
def test_cpu_pool():
    with cgt.scoped_update_config(precision="quad", backend="native"):
        print cgt.get_precision()
        ci = get_compile_info()

        np.random.seed(0)
        x = cgt.tensor4("x", fixed_shape=(2, 3, 5, 7))
        y = max_pool_2d(x, (4, 4), (0, 0), (1, 1))
        xval = np.random.randn(2, 3, 5, 7)
        hval = np.random.randn(*cgt.infer_shape(y))
        h = cgt.constant(hval)

        cost = (y * h).sum()

        fcost = cgt.function([x], cost)
        fgrad = cgt.function([x], cgt.grad(cost, [x])[0])

        from cgt.numeric_diff import numeric_grad
        gnum = numeric_grad(fcost, xval)
        gana = fgrad(xval)

        assert np.allclose(gnum, gana)