def __init__(self, n_actions):
        """Build the symbolic graph and compiled functions of a softmax policy.

        Observations (rows of width 128) are rescaled, passed through one
        tanh hidden layer of 64 units and a softmax layer with ``n_actions``
        outputs.

        Args:
            n_actions: width of the softmax output layer.

        Attributes set on ``self``:
            _f_grad_lagrangian: (lam, oldpdists, o_no, a_n, q_n) -> flat
                gradient of ``surr - lam * kl`` w.r.t. the parameters.
            f_pdist, f_probs: (o_no) -> action probabilities; both names
                refer to the same compiled function.
            f_surr_kl: (oldpdists, o_no, a_n, q_n) -> [surr, kl].
            f_gradlogp: (oldpdists, o_no, a_n, q_n) -> flat gradient of surr.
            pc: ParamCollection over the network parameters.

        Also sets the global cgt precision to 'double'.
        """
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no",fixed_shape=(None,n_in))
        a_n = cgt.vector("a_n",dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        # Maps 0 -> -1 and 256 -> 1; presumably the inputs are byte-valued
        # -- TODO confirm against the caller.
        h0 = (o_no - 128.0)/128.0 
        nhid = 64
        h1 = cgt.tanh(nn.Affine(n_in,nhid,weight_init=nn.IIDGaussian(std=.1))(h0))
        probs_na = nn.softmax(nn.Affine(nhid,n_actions,weight_init=nn.IIDGaussian(std=0.01))(h1))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        # Log-probability of the action actually taken in each row.
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n*q_n).mean()
        # Batch mean of sum_a old * log(old / new), i.e. KL(old || new).
        kl = (oldpdist_np * cgt.log(oldpdist_np/probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function([lam, oldpdist_np, o_no, a_n, q_n], 
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj,params)]))

        # f_pdist and f_probs evaluate the same graph, so compile it once
        # and expose it under both names.
        self.f_pdist = self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n], [surr, kl])
        # surr does not depend on oldpdist_np; the input list simply mirrors
        # the one used by f_surr_kl.
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)
Beispiel #2
0
def test_devices():
    cgt.set_precision("double")
    cgt.update_config(backend="native")
    N = 10
    K = 3

    compile_info = cgt.compilation.get_compile_info()
    cuda_enabled = compile_info["CGT_ENABLE_CUDA"]
    if not cuda_enabled:
        raise SkipTest("cuda disabled")

    Xval = np.random.randn(N, K).astype(cgt.floatX)
    wval = np.random.randn(K).astype(cgt.floatX)
    bval = np.asarray(np.random.randn()).astype(cgt.floatX)
    yval = np.random.randn(N).astype(cgt.floatX)

    with cgt.scoped_update_config(default_device=cgt.Device(devtype="gpu")):

        X_nk = cgt.shared(Xval, "X", device=cgt.Device(devtype="gpu"))
        y_n = cgt.shared(yval, "y")
        w_k = cgt.shared(wval, "w")
        b = cgt.shared(bval, name="b")

        print "bval", bval

        ypred = cgt.dot(cgt.square(X_nk), w_k) + b

        err = cgt.sum(cgt.sin(ypred - y_n))
        g = cgt.grad(err, [w_k, b])
        outputs = [err] + g
        f = cgt.function([], [err] + g)
        results = f()
        print results
        assert np.allclose(results[0], np.sin(np.square(Xval).dot(wval) + bval - yval).sum())
Beispiel #3
0
def test_flatvec():
    """Smoke-test contiguous parameter storage with a flattened gradient.

    Builds a least-squares loss over shared variables, moves the parameters
    ``w`` and ``b`` into contiguous storage and compiles a function that
    returns the loss together with the flat concatenation of the gradients.
    The test passes if graph construction and compilation do not raise.
    """
    # The original referenced ``cgt.reset_config`` without calling it, so
    # configuration from earlier tests leaked into this one.
    cgt.reset_config()
    cgt.set_precision('double')
    cgt.core.update_config(backend="python") # XXX

    N = 10
    K = 3

    Xval = np.random.randn(N,K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)

    X_nk = cgt.shared(Xval, "X")
    y_n = cgt.shared(yval, "y")
    w_k = cgt.shared(wval, "w")
    b = cgt.shared(bval, name="b")

    ypred = cgt.dot(X_nk, w_k) + b

    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])
    g = core.simplify(g)

    pars = [w_k, b]
    # The return values below are not inspected; the calls are exercised
    # only to check that they succeed.
    flatx = nn.setup_contiguous_storage(pars)
    f = cgt.function([], [err,cgt.flatcat(g)])
Beispiel #4
0
 def check_conv(precision):
     """Compare cgt's conv2d output with the reference ``out``.

     Uses ``x``, ``filt``, ``filtrows``, ``filtcols`` and ``out`` from the
     enclosing scope (not visible in this block). ``precision`` is either
     "single" or "double" and selects both the cgt precision and the
     absolute tolerance of the comparison.
     """
     atol_by_precision = {"single":1e-3,"double":1e-6}
     cgt.reset_config()
     cgt.set_precision(precision)
     # Full padding: one less than the filter extent on each side.
     conv_expr = nn.conv2d(
         cgt.constant(x),
         cgt.constant(filt),
         kernelshape=(filtrows,filtcols),
         pad=(filtrows-1, filtcols-1))
     compiled = cgt.function([], conv_expr)
     out1 = compiled()
     np.testing.assert_allclose(out, out1, atol=atol_by_precision[precision])
Beispiel #5
0
 def runTest(self):
     cgt.set_precision('double')
     x = cgt.vector()
     y = cgt.square(x)
     eg = cgt.execution.compilation_pipeline([x],[y+y],[])
     pprint.pprint(eg.to_json())
     import cycgt
     interp = cycgt.cInterpreter(eg)
     print interp(np.array([3,4,5,6],'f8'))
Beispiel #6
0
def check_scalar_grads(precision, backend):
    """Check symbolic gradients of every elementwise op against numeric ones.

    For each key in ``core.UNARY_INFO`` and ``core.BINARY_INFO`` (except
    "conj"), builds the op on scalar inputs, differentiates it with
    ``cgt.grad`` and compares the result with ``numeric_grad``. Ops that
    raise ``core.NonDifferentiable`` are skipped.

    Args:
        precision: "single" or "double"; selects the cgt precision, the
            finite-difference step and the number of decimals compared.
        backend: value passed to ``cgt.core.update_config(backend=...)``.

    Raises:
        AssertionError: if a numeric and a symbolic gradient disagree.
    """
    cgt.reset_config()
    np.random.seed(0)
    cgt.set_precision(precision)
    cgt.core.update_config(backend=backend)
    x = cgt.scalar('x')
    y = cgt.scalar('y')
    z = cgt.scalar('z')
    vars = [x,y,z] #pylint: disable=W0622
    # Evaluation points lie in [1, 2).
    vals = nr.rand(len(vars))+1

    # Maps op key -> {"grad": node count of the simplified gradient}.
    PROB2RESULT = {}

    for ((key,_), cls) in it.chain(
            it.izip(core.UNARY_INFO.items(),it.repeat(core.ElwiseUnary)),
            it.izip(core.BINARY_INFO.items(),it.repeat(core.ElwiseBinary))
            ):
        if key == "conj":
            print "skipping conj"
            continue
        utils.colorprint(utils.Color.YELLOW, "Testing %s\n"%key)
        if cls == core.ElwiseUnary:
            n_in = 1
            op = cls(key)
        else:
            n_in = 2
            op = cls(key, (True,True))
        inputvars = vars[0:n_in]
        inputvals = vals[0:n_in]
        out = core.Result(op, inputvars)
        f = cgt.function(inputvars, out)
        try:
            grads = cgt.grad(out, inputvars)
        except core.NonDifferentiable:
            print "nondiff"
            continue
        if DISPLAY:
            print "Function:"
            cgt.print_tree(out)
            print "Gradient original:"
            cgt.print_tree(grads)
            print "Gradient simplified:"
        grads_simple = core.simplify(grads)
        if DISPLAY: cgt.print_tree(grads_simple)
        # The unsimplified gradient is what gets compiled and compared;
        # the simplified one is only printed and counted below.
        gradf = cgt.function(inputvars, grads)
        eps = {"single":1e-4,"double":1e-9}[precision]
        nugrad = numeric_grad(lambda li: f(*li), inputvals,eps=eps) #pylint: disable=W0640
        cgtgrad = gradf(*inputvals)
        np.testing.assert_almost_equal(nugrad,cgtgrad,decimal={"single":3,"double":6}[precision])

        grad_count = core.count_nodes(grads_simple)
        PROB2RESULT[key] = {}
        PROB2RESULT[key]["grad"] = grad_count

    if DISPLAY:
        from thirdparty.tabulate import tabulate
        print tabulate([[key,val["grad"]] for (key,val) in PROB2RESULT.iteritems()],headers=["funcname","gradcount"])    
Beispiel #7
0
    def __init__(self, obs_dim, ctrl_dim):
        """Build a diagonal-Gaussian policy graph and its compiled functions.

        The mean comes from a network with two tanh hidden layers of 32
        units; the standard deviation is ``exp`` of a single learned row
        vector shared by all observations.

        Args:
            obs_dim: width of each observation row.
            ctrl_dim: width of each action row.

        Attributes set on ``self``:
            obs_dim, ctrl_dim: the constructor arguments.
            logstd: the learned log standard deviation, shape (1, ctrl_dim).
            _compute_surr_kl, f_objs: (oldpdist, o_no, a_na, adv_n) ->
                [surr, kl]; both names refer to the same compiled function.
            _compute_grad_lagrangian: (lam, oldpdist, o_no, a_na, adv_n) ->
                flat gradient of ``surr - lam * kl`` w.r.t. the parameters.
            f_pdist: (o_no) -> [mean, std] concatenated along axis 1.
            pc: ParamCollection over the parameters.

        Also sets the global cgt precision to 'double'.
        """

        cgt.set_precision('double')
        Serializable.__init__(self, obs_dim, ctrl_dim)

        self.obs_dim = obs_dim
        self.ctrl_dim = ctrl_dim

        o_no = cgt.matrix("o_no",fixed_shape=(None,obs_dim))
        a_na = cgt.matrix("a_na",fixed_shape = (None, ctrl_dim))
        adv_n = cgt.vector("adv_n")
        # Old distribution per row: first ctrl_dim columns are the mean,
        # the remaining ctrl_dim columns the standard deviation.
        oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2*ctrl_dim))
        # NOTE: this parameter stores the *log* std although it is named
        # "std_1a"; the name is left untouched because it is a runtime string.
        self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)), name="std_1a")
        std_1a = cgt.exp(logstd_1a)

        # Here's where we apply the network
        h0 = o_no
        nhid = 32
        h1 = cgt.tanh(nn.Affine(obs_dim,nhid,weight_init=nn.IIDGaussian(std=0.1))(h0))
        h2 = cgt.tanh(nn.Affine(nhid,nhid,weight_init=nn.IIDGaussian(std=0.1))(h1))
        mean_na = nn.Affine(nhid,ctrl_dim,weight_init=nn.IIDGaussian(std=0.01))(h2)

        # Tile the shared std row once per observation in the batch.
        b = cgt.size(o_no, 0)
        std_na = cgt.repeat(std_1a, b, axis=0)

        oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
        oldstd_na = oldpdist_np[:, self.ctrl_dim:2*self.ctrl_dim]

        # Gaussian log-densities without the constant -0.5*ctrl_dim*log(2*pi),
        # which cancels in the ratio below.
        logp_n = ((-.5) * cgt.square( (a_na - mean_na) / std_na ).sum(axis=1)) - logstd_1a.sum()
        oldlogp_n = ((-.5) * cgt.square( (a_na - oldmean_na) / oldstd_na ).sum(axis=1)) - cgt.log(oldstd_na).sum(axis=1)

        ratio_n = cgt.exp(logp_n - oldlogp_n)

        surr = (ratio_n*adv_n).mean()

        pdists_np = cgt.concatenate([mean_na, std_na], axis=1)

        params = nn.get_parameters(surr)

        # Batch mean of KL(old || new) between the diagonal Gaussians.
        oldvar_na = cgt.square(oldstd_na)
        var_na = cgt.square(std_na)
        kl = (cgt.log(std_na / oldstd_na) + (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na) - .5).sum(axis=1).mean()


        lam = cgt.scalar()
        penobj = surr - lam * kl
        # _compute_surr_kl and f_objs evaluate the same graph, so compile it
        # once and expose it under both names.
        self._compute_surr_kl = self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
        self._compute_grad_lagrangian = cgt.function([lam, oldpdist_np, o_no, a_na, adv_n], 
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj,params)]))
        self.f_pdist = cgt.function([o_no], pdists_np)

        self.pc = ParamCollection(params)
0
def test_multi_output():
    """Check two-output ops (sin, cos) on scalar, vector and matrix inputs.

    For each input rank and each of ``SinCos`` / ``SinCos2``, the unpacked
    outputs are evaluated on an all-ones array both through
    ``cgt.numeric_eval`` and through a compiled function, and compared with
    numpy's sin and cos.
    """
    cgt.reset_config()
    cgt.set_precision("single")    
    symbolic_inputs = (cgt.scalar('x'), cgt.vector('x'), cgt.matrix('x'))
    op_classes = (SinCos, SinCos2)
    for sym in symbolic_inputs:
        # Shape is (), (3,) or (3, 3) depending on the rank of the input.
        ones = np.ones((3,)*sym.ndim, cgt.floatX)
        expected = (np.sin(ones),np.cos(ones))
        for op_cls in op_classes:
            sin_out, cos_out = core.unpack(core.Result(op_cls(), [sym]))
            evaluated = cgt.numeric_eval([sin_out, cos_out], {sym:ones})
            np.testing.assert_allclose(evaluated, expected)
            compiled = cgt.function([sym],[sin_out, cos_out])
            np.testing.assert_allclose(compiled(ones), expected)
Beispiel #9
0
def test_linreg():
    cgt.reset_config()
    cgt.set_precision('double')
    N = 10
    K = 3

    Xval = np.random.randn(N, K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)

    X_nk = cgt.matrix("X")
    y_n = cgt.vector("y")
    w_k = cgt.vector("w")
    b = cgt.scalar(name="b")

    ypred = cgt.dot(X_nk, w_k) + b

    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])

    g_simple, an, _ = cgt.core.simplify_and_analyze(g)

    print "Loss function:"
    cgt.print_tree([err])
    print "Gradient:"
    cgt.print_tree(g)

    print "Gradient simplified"
    cgt.print_tree(
        g_simple,
        nodefn=lambda node, o: o.write(" " + an["node2hash"][node][:5]))

    print "-------"

    d = {X_nk: Xval, w_k: wval, b: bval, y_n: yval}

    np.testing.assert_allclose(cgt.numeric_eval(err, d),
                               np.linalg.norm(Xval.dot(wval) + bval - yval)**2)
    np.testing.assert_allclose(cgt.numeric_eval(g[0], d),
                               2 * Xval.T.dot(Xval.dot(wval) + bval - yval))
    np.testing.assert_allclose(cgt.numeric_eval(g[1], d),
                               2 * np.sum(Xval.dot(wval) + bval - yval, 0))
Beispiel #10
0
def test_einsum():
    """Compare ``cgt.einsum`` with ``np.einsum`` on random axis orderings.

    Runs ten trials of a batched contraction "ijk,ikl->ijl" in which the
    axes of the second operand and of the output are randomly permuted.
    """
    cgt.reset_config()
    cgt.set_precision("double")
    x = cgt.tensor3()
    y = cgt.tensor3()

    sizes = {'i':2,'j':3,'k':5,'l':7}
    xaxes = 'ijk'
    yaxes = 'ikl'
    zaxes = 'ijl'
    for _ in xrange(10):
        xperm = xaxes
        # Use a distinct index name: in Python 2 a list-comprehension
        # variable leaks into the enclosing scope and would overwrite the
        # loop counter.
        permaxes = [[chars[idx] for idx in np.random.permutation(3)] for chars in [yaxes,zaxes]]
        yperm = permaxes[0]
        desc = "%s,%s->%s"%tuple("".join(chars) for chars in [xperm] + permaxes)
        z = cgt.einsum(desc, x, y)
        xval = nr.randn(*(sizes[c] for c in xperm))
        yval = nr.randn(*(sizes[c] for c in yperm))
        np.testing.assert_allclose(
            cgt.numeric_eval(z, {x : xval, y : yval}),
            np.einsum(desc, xval, yval))
Beispiel #11
0
def test_lrn():
    if not get_compile_info()["CGT_ENABLE_CUDA"]:
        raise SkipTest("Skipping because CUDA disabled")

    with cgt.scoped_update_config(precision="double",backend="native"):
        from cgt.tests import gradcheck_model
        cgt.set_precision('double')
        nr.seed(0)
        Xval = nr.randn(4,8,16,16)
        X = cgt.shared(Xval, name="X", fixed_shape_mask="all")
        # X = cgt.tensor4(name='X')
        y = cross_channel_lrn(X, localsize=4, alpha=.1, beta=.5)
        f = cgt.function([],y)
        print f().sum()
        print f().sum()
        print f().sum()
        assert np.isfinite(f().sum())
        # print f(Xval).sum()
        a = nr.rand(*cgt.infer_shape(y))
        loss = (y*a).sum()
        gradcheck_model(loss, [X],eps=1e-5)
Beispiel #12
0
    def __init__(self, n_actions):
        """Build the symbolic graph and compiled functions of a softmax policy.

        Observations (rows of width 128) are rescaled, passed through one
        tanh hidden layer of 64 units and a softmax layer with ``n_actions``
        outputs.

        Args:
            n_actions: width of the softmax output layer.

        Attributes set on ``self``:
            _f_grad_lagrangian: (lam, oldpdists, o_no, a_n, q_n) -> flat
                gradient of ``surr - lam * kl`` w.r.t. the parameters.
            f_pdist, f_probs: (o_no) -> action probabilities; both names
                refer to the same compiled function.
            f_surr_kl: (oldpdists, o_no, a_n, q_n) -> [surr, kl].
            f_gradlogp: (oldpdists, o_no, a_n, q_n) -> flat gradient of surr.
            pc: ParamCollection over the network parameters.

        Also sets the global cgt precision to 'double'.
        """
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no", fixed_shape=(None, n_in))
        a_n = cgt.vector("a_n", dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        # Maps 0 -> -1 and 256 -> 1; presumably the inputs are byte-valued
        # -- TODO confirm against the caller.
        h0 = (o_no - 128.0) / 128.0
        nhid = 64
        h1 = cgt.tanh(
            nn.Affine(n_in, nhid, weight_init=nn.IIDGaussian(std=.1))(h0))
        probs_na = nn.softmax(
            nn.Affine(nhid, n_actions,
                      weight_init=nn.IIDGaussian(std=0.01))(h1))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        # Log-probability of the action actually taken in each row.
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n * q_n).mean()
        # Batch mean of sum_a old * log(old / new), i.e. KL(old || new).
        kl = (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_n, q_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))

        # f_pdist and f_probs evaluate the same graph, so compile it once
        # and expose it under both names.
        self.f_pdist = self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n],
                                      [surr, kl])
        # surr does not depend on oldpdist_np; the input list simply mirrors
        # the one used by f_surr_kl.
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)
Beispiel #13
0
def test_linreg():
    cgt.reset_config()
    cgt.set_precision('double')
    N = 10
    K = 3

    Xval = np.random.randn(N,K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)

    X_nk = cgt.matrix("X")
    y_n = cgt.vector("y")
    w_k = cgt.vector("w")
    b = cgt.scalar(name="b")

    ypred = cgt.dot(X_nk, w_k) + b

    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])

    g_simple,an,_ = cgt.core.simplify_and_analyze(g)


    print "Loss function:"
    cgt.print_tree([err])
    print "Gradient:"
    cgt.print_tree(g)

    print "Gradient simplified"
    cgt.print_tree(g_simple, nodefn=lambda node,o: o.write(" " + an["node2hash"][node][:5]))

    print "-------"

    d = {X_nk : Xval, w_k : wval, b : bval, y_n : yval}

    np.testing.assert_allclose(cgt.numeric_eval(err,d), np.linalg.norm(Xval.dot(wval) + bval - yval)**2)
    np.testing.assert_allclose(cgt.numeric_eval(g[0],d), 2 * Xval.T.dot(Xval.dot(wval) + bval - yval))
    np.testing.assert_allclose(cgt.numeric_eval(g[1],d), 2 *  np.sum(Xval.dot(wval) + bval - yval, 0))
Beispiel #14
0
    def __init__(self, obs_dim, ctrl_dim):
        """Build a diagonal-Gaussian policy graph and its compiled functions.

        The mean comes from a network with two tanh hidden layers of 32
        units; the standard deviation is ``exp`` of a single learned row
        vector shared by all observations.

        Args:
            obs_dim: width of each observation row.
            ctrl_dim: width of each action row.

        Attributes set on ``self``:
            obs_dim, ctrl_dim: the constructor arguments.
            logstd: the learned log standard deviation, shape (1, ctrl_dim).
            _compute_surr_kl, f_objs: (oldpdist, o_no, a_na, adv_n) ->
                [surr, kl]; both names refer to the same compiled function.
            _compute_grad_lagrangian: (lam, oldpdist, o_no, a_na, adv_n) ->
                flat gradient of ``surr - lam * kl`` w.r.t. the parameters.
            f_pdist: (o_no) -> [mean, std] concatenated along axis 1.
            pc: ParamCollection over the parameters.

        Also sets the global cgt precision to 'double'.
        """

        cgt.set_precision('double')
        Serializable.__init__(self, obs_dim, ctrl_dim)

        self.obs_dim = obs_dim
        self.ctrl_dim = ctrl_dim

        o_no = cgt.matrix("o_no", fixed_shape=(None, obs_dim))
        a_na = cgt.matrix("a_na", fixed_shape=(None, ctrl_dim))
        adv_n = cgt.vector("adv_n")
        # Old distribution per row: first ctrl_dim columns are the mean,
        # the remaining ctrl_dim columns the standard deviation.
        oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2 * ctrl_dim))
        # NOTE: this parameter stores the *log* std although it is named
        # "std_1a"; the name is left untouched because it is a runtime string.
        self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)),
                                               name="std_1a")
        std_1a = cgt.exp(logstd_1a)

        # Here's where we apply the network
        h0 = o_no
        nhid = 32
        h1 = cgt.tanh(
            nn.Affine(obs_dim, nhid, weight_init=nn.IIDGaussian(std=0.1))(h0))
        h2 = cgt.tanh(
            nn.Affine(nhid, nhid, weight_init=nn.IIDGaussian(std=0.1))(h1))
        mean_na = nn.Affine(nhid,
                            ctrl_dim,
                            weight_init=nn.IIDGaussian(std=0.01))(h2)

        # Tile the shared std row once per observation in the batch.
        b = cgt.size(o_no, 0)
        std_na = cgt.repeat(std_1a, b, axis=0)

        oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
        oldstd_na = oldpdist_np[:, self.ctrl_dim:2 * self.ctrl_dim]

        # Gaussian log-densities without the constant -0.5*ctrl_dim*log(2*pi),
        # which cancels in the ratio below.
        logp_n = ((-.5) * cgt.square(
            (a_na - mean_na) / std_na).sum(axis=1)) - logstd_1a.sum()
        oldlogp_n = ((-.5) * cgt.square(
            (a_na - oldmean_na) / oldstd_na).sum(axis=1)
                     ) - cgt.log(oldstd_na).sum(axis=1)

        ratio_n = cgt.exp(logp_n - oldlogp_n)

        surr = (ratio_n * adv_n).mean()

        pdists_np = cgt.concatenate([mean_na, std_na], axis=1)

        params = nn.get_parameters(surr)

        # Batch mean of KL(old || new) between the diagonal Gaussians.
        oldvar_na = cgt.square(oldstd_na)
        var_na = cgt.square(std_na)
        kl = (cgt.log(std_na / oldstd_na) +
              (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na) -
              .5).sum(axis=1).mean()

        lam = cgt.scalar()
        penobj = surr - lam * kl
        # _compute_surr_kl and f_objs evaluate the same graph, so compile it
        # once and expose it under both names.
        self._compute_surr_kl = self.f_objs = cgt.function(
            [oldpdist_np, o_no, a_na, adv_n], [surr, kl])
        self._compute_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_na, adv_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
        self.f_pdist = cgt.function([o_no], pdists_np)

        self.pc = ParamCollection(params)
Beispiel #15
0
    def __init__(self,
                 model="dense",
                 im_size=(28, 28),
                 dropout=True,
                 devtype="cpu",
                 grad_check=True,
                 reg=0):
        """Build a dense classifier graph and compile its functions.

        Args:
            model: one of "dense1", "dense2" or "dense3" (1, 2 or 3 hidden
                layers of 256 units). NOTE: the default "dense" is not one
                of these, so a model name must be passed explicitly or
                RuntimeError is raised.
            im_size: two-element sequence (im_size[0], im_size[1]); the input
                width is ``2*im_size[0]*im_size[1] + im_size[0] + im_size[1]``.
            dropout: when true, use the model's dropout probabilities for the
                training graph; otherwise all dropout probabilities are 0.
            devtype: device type passed to ``cgt.core.Device``.
            grad_check: when true, set the cgt precision to "quad".
            reg: coefficient of the L1 weight penalty in the training cost.

        Raises:
            RuntimeError: if ``model`` is not a known model name.

        Side effects:
            Seeds ``np.random`` with 0, updates the global cgt configuration
            (default device, native backend), prints ``model`` and compiles
            ``computeloss``, ``y_out`` and ``train`` on the instance.
        """
        if grad_check: cgt.set_precision("quad")
        self.model = model
        self.reg = reg
        np.random.seed(0)
        cgt.update_config(default_device=cgt.core.Device(devtype=devtype),
                          backend="native")
        print(model)
        # Reject unknown names before any graph state is created.
        if model not in ("dense1", "dense2", "dense3"):
            raise RuntimeError("Unknown Model")

        # Inputs are identical for every model variant.
        self.Xsize = 2 * im_size[0] * im_size[1] + im_size[0] + im_size[1]
        self.X = cgt.matrix("X", fixed_shape=(None, self.Xsize))
        self.y = cgt.vector("y", dtype='i8')

        # MLP with 1 hidden layer
        if model == "dense1":
            self.p_drop_input, self.p_drop_hidden = (
                0.2, 0.5) if dropout else (0, 0)
            self.w_h = init_weights(self.Xsize, 256)
            self.w_o = init_weights(256, 8)
            self.pofy_drop = dense_model1(self.X, self.w_h, self.w_o,
                                          self.p_drop_input,
                                          self.p_drop_hidden)
            self.pofy_nodrop = dense_model1(self.X, self.w_h, self.w_o, 0., 0.)
            self.params = [self.w_h, self.w_o]
        # MLP with 2 hidden layers
        elif model == "dense2":
            self.p_drop_input, self.p_drop_hidden = (
                0.2, 0.5) if dropout else (0, 0)
            self.w_h = init_weights(self.Xsize, 256)
            self.w_h2 = init_weights(256, 256)
            self.w_o = init_weights(256, 8)
            self.pofy_drop = dense_model2(self.X, self.w_h, self.w_h2,
                                          self.w_o, self.p_drop_input,
                                          self.p_drop_hidden)
            self.pofy_nodrop = dense_model2(self.X, self.w_h, self.w_h2,
                                            self.w_o, 0., 0.)
            self.params = [self.w_h, self.w_h2, self.w_o]
        # MLP with 3 hidden layers (one dropout probability per hidden layer)
        else:
            self.p_drop_input, self.p_drop_hidden = (
                0.0, [0.5, 0.5, 0.5]) if dropout else (0, [0, 0, 0])
            self.w_h = init_weights(self.Xsize, 256)
            self.w_h2 = init_weights(256, 256)
            self.w_h3 = init_weights(256, 256)
            self.w_o = init_weights(256, 8)
            self.pofy_drop = dense_model3(self.X, self.w_h, self.w_h2,
                                          self.w_h3, self.w_o,
                                          self.p_drop_input,
                                          self.p_drop_hidden)
            self.pofy_nodrop = dense_model3(self.X, self.w_h, self.w_h2,
                                            self.w_h3, self.w_o, 0.,
                                            [0., 0., 0.])
            self.params = [self.w_h, self.w_h2, self.w_h3, self.w_o]

        # L1 penalty: sum of absolute weights, accumulated in parameter
        # order so the expression associates as in a + b + c.
        l1 = cgt.abs(self.params[0]).sum()
        for weight in self.params[1:]:
            l1 = l1 + cgt.abs(weight).sum()
        self.l1 = l1
        self.cost_drop = -cgt.mean(
            categorical.loglik(self.y,
                               self.pofy_drop)) + self.reg * self.l1

        self.y_nodrop = cgt.argmax(self.pofy_nodrop, axis=1)
        self.cost_nodrop = -cgt.mean(
            categorical.loglik(self.y, self.pofy_nodrop))
        self.err_nodrop = cgt.cast(cgt.not_equal(self.y_nodrop, self.y),
                                   cgt.floatX).mean()
        self.computeloss = cgt.function(
            inputs=[self.X, self.y],
            outputs=[self.err_nodrop, self.cost_nodrop])
        self.y_out = cgt.function(inputs=[self.X], outputs=[self.y_nodrop])
        self.updates = rmsprop_updates(self.cost_drop, self.params)
        self.train = cgt.function(inputs=[self.X, self.y],
                                  outputs=[],
                                  updates=self.updates)
Beispiel #16
0
    def run_training(self,
                     input,
                     stepsize=0.01,
                     epochs=10,
                     output='None',
                     batch_size=128,
                     grad_check=True,
                     profile=False,
                     step_decrease_rate=0.5,
                     step_decrease_time=1000):
        # run NN training from input matlab data file, and save test data prediction in output file

        # load data from Matlab file, including
        # im_data: flattened images
        # state_data: concatenated one-hot vectors for each state variable
        # label_data: one-hot vector for action (state difference)
        if grad_check: cgt.set_precision("quad")
        matlab_data = sio.loadmat(input)
        im_data = matlab_data["im_data"]
        im_data = (im_data - 1) / 255  # obstacles = 1, free zone = 0
        state_data = matlab_data["state_data"]
        value_data = matlab_data["value_data"]
        label_data = matlab_data["label_data"]
        Xdata = (np.concatenate((np.concatenate(
            (im_data, value_data), axis=1), state_data),
                                axis=1)).astype(cgt.floatX)
        ydata = label_data

        training_samples = int(6 / 7.0 * Xdata.shape[0])
        Xtrain = Xdata[0:training_samples]
        ytrain = ydata[0:training_samples]

        Xtest = Xdata[training_samples:]
        ytest = ydata[training_samples:]

        sortinds = np.random.permutation(training_samples)
        Xtrain = Xtrain[sortinds]
        ytrain = ytrain[sortinds]

        self.updates = rmsprop_updates(self.cost_drop,
                                       self.params,
                                       stepsize=stepsize)
        self.train = cgt.function(inputs=[self.X, self.y],
                                  outputs=[],
                                  updates=self.updates)

        from cgt.tests import gradcheck_model
        if grad_check:
            cost_nodrop = cgt.core.clone(self.cost_nodrop, {
                self.X: Xtrain[:1],
                self.y: ytrain[:1]
            })
            print "doing gradient check..."
            print "------------------------------------"
            gradcheck_model(cost_nodrop, self.params[0:1])
            print "success!"
            return

        if profile: cgt.profiler.start()

        print fmt_row(10, [
            "Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err",
            "Epoch Time"
        ])
        for i_epoch in xrange(int(epochs)):
            tstart = time.time()
            for start in xrange(0, Xtrain.shape[0], batch_size):
                end = start + batch_size
                self.train(Xtrain[start:end], ytrain[start:end])
            elapsed = time.time() - tstart
            trainerr, trainloss = self.computeloss(Xtrain[:len(Xtest)],
                                                   ytrain[:len(Xtest)])
            testerr, testloss = self.computeloss(Xtest, ytest)
            print fmt_row(
                10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
            if (i_epoch > 0) & (i_epoch % step_decrease_time == 0):
                stepsize = step_decrease_rate * stepsize
                self.updates = rmsprop_updates(self.cost_drop,
                                               self.params,
                                               stepsize=stepsize)
                self.train = cgt.function(inputs=[self.X, self.y],
                                          outputs=[],
                                          updates=self.updates)
                print stepsize
        if profile: cgt.execution.profiler.print_stats()

        # save Matlab data
        if output != 'None':
            sio.savemat(file_name=output,
                        mdict={
                            'in': Xtest,
                            'out': self.y_out(Xtest)
                        })
Beispiel #17
0
def main():

    nr.seed(0)

    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default="alice")
    parser.add_argument("--size_mem", type=int,default=64)
    parser.add_argument("--size_batch", type=int,default=64)
    parser.add_argument("--n_layers",type=int,default=2)
    parser.add_argument("--n_unroll",type=int,default=16)
    parser.add_argument("--step_size",type=float,default=.01)
    parser.add_argument("--decay_rate",type=float,default=0.95)
    parser.add_argument("--n_epochs",type=int,default=20)
    parser.add_argument("--arch",choices=["lstm","gru"],default="lstm")
    parser.add_argument("--grad_check",action="store_true")
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--temperature",type=float,default=1)

    args = parser.parse_args()

    cgt.set_precision("quad" if args.grad_check else "single")

    assert args.n_unroll > 1

    loader = Loader(args.data_dir,args.size_batch, args.n_unroll, (1.0,0,0))

    network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step(args.arch, loader.size_vocab, 
        loader.size_vocab, args.size_mem, args.size_batch, args.n_layers, args.n_unroll)

    if args.profile: profiler.start()

    params = network.get_parameters()
    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(),)))

    def initialize_hiddens(n):
        return [np.zeros((n, args.size_mem), cgt.floatX) for _ in xrange(get_num_hiddens(args.arch, args.n_layers))]

    if args.grad_check:
        x,y = loader.train_batches_iter().next()
        prev_hiddens = initialize_hiddens(args.size_batch)
        def f(thnew):
            thold = pc.get_value_flat()
            pc.set_value_flat(thnew)
            loss = f_loss(x,y, *prev_hiddens)
            pc.set_value_flat(thold)
            return loss
        from cgt.numeric_diff import numeric_grad
        g_num = numeric_grad(f, pc.get_value_flat(),eps=1e-10)
        result = f_loss_and_grad(x,y,*prev_hiddens)
        g_anal = result[1]
        assert np.allclose(g_num, g_anal, atol=1e-4)
        print "Gradient check succeeded!"
        return

    optim_state = make_rmsprop_state(theta=pc.get_value_flat(), step_size = args.step_size, 
        decay_rate = args.decay_rate)

    for iepoch in xrange(args.n_epochs):
        losses = []
        tstart = time()
        print "starting epoch",iepoch
        cur_hiddens = initialize_hiddens(args.size_batch)
        for (x,y) in loader.train_batches_iter():
            out = f_loss_and_grad(x,y, *cur_hiddens)
            loss = out[0]
            grad = out[1]
            cur_hiddens = out[2:]
            rmsprop_update(grad, optim_state)
            pc.set_value_flat(optim_state.theta)
            losses.append(loss)
            if args.unittest: return
        print "%.3f s/batch. avg loss = %.3f"%((time()-tstart)/len(losses), np.mean(losses))
        optim_state.step_size *= .98 #pylint: disable=E1101

        sample(f_step, initialize_hiddens(1), char2ind=loader.char2ind, n_steps=1000, temperature=args.temperature, seed_text = "")

    if args.profile: profiler.print_stats()
Beispiel #18
0
import cgt
from cgt import nn
from cgt.core import infer_shape
import numpy as np
# Script fragment: parse a caffe network description (prototxt) and start
# translating its TRAIN-phase layers into cgt nodes. The names osp,
# NetParameter, text_format and TRAIN are not imported in the visible part
# of the file.
# NOTE(review): hard-coded absolute path to a local caffe checkout.
infile = "/Users/joschu/Src/caffe/examples/mnist/lenet.prototxt"
# infile = "/Users/joschu/Src/caffe/models/bvlc_googlenet/train_val.prototxt"

# Read the text-format description and merge it into a NetParameter message.
with open(osp.expanduser(infile),"r") as fh:
    text = fh.read()
net = NetParameter()
text_format.Merge(text, net)


# Maps a blob name to the cgt node that produces it.
name2node = {}

cgt.set_precision('single')

# If the net declares an explicit input, represent it as a 4-d tensor whose
# fixed shape is taken from net.input_dim.
if net.input: #pylint: disable=E1101
    assert len(net.input) == 1 #pylint: disable=E1101
    name2node[net.input[0]] = cgt.tensor(ndim=4,dtype=cgt.floatX, fixed_shape=tuple(net.input_dim))


# XXX super inefficient

# Only layers whose phase is TRAIN are processed.
for layer in net.layer: #pylint: disable=E1101
    if layer.phase==TRAIN:
        print "loading layer %s type=%s in=%s out=%s"%(layer.name, layer.type, layer.bottom, layer.top)
        output = None
        # Look up the nodes feeding this layer by their bottom blob names.
        inputs = [name2node[name] for name in layer.bottom]
        if layer.type == "Data":
            tp = layer.transform_param
Beispiel #19
0
def main():
    import argparse
    parser=argparse.ArgumentParser()
    parser.add_argument("--epochs",type=int,default=10)
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--dropout",action="store_true")
    parser.add_argument("--stepsize",type=float, default=.001)
    parser.add_argument("--model",choices=["dense","conv"],default="dense")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--grad_check",action="store_true")
    parser.add_argument("--devtype",choices=["cpu","gpu"],default="cpu")
    args = parser.parse_args()

    if args.grad_check: cgt.set_precision("quad")

    # from mldata.org http://mldata.org/repository/data/viewslug/mnist-original/
    # converted to npz
    mnist = fetch_dataset("http://rll.berkeley.edu/cgt-data/mnist.npz")

    Xdata = (mnist["X"]/255.).astype(cgt.floatX)
    ydata = mnist["y"]

    np.random.seed(0)

    cgt.update_config(default_device=cgt.core.Device(devtype=args.devtype), backend="native")

    if args.model=="conv":
        Xdata = Xdata.reshape(-1, 1, 28, 28)

    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]

    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]

    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    X = cgt.tensor4("X",fixed_shape=(None,1,28,28)) if args.model=="conv" else cgt.matrix("X", fixed_shape=(None,28*28))
    y = cgt.vector("y",dtype='i8')

    if args.model == "dense":
        p_drop_input,p_drop_hidden = (0.2, 0.5) if args.dropout else (0,0)    
        w_h = init_weights(784, 256)
        w_h2 = init_weights(256, 256)
        w_o = init_weights(256, 10)
        pofy_drop = dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden)
        pofy_nodrop = dense_model(X, w_h, w_h2, w_o, 0., 0.)
        params = [w_h, w_h2, w_o]        
    elif args.model == "conv":
        p_drop_conv,p_drop_hidden = (0.2, 0.5) if args.dropout else (0,0)            
        w = init_weights(32, 1, 3, 3)
        w2 = init_weights(64, 32, 3, 3)
        w3 = init_weights(128, 64, 3, 3)
        w4 = init_weights(128 * 2 * 2, 625)
        w_o = init_weights(625, 10)
        pofy_drop = convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden)
        pofy_nodrop = convnet_model(X, w, w2, w3, w4, w_o, 0., 0.)
        params = [w, w2, w3, w4, w_o]
    else:
        raise RuntimeError("Unreachable")

    cost_drop = -cgt.mean(categorical.loglik(y, pofy_drop))
    updates = rmsprop_updates(cost_drop, params, stepsize=args.stepsize)

    y_nodrop = cgt.argmax(pofy_nodrop, axis=1)
    cost_nodrop = -cgt.mean(categorical.loglik(y, pofy_nodrop))
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()

    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)
    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop,cost_nodrop])

    batch_size=128


    from cgt.tests import gradcheck_model
    if args.grad_check:
        cost_nodrop = cgt.core.clone(cost_nodrop, {X:Xtrain[:1],y:ytrain[:1]})
        print "doing gradient check..."
        print "------------------------------------"
        gradcheck_model(cost_nodrop, params[0:1])
        print "success!"
        return

    if args.profile: cgt.profiler.start()

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(args.epochs):
        tstart = time.time()
        for start in xrange(0, Xtrain.shape[0], batch_size):
            end = start+batch_size
            train(Xtrain[start:end], ytrain[start:end])
            if args.unittest: return
        elapsed = time.time() - tstart
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
    if args.profile: cgt.execution.profiler.print_stats()
Beispiel #20
0
def main():

    nr.seed(0)

    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default="alice")
    parser.add_argument("--size_mem", type=int, default=64)
    parser.add_argument("--size_batch", type=int, default=64)
    parser.add_argument("--n_layers", type=int, default=2)
    parser.add_argument("--n_unroll", type=int, default=16)
    parser.add_argument("--k_in", type=int, default=3)
    parser.add_argument("--k_h", type=int, default=5)
    parser.add_argument("--step_size", type=float, default=.01)
    parser.add_argument("--decay_rate", type=float, default=0.95)
    parser.add_argument("--n_epochs", type=int, default=20)
    parser.add_argument("--arch", choices=["lstm", "gru"], default="gru")
    parser.add_argument("--grad_check", action="store_true")
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--unittest", action="store_true")

    args = parser.parse_args()

    cgt.set_precision("quad" if args.grad_check else "single")

    assert args.n_unroll > 1

    loader = Loader(args.data_dir, args.size_batch, args.n_unroll,
                    (.8, .1, .1))

    network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step(
        args.arch, loader.size_vocab, loader.size_vocab, args.size_mem,
        args.size_batch, args.n_layers, args.n_unroll, args.k_in, args.k_h)

    if args.profile: profiler.start()

    params = network.get_parameters()
    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-0.01, 0.01, size=(pc.get_total_size(), )))

    for i, param in enumerate(pc.params):
        if "is_rotation" in param.props:
            shape = pc.get_shapes()[i]
            num_vec = int(shape[0] / 2)
            size_vec = int(shape[1])
            gauss = nr.normal(size=(num_vec * size_vec))
            gauss = np.reshape(gauss, (num_vec, size_vec))
            gauss_mag = norm(gauss, axis=1, keepdims=True)
            gauss_normed = gauss / gauss_mag
            gauss_perturb = nr.normal(scale=0.01, size=(num_vec * size_vec))
            gauss_perturb = np.reshape(gauss_perturb, (num_vec, size_vec))
            second_vec = gauss_normed + gauss_perturb
            second_vec_mag = norm(second_vec, axis=1, keepdims=True)
            second_vec_normed = second_vec / second_vec_mag
            new_param_value = np.zeros(shape)
            for j in xrange(num_vec):
                new_param_value[2 * j, :] = gauss_normed[j, :]
                new_param_value[2 * j + 1, :] = second_vec_normed[j, :]
            param.op.set_value(new_param_value)
            #print new_param_value

    def initialize_hiddens(n):
        return [
            np.ones((n, args.size_mem), cgt.floatX) / float(args.size_mem)
            for _ in xrange(get_num_hiddens(args.arch, args.n_layers))
        ]

    if args.grad_check:
        #if True:
        x, y = loader.train_batches_iter().next()
        prev_hiddens = initialize_hiddens(args.size_batch)

        def f(thnew):
            thold = pc.get_value_flat()
            pc.set_value_flat(thnew)
            loss = f_loss(x, y, *prev_hiddens)
            pc.set_value_flat(thold)
            return loss

        from cgt.numeric_diff import numeric_grad
        print "Beginning grad check"
        g_num = numeric_grad(f, pc.get_value_flat(), eps=1e-10)
        print "Ending grad check"
        result = f_loss_and_grad(x, y, *prev_hiddens)
        g_anal = result[1]
        diff = g_num - g_anal
        abs_diff = np.abs(diff)
        print np.where(abs_diff > 1e-4)
        print diff[np.where(abs_diff > 1e-4)]
        embed()
        assert np.allclose(g_num, g_anal, atol=1e-4)
        print "Gradient check succeeded!"
        return

    optim_state = make_rmsprop_state(theta=pc.get_value_flat(),
                                     step_size=args.step_size,
                                     decay_rate=args.decay_rate)

    for iepoch in xrange(args.n_epochs):
        losses = []
        tstart = time()
        print "starting epoch", iepoch
        cur_hiddens = initialize_hiddens(args.size_batch)
        for (x, y) in loader.train_batches_iter():
            out = f_loss_and_grad(x, y, *cur_hiddens)
            loss = out[0]
            grad = out[1]
            cur_hiddens = out[2:]
            rmsprop_update(grad, optim_state)
            pc.set_value_flat(optim_state.theta)
            losses.append(loss)
            if args.unittest: return
        print "%.3f s/batch. avg loss = %.3f" % (
            (time() - tstart) / len(losses), np.mean(losses))
        optim_state.step_size *= .98  #pylint: disable=E1101

        sample(f_step,
               initialize_hiddens(1),
               char2ind=loader.char2ind,
               n_steps=300,
               temp=1.0,
               seed_text="")

    if args.profile: profiler.print_stats()
Beispiel #21
0
def main():

    nr.seed(0)

    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default="alice")
    parser.add_argument("--size_mem", type=int,default=64)
    parser.add_argument("--size_batch", type=int,default=64)
    parser.add_argument("--n_layers",type=int,default=2)
    parser.add_argument("--n_unroll",type=int,default=16)
    parser.add_argument("--k_in",type=int,default=3)
    parser.add_argument("--k_h",type=int,default=5)
    parser.add_argument("--step_size",type=float,default=.01)
    parser.add_argument("--decay_rate",type=float,default=0.95)
    parser.add_argument("--n_epochs",type=int,default=20)
    parser.add_argument("--arch",choices=["lstm","gru"],default="gru")
    parser.add_argument("--grad_check",action="store_true")
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")

    args = parser.parse_args()

    cgt.set_precision("quad" if args.grad_check else "single")

    assert args.n_unroll > 1

    loader = Loader(args.data_dir,args.size_batch, args.n_unroll, (.8,.1,.1))

    network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step(args.arch, loader.size_vocab, 
        loader.size_vocab, args.size_mem, args.size_batch, args.n_layers, args.n_unroll, args.k_in, args.k_h)

    if args.profile: profiler.start()

    params = network.get_parameters()
    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-0.01, 0.01, size=(pc.get_total_size(),)))

    for i, param in enumerate(pc.params):
        if "is_rotation" in param.props:
            shape = pc.get_shapes()[i]
            num_vec = int(shape[0] / 2)
            size_vec = int(shape[1])
            gauss = nr.normal(size=(num_vec * size_vec))
            gauss = np.reshape(gauss, (num_vec, size_vec))
            gauss_mag = norm(gauss, axis=1, keepdims=True)
            gauss_normed = gauss / gauss_mag
            gauss_perturb = nr.normal(scale=0.01, size=(num_vec * size_vec))
            gauss_perturb = np.reshape(gauss_perturb, (num_vec, size_vec))
            second_vec = gauss_normed + gauss_perturb
            second_vec_mag = norm(second_vec, axis=1, keepdims=True)
            second_vec_normed = second_vec / second_vec_mag
            new_param_value = np.zeros(shape)
            for j in xrange(num_vec):
                new_param_value[2 * j, :] = gauss_normed[j, :]
                new_param_value[2 * j + 1, :] = second_vec_normed[j, :]
            param.op.set_value(new_param_value)
            #print new_param_value



    def initialize_hiddens(n):
        return [np.ones((n, args.size_mem), cgt.floatX) / float(args.size_mem) for _ in xrange(get_num_hiddens(args.arch, args.n_layers))]

    if args.grad_check:
    #if True:
        x,y = loader.train_batches_iter().next()
        prev_hiddens = initialize_hiddens(args.size_batch)
        def f(thnew):
            thold = pc.get_value_flat()
            pc.set_value_flat(thnew)
            loss = f_loss(x,y, *prev_hiddens)
            pc.set_value_flat(thold)
            return loss
        from cgt.numeric_diff import numeric_grad
        print "Beginning grad check"
        g_num = numeric_grad(f, pc.get_value_flat(),eps=1e-10)
        print "Ending grad check"
        result = f_loss_and_grad(x,y,*prev_hiddens)
        g_anal = result[1]
        diff = g_num - g_anal
        abs_diff = np.abs(diff)
        print np.where(abs_diff > 1e-4)
        print diff[np.where(abs_diff > 1e-4)]
        embed()
        assert np.allclose(g_num, g_anal, atol=1e-4)
        print "Gradient check succeeded!"
        return

    optim_state = make_rmsprop_state(theta=pc.get_value_flat(), step_size = args.step_size, 
        decay_rate = args.decay_rate)

    for iepoch in xrange(args.n_epochs):
        losses = []
        tstart = time()
        print "starting epoch",iepoch
        cur_hiddens = initialize_hiddens(args.size_batch)
        for (x,y) in loader.train_batches_iter():
            out = f_loss_and_grad(x,y, *cur_hiddens)
            loss = out[0]
            grad = out[1]
            cur_hiddens = out[2:]
            rmsprop_update(grad, optim_state)
            pc.set_value_flat(optim_state.theta)
            losses.append(loss)
            if args.unittest: return
        print "%.3f s/batch. avg loss = %.3f"%((time()-tstart)/len(losses), np.mean(losses))
        optim_state.step_size *= .98 #pylint: disable=E1101

        sample(f_step, initialize_hiddens(1), char2ind=loader.char2ind, n_steps=300, temp=1.0, seed_text = "")

    if args.profile: profiler.print_stats()
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--grad_check",action="store_true")
    parser.add_argument("--n_batches",type=int,default=1000000)
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest", action="store_true")
    parser.add_argument("--task",choices=["copy","reverse_copy","repeat_copy"],default="copy")
    args = parser.parse_args()
    np.seterr("raise")

    cgt.set_precision("quad" if args.grad_check else "double")
    np.random.seed(0)

    # model parameters
    if args.grad_check:
        opt = NTMOpts(
            b = 1, # batch size
            h = 1, # number of heads
            n = 2, # number of memory sites
            m = 3, # dimension at each memory site
            k = 4, # dimension of input
            p = 2, # dimension of output
            ff_hid_sizes = []
        )
        seq_length = 2

    else:
        opt = NTMOpts(
            b = 64, # batch size
            h = 3, # number of heads
            n = 128, # number of memory sites
            m = 20, # dimension at each memory site
            k = 3, # dimension of input
            p = 1, # dimension of output
            ff_hid_sizes = [128,128]
        )

        seq_length = 10


    if args.unittest:
        seq_length=3
        args.n_batches=3
        


    tstart = time.time()
    ntm = make_ntm(opt)
    if args.task == "copy":
        task = CopyTask(opt.b, seq_length, opt.p)
    elif args.task == "reverse_copy":
        task = ReverseCopyTask(opt.b, seq_length, opt.p)
    elif args.task == "repeat_copy":
        n_copies = 4
        task = RepeatCopyTask(opt.b, seq_length, opt.p, n_copies)


    f_loss, f_loss_and_grad, params = make_funcs(opt, ntm, task.total_time(), task.loss_timesteps())
    print "graph construction and compilation took %g seconds"%(time.time()-tstart)

    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(),)))

    if args.grad_check:
        x,y = task.gen_batch()
        def f(thnew):
            thold = th.copy()
            pc.set_value_flat(thnew)
            loss = f_loss(x,y)
            pc.set_value_flat(thold)
            return loss
        from cgt.numeric_diff import numeric_grad
        g_num = numeric_grad(f, th,eps=1e-8)
        _, _, g_anal = f_loss_and_grad(x,y)
        assert np.allclose(g_num, g_anal, atol=1e-8)
        print "Gradient check succeeded!"
        print "%i/%i elts of grad are nonzero"%( (g_anal != 0).sum(), g_anal.size )
        return


    seq_num = 0
    state = make_rmsprop_state(pc.get_value_flat(), .01, .95)
    print fmt_row(13, ["seq num", "CE (bits)", "accuracy", "|g|_inf"], header=True)
    
    if args.profile: cgt.profiler.start()
    
    for i in xrange(args.n_batches):
        x,y = task.gen_batch()
        seq_num += x.shape[1]
        l,l01,g = f_loss_and_grad(x,y)
        print fmt_row(13, [seq_num, l,l01,np.abs(g).max()])
        rmsprop_update(g, state)        
        pc.set_value_flat(state.theta)
        if not np.isfinite(l): break

    
    if args.profile: cgt.profiler.print_stats()
Beispiel #23
0
def check_affine_funcs(precision, backend):
    """Run check_affine over a suite of functions for one precision/backend pair.

    The cgt configuration is reset, NumPy's RNG is seeded with 0, and the
    requested precision and backend are applied. check_affine is then called
    on each function with freshly drawn scalar, matrix or tensor arguments.
    Because the inputs come from the seeded RNG, the order of the statements
    below determines which random values each check receives.

    NOTE(review): check_affine is defined outside this block; presumably it
    compares function values and gradients against a reference -- confirm.
    NOTE(review): `nr` is presumably numpy.random, so np.random.seed(0) also
    seeds the nr.* draws -- confirm against the file's imports.

    Args:
      precision: value forwarded to cgt.set_precision.
      backend: value forwarded to cgt.core.update_config(backend=...).
    """
    cgt.reset_config()
    np.random.seed(0)
    cgt.set_precision(precision)
    cgt.core.update_config(backend=backend)

    # Scalar (0-d array) and 2x3 matrix inputs shared by the checks below.
    sA = np.array(nr.rand())
    sB = np.array(nr.rand())
    sC = np.array(nr.rand())
    mA = nr.randn(2,3)
    mB = nr.randn(2,3)
    mC = nr.randn(2,3)

    # Single-argument functions, each checked on a scalar and on a matrix.
    for fn in [xplusx, _2x_plus_3x, xm1, onemx]:
        for arg in [sA, mA]:
            check_affine(fn, arg)

    check_affine(elem_mult2, mA, mB, mC)
    check_affine(elem_mult2, sA, sB, sC)
    check_affine(pyramid, sA, sB, sC)
    check_affine(pyramid, mA, mB, mC)
    check_affine(slisum1, mA)
    check_affine(slisum2, mA)
    check_affine(slisum3, mA)
    check_affine(slisum4, mA)
    check_affine(max0, mA)
    check_affine(max1, mA)
    check_affine(max2, mA)
    check_affine(fancysli0, mA)
    check_affine(sum10, mA)
    check_affine(sum01, mA)
    check_affine(repeat0, mA[0:1, :], nr.randn(7,3))
    check_affine(repeat1, mA[:, 0:1], nr.randn(2,7))

    # Operands for the matrix/vector product checks; the names encode shapes
    # (M23 is 2x3, M35 is 3x5, v3 has length 3, v13 is 1x3, ...).
    M23 = mA
    M35 = nr.randn(3,5)
    v3 = nr.randn(3)
    v13 = v3.reshape(1,3) #XXX
    v5 = nr.randn(5)
    v15 = v5.reshape(1,5) #XXX
    v3b = nr.randn(3)

    check_affine(matmat00, M23, M35)
    check_affine(matmat01, M23, M35.T)
    check_affine(matmat10, M23.T, M35)
    check_affine(matmat11, M23.T, M35.T)

    check_affine(matmat00a, M23, M35)
    check_affine(matmat01a, M23, M35.T)
    # check_affine(matmat10a, M23.T, M35)
    check_affine(matmat11a, M23.T, M35.T)

    check_affine(matvec, M23, v3)
    check_affine(vecvec, v3, v3b)
    check_affine(bcadd, M23, v13)
    check_affine(matmatplusvec, M23, M35, v15)
    check_affine(transpose, M23, nr.randn(3,2))


    # 3-d and 4-d tensors for the transpose and batched-matmul checks.
    T235 = nr.randn(2,3,5)
    T235a = nr.randn(2,3,5)
    T257 = nr.randn(2,5,7)
    T2357 = nr.randn(2,3,5,7)
    T2357a = nr.randn(2,3,5,7)

    check_affine(transpose012, T235, T235a)
    check_affine(transpose021, T235, T235a.transpose(0,2,1))
    check_affine(transpose102, T235, T235a.transpose(1,0,2))
    check_affine(transpose0312, T2357, T2357a.transpose(0,3,1,2))
    check_affine(transpose0231, T2357, T2357a.transpose(0,2,3,1))

    check_affine(batchedmatmul, T235, T257)

    check_affine(flip0, M23, nr.randn(2,3))
    check_affine(flip1, M23, nr.randn(2,3))

    # check_affine(negsli0, M23, nr.randn(2,3))
    # check_affine(negsli1, M23, nr.randn(2,3))
    # check_affine(negsli01, M23, nr.randn(2,3))

    # check_affine(rfft, M35)
    check_affine(convlike, T2357, nr.randn(11,3*5*7), nr.randn(2,11))


    # Optional summary: one row per PROB2RESULT entry, sorted by key, listing
    # its "fn" and "grad" values.
    if DISPLAY:
        from thirdparty.tabulate import tabulate
        print tabulate([[key,val["fn"],val["grad"]] for (key,val) in sorted(PROB2RESULT.items())],headers=["funcname","fncount","gradcount"])