Example #1
0
def diag(v, k=0):
    """
    see numpy.diag
    """
    assert isinstance(k, int)
    assert v.ndim == 1
    n = size(v,0)+abs(k)
    out = cgt.zeros((n,n), v.dtype)
    out = inc_subtensor(out, (cgt.arange(n), cgt.arange(n)+k), v)
    return out
    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no",fixed_shape=(None,n_in))
        a_n = cgt.vector("a_n",dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        h0 = (o_no - 128.0)/128.0 
        nhid = 64
        h1 = cgt.tanh(nn.Affine(128,nhid,weight_init=nn.IIDGaussian(std=.1))(h0))
        probs_na = nn.softmax(nn.Affine(nhid,n_actions,weight_init=nn.IIDGaussian(std=0.01))(h1))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n*q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np/probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function([lam, oldpdist_np, o_no, a_n, q_n], 
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj,params)]))
        self.f_pdist = cgt.function([o_no], probs_na)

        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n], [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)
Example #3
0
File: api.py Project: Quantza/cgt
def to_one_hot(y, nb_class, dtype=None):
    """
    Return a matrix where each row corresponds to the one hot
    encoding of each element in y.
    Parameters
    ----------
    y
        A vector of integer value between 0 and nb_class - 1.
    nb_class : int
        The number of classes in y.
    dtype : data-type
        The dtype of the returned matrix. Default floatX.
    Returns
    -------
    object
        A matrix of shape (y.shape[0], nb_class), where each row ``i`` is
        the one hot encoding of the corresponding ``y[i]`` value.
    """
    
    fill_vals = cgt.ones((y.shape[0],))
    ret = cgt.zeros((y.shape[0], nb_class), dtype)
    
    d1 = cgt.arange(y.shape[0])
    d2 = cgt.cast(y, 'i1')
    
    ret = cgt.inc_subtensor(ret, [d1, d2], fill_vals)
    
    return ret
Example #4
0
File: api.py Project: xyuan/cgt
def to_one_hot(y, nb_class, dtype=None):
    """
    Return a matrix where each row corresponds to the one hot
    encoding of each element in y.
    Parameters
    ----------
    y
        A vector of integer value between 0 and nb_class - 1.
    nb_class : int
        The number of classes in y.
    dtype : data-type
        The dtype of the returned matrix. Default floatX.
    Returns
    -------
    object
        A matrix of shape (y.shape[0], nb_class), where each row ``i`` is
        the one hot encoding of the corresponding ``y[i]`` value.
    """

    fill_vals = cgt.ones((y.shape[0], ))
    ret = cgt.zeros((y.shape[0], nb_class), dtype)

    d1 = cgt.arange(y.shape[0])
    d2 = cgt.cast(y, 'i1')

    ret = cgt.inc_subtensor(ret, [d1, d2], fill_vals)

    return ret
Example #5
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--epochs",type=int,default=10)
    args = parser.parse_args()

    batchsize = 64
    Xshape = (batchsize, 3, 32, 32)
    X = cgt.tensor4("X", fixed_shape = Xshape)
    y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4')

    conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=1e-4))(X)
    relu1 = nn.rectify(conv1)
    pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2))
    conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool1)
    relu2 = nn.rectify(conv2)
    pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2))
    conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool2)
    pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2))
    relu3 = nn.rectify(pool3)
    d0,d1,d2,d3 = relu3.shape
    flatlayer = relu3.reshape([d0,d1*d2*d3])
    nfeats = cgt.infer_shape(flatlayer)[1]
    ip1 = nn.Affine(nfeats, 10)(flatlayer)
    logprobs = nn.logsoftmax(ip1)
    loss = -logprobs[cgt.arange(batchsize), y].mean()

    params = nn.get_parameters(loss)
    updates = rmsprop_updates(loss, params, stepsize=1e-3)
    
    train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates)

    if args.profile: cgt.profiler.start()

    data = fetch_dataset("http://rll.berkeley.edu/cgt-data/cifar10.npz")
    Xtrain = data["X_train"]
    ytrain = data["y_train"]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(args.epochs):
        for start in xrange(0, Xtrain.shape[0], batchsize):
            tstart = time.time()
            end = start+batchsize
            print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart
            if start > batchsize*5: break
        # elapsed = time.time() - tstart
        # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        # testerr, testloss = computeloss(Xtest, ytest)
        # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
        if args.profile: 
            cgt.profiler.print_stats()
            return
        if args.unittest:
            break
Example #6
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--epochs",type=int,default=10)
    args = parser.parse_args()

    batchsize = 64
    Xshape = (batchsize, 3, 32, 32)
    X = cgt.tensor4("X", fixed_shape = Xshape)
    y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4')

    conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=1e-4))(X)
    relu1 = nn.rectify(conv1)
    pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2))
    conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(relu1)
    relu2 = nn.rectify(conv2)
    pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2))
    conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool2)
    pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2))
    relu3 = nn.rectify(pool3)
    d0,d1,d2,d3 = relu3.shape
    flatlayer = relu3.reshape([d0,d1*d2*d3])
    nfeats = cgt.infer_shape(flatlayer)[1]
    ip1 = nn.Affine(nfeats, 10)(flatlayer)
    logprobs = nn.logsoftmax(ip1)
    loss = -logprobs[cgt.arange(batchsize), y].mean()

    params = nn.get_parameters(loss)
    updates = rmsprop_updates(loss, params, stepsize=1e-3)
    
    train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates)

    if args.profile: cgt.profiler.start()

    data = np.load("/Users/joschu/Data/cifar-10-batches-py/cifar10.npz")
    Xtrain = data["X_train"]
    ytrain = data["y_train"]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(args.epochs):
        for start in xrange(0, Xtrain.shape[0], batchsize):
            tstart = time.time()
            end = start+batchsize
            print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart
            if start > batchsize*5: break
        # elapsed = time.time() - tstart
        # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        # testerr, testloss = computeloss(Xtest, ytest)
        # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
        if args.profile: 
            cgt.profiler.print_stats()
            return
        if args.unittest:
            break
 def build_fc_return_loss(X, y):
     """
     Build fully connected network and return loss
     """
     np.random.seed(0)
     h1 = nn.rectify(nn.Affine(28 * 28, 256, weight_init=nn.IIDGaussian(std=0.1))(X))
     h2 = nn.rectify(nn.Affine(256, 256, weight_init=nn.IIDGaussian(std=0.1))(h1))
     logprobs = nn.logsoftmax(nn.Affine(256, 10, weight_init=nn.IIDGaussian(std=0.1))(h2))
     neglogliks = -logprobs[cgt.arange(X.shape[0]), y]
     loss = neglogliks.mean()
     return loss
Example #8
0
 def build_fc_return_loss(X, y):
     """
     Build fully connected network and return loss
     """
     np.random.seed(0)
     h1 = nn.rectify(
         nn.Affine(28 * 28, 256, weight_init=nn.IIDGaussian(std=.1))(X))
     h2 = nn.rectify(
         nn.Affine(256, 256, weight_init=nn.IIDGaussian(std=.1))(h1))
     logprobs = nn.logsoftmax(
         nn.Affine(256, 10, weight_init=nn.IIDGaussian(std=.1))(h2))
     neglogliks = -logprobs[cgt.arange(X.shape[0]), y]
     loss = neglogliks.mean()
     return loss
 def build_convnet_return_loss(X, y):
     np.random.seed(0)
     conv1 = nn.rectify(
         nn.SpatialConvolution(1, 32, kernelshape=(3, 3), pad=(0, 0), weight_init=nn.IIDGaussian(std=0.1))(X)
     )
     pool1 = nn.max_pool_2d(conv1, kernelshape=(3, 3), stride=(2, 2))
     conv2 = nn.rectify(
         nn.SpatialConvolution(32, 32, kernelshape=(3, 3), pad=(0, 0), weight_init=nn.IIDGaussian(std=0.1))(pool1)
     )
     pool2 = nn.max_pool_2d(conv2, kernelshape=(3, 3), stride=(2, 2))
     d0, d1, d2, d3 = pool2.shape
     flatlayer = pool2.reshape([d0, d1 * d2 * d3])
     nfeats = cgt.infer_shape(flatlayer)[1]
     logprobs = nn.logsoftmax(nn.Affine(nfeats, 10)(flatlayer))
     loss = -logprobs[cgt.arange(X.shape[0]), y].mean()
     return loss
Example #10
0
 def build_convnet_return_loss(X, y):
     np.random.seed(0)
     conv1 = nn.rectify(
         nn.SpatialConvolution(1,
                               32,
                               kernelshape=(3, 3),
                               pad=(0, 0),
                               weight_init=nn.IIDGaussian(std=.1))(X))
     pool1 = nn.max_pool_2d(conv1, kernelshape=(3, 3), stride=(2, 2))
     conv2 = nn.rectify(
         nn.SpatialConvolution(32,
                               32,
                               kernelshape=(3, 3),
                               pad=(0, 0),
                               weight_init=nn.IIDGaussian(std=.1))(pool1))
     pool2 = nn.max_pool_2d(conv2, kernelshape=(3, 3), stride=(2, 2))
     d0, d1, d2, d3 = pool2.shape
     flatlayer = pool2.reshape([d0, d1 * d2 * d3])
     nfeats = cgt.infer_shape(flatlayer)[1]
     logprobs = nn.logsoftmax(nn.Affine(nfeats, 10)(flatlayer))
     loss = -logprobs[cgt.arange(X.shape[0]), y].mean()
     return loss
Example #11
0
    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no", fixed_shape=(None, n_in))
        a_n = cgt.vector("a_n", dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        h0 = (o_no - 128.0) / 128.0
        nhid = 64
        h1 = cgt.tanh(
            nn.Affine(128, nhid, weight_init=nn.IIDGaussian(std=.1))(h0))
        probs_na = nn.softmax(
            nn.Affine(nhid, n_actions,
                      weight_init=nn.IIDGaussian(std=0.01))(h1))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n * q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_n, q_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
        self.f_pdist = cgt.function([o_no], probs_na)

        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n],
                                      [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)
Example #12
0
 def loglik(self, labels, p):
     return cgt.log(p[cgt.arange(cgt.size(labels, 0)), labels])
Example #13
0
def arange(x):
    return cgt.arange(x)
Example #14
0
def arange(x):
    return cgt.arange(x)
Example #15
0
 def loglik(self, labels, p):
     return cgt.log(p[cgt.arange(cgt.size(labels,0)),labels])