def make_updater_convnet_theano():
        X = TT.tensor4("X") # so shapes can be inferred
        y = TT.ivector("y")
        np.random.seed(0)        
        stepsize = TT.scalar("stepsize")
        layer1 = SpatialConvolutionTheano(1, 32, kernelshape=(3,3), pad=(0,0), 
            weight_init=nn.IIDGaussian(std=.1))
        conv1 = nn.rectify(layer1(X))
        pool1 = theano.tensor.signal.downsample.max_pool_2d(conv1, ds=(3,3), st=(2,2))
        layer2 = SpatialConvolutionTheano(32, 32, kernelshape=(3,3), pad=(0,0), 
            weight_init=nn.IIDGaussian(std=.1))
        conv2 = nn.rectify(layer2(pool1))
        pool2 = theano.tensor.signal.downsample.max_pool_2d(conv2, ds=(3,3), st=(2,2))
        d0,d1,d2,d3 = pool2.shape
        flatlayer = pool2.reshape([d0,d1*d2*d3])
        nfeats = 800 # theano doesn't know how to calculate shapes before compiling
        # the function, so this needs to be computed by hand
        layer3 = AffineTheano(nfeats, 10)
        ip1 = layer3(flatlayer)
        logprobs = logsoftmax_theano(ip1)
        loss = -logprobs[TT.arange(X.shape[0]), y].mean()

        params = [layer1.weight, layer1.bias, layer2.weight, layer2.bias, layer3.weight, layer3.bias]
        gparams = TT.grad(loss, params)
        updates = [(p, p-stepsize*gp) for (p, gp) in zip(params, gparams)]
        return theano.function([X,y, stepsize], loss, updates=updates, allow_input_downcast=True)
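A minimal usage sketch (not from the original source): the returned Theano function takes a batch and a stepsize and performs one SGD step per call. `Xtrain` and `ytrain` here are assumed MNIST-style arrays of shape (N, 1, 28, 28) and (N,).

updater = make_updater_convnet_theano()
stepsize = 1e-2
batchsize = 64
for start in xrange(0, Xtrain.shape[0], batchsize):
    Xbatch = Xtrain[start:start+batchsize]
    ybatch = ytrain[start:start+batchsize]
    batch_loss = updater(Xbatch, ybatch, stepsize)  # one gradient step; returns the batch loss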
Example 2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--epochs",type=int,default=10)
    args = parser.parse_args()

    batchsize = 64
    Xshape = (batchsize, 3, 32, 32)
    X = cgt.tensor4("X", fixed_shape = Xshape)
    y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4')

    conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=1e-4))(X)
    relu1 = nn.rectify(conv1)
    pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2))
    conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool1)
    relu2 = nn.rectify(conv2)
    pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2))
    conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool2)
    pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2))
    relu3 = nn.rectify(pool3)
    d0,d1,d2,d3 = relu3.shape
    flatlayer = relu3.reshape([d0,d1*d2*d3])
    nfeats = cgt.infer_shape(flatlayer)[1]
    ip1 = nn.Affine(nfeats, 10)(flatlayer)
    logprobs = nn.logsoftmax(ip1)
    loss = -logprobs[cgt.arange(batchsize), y].mean()

    params = nn.get_parameters(loss)
    updates = rmsprop_updates(loss, params, stepsize=1e-3)
    
    train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates)

    if args.profile: cgt.profiler.start()

    data = fetch_dataset("http://rll.berkeley.edu/cgt-data/cifar10.npz")
    Xtrain = data["X_train"]
    ytrain = data["y_train"]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(args.epochs):
        for start in xrange(0, Xtrain.shape[0], batchsize):
            tstart = time.time()
            end = start+batchsize
            print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart
            if start > batchsize*5: break
        # elapsed = time.time() - tstart
        # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        # testerr, testloss = computeloss(Xtest, ytest)
        # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
        if args.profile: 
            cgt.profiler.print_stats()
            return
        if args.unittest:
            break
Example 3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--epochs",type=int,default=10)
    args = parser.parse_args()

    batchsize = 64
    Xshape = (batchsize, 3, 32, 32)
    X = cgt.tensor4("X", fixed_shape = Xshape)
    y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4')

    conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=1e-4))(X)
    relu1 = nn.rectify(conv1)
    pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2))
    conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool1)
    relu2 = nn.rectify(conv2)
    pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2))
    conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool2)
    pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2))
    relu3 = nn.rectify(pool3)
    d0,d1,d2,d3 = relu3.shape
    flatlayer = relu3.reshape([d0,d1*d2*d3])
    nfeats = cgt.infer_shape(flatlayer)[1]
    ip1 = nn.Affine(nfeats, 10)(flatlayer)
    logprobs = nn.logsoftmax(ip1)
    loss = -logprobs[cgt.arange(batchsize), y].mean()

    params = nn.get_parameters(loss)
    updates = rmsprop_updates(loss, params, stepsize=1e-3)
    
    train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates)

    if args.profile: cgt.profiler.start()

    data = np.load("/Users/joschu/Data/cifar-10-batches-py/cifar10.npz")
    Xtrain = data["X_train"]
    ytrain = data["y_train"]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(args.epochs):
        for start in xrange(0, Xtrain.shape[0], batchsize):
            tstart = time.time()
            end = start+batchsize
            print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart
            if start > batchsize*5: break
        # elapsed = time.time() - tstart
        # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        # testerr, testloss = computeloss(Xtest, ytest)
        # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
        if args.profile: 
            cgt.profiler.print_stats()
            return
        if args.unittest:
            break
Example 4
def dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden):
    X = nn.dropout(X, p_drop_input)
    h = nn.rectify(cgt.dot(X, w_h))

    h = nn.dropout(h, p_drop_hidden)
    h2 = nn.rectify(cgt.dot(h, w_h2))

    h2 = nn.dropout(h2, p_drop_hidden)
    py_x = nn.softmax(cgt.dot(h2, w_o))
    return py_x
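A hypothetical sketch (not part of the original snippet) of how the weight matrices could be created and the model wired into a negative log-likelihood; `init_weights` is an assumed helper that wraps small random Gaussian weights in a cgt.shared node.

def init_weights(*shape):
    # assumed helper: trainable shared parameter with small random values
    return cgt.shared(np.random.randn(*shape) * 0.01)

X = cgt.matrix("X")
y = cgt.vector("y", dtype='i8')
w_h, w_h2, w_o = init_weights(784, 256), init_weights(256, 256), init_weights(256, 10)
py_x = dense_model(X, w_h, w_h2, w_o, 0.2, 0.5)
cost = -cgt.log(py_x[cgt.arange(X.shape[0]), y]).mean()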
Example 5
def dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden):
    X = nn.dropout(X, p_drop_input)
    h = nn.rectify(cgt.dot(X, w_h))

    h = nn.dropout(h, p_drop_hidden)
    h2 = nn.rectify(cgt.dot(h, w_h2))

    h2 = nn.dropout(h2, p_drop_hidden)
    py_x = nn.softmax(cgt.dot(h2, w_o))
    return py_x
 def build_fc_return_loss(X, y):
     """
     Build fully connected network and return loss
     """
     np.random.seed(0)
     h1 = nn.rectify(nn.Affine(28 * 28, 256, weight_init=nn.IIDGaussian(std=0.1))(X))
     h2 = nn.rectify(nn.Affine(256, 256, weight_init=nn.IIDGaussian(std=0.1))(h1))
     logprobs = nn.logsoftmax(nn.Affine(256, 10, weight_init=nn.IIDGaussian(std=0.1))(h2))
     neglogliks = -logprobs[cgt.arange(X.shape[0]), y]
     loss = neglogliks.mean()
     return loss
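A hedged sketch of how the returned loss can be turned into a compiled training step, following the gradient-descent pattern used elsewhere on this page (the 1e-2 stepsize is an arbitrary choice):

X = cgt.matrix("X", fixed_shape=(None, 28 * 28))
y = cgt.vector("y", dtype='i8')
loss = build_fc_return_loss(X, y)
params = nn.get_parameters(loss)
updates = [(p, p - 1e-2 * g) for (p, g) in zip(params, cgt.grad(loss, params))]
train = cgt.function([X, y], loss, updates=updates)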
Example 7
def dense_model3(X, w_h, w_h2, w_h3, w_o, p_drop_input, p_drop_hidden):
    X = nn.dropout(X, p_drop_input)
    h = nn.rectify(cgt.dot(X, w_h))

    h = nn.dropout(h, p_drop_hidden[0])
    h2 = nn.rectify(cgt.dot(h, w_h2))

    h2 = nn.dropout(h2, p_drop_hidden[1])
    h3 = nn.rectify(cgt.dot(h2, w_h3))

    h3 = nn.dropout(h3, p_drop_hidden[2])
    py_x = nn.softmax(cgt.dot(h3, w_o))
    return py_x
Example 8
 def build_fc_return_loss(X, y):
     """
     Build fully connected network and return loss
     """
     np.random.seed(0)
     h1 = nn.rectify(
         nn.Affine(28 * 28, 256, weight_init=nn.IIDGaussian(std=.1))(X))
     h2 = nn.rectify(
         nn.Affine(256, 256, weight_init=nn.IIDGaussian(std=.1))(h1))
     logprobs = nn.logsoftmax(
         nn.Affine(256, 10, weight_init=nn.IIDGaussian(std=.1))(h2))
     neglogliks = -logprobs[cgt.arange(X.shape[0]), y]
     loss = neglogliks.mean()
     return loss
 def build_convnet_return_loss(X, y):
     np.random.seed(0)
     conv1 = nn.rectify(
         nn.SpatialConvolution(1, 32, kernelshape=(3, 3), pad=(0, 0), weight_init=nn.IIDGaussian(std=0.1))(X)
     )
     pool1 = nn.max_pool_2d(conv1, kernelshape=(3, 3), stride=(2, 2))
     conv2 = nn.rectify(
         nn.SpatialConvolution(32, 32, kernelshape=(3, 3), pad=(0, 0), weight_init=nn.IIDGaussian(std=0.1))(pool1)
     )
     pool2 = nn.max_pool_2d(conv2, kernelshape=(3, 3), stride=(2, 2))
     d0, d1, d2, d3 = pool2.shape
     flatlayer = pool2.reshape([d0, d1 * d2 * d3])
     nfeats = cgt.infer_shape(flatlayer)[1]
     logprobs = nn.logsoftmax(nn.Affine(nfeats, 10)(flatlayer))
     loss = -logprobs[cgt.arange(X.shape[0]), y].mean()
     return loss
    def make_updater_fc_theano():
        X = TT.matrix("X")
        y = TT.ivector("y")
        np.random.seed(0)
        stepsize = TT.scalar("stepsize")
        layer1 = AffineTheano(28 * 28, 256, weight_init=nn.IIDGaussian(std=0.1))
        h1 = nn.rectify(layer1(X))
        layer2 = AffineTheano(256, 256, weight_init=nn.IIDGaussian(std=0.1))
        h2 = nn.rectify(layer2(h1))
        logprobs = logsoftmax_theano(AffineTheano(256, 10, weight_init=nn.IIDGaussian(std=0.1))(h2))
        neglogliks = -logprobs[TT.arange(X.shape[0]), y]
        loss = neglogliks.mean()

        params = [layer1.weight, layer1.bias, layer2.weight, layer2.bias]
        gparams = TT.grad(loss, params)
        updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
        return theano.function([X, y, stepsize], loss, updates=updates, allow_input_downcast=True)
Example 11
    def make_updater_convnet_theano():
        X = TT.tensor4("X")  # so shapes can be inferred
        y = TT.ivector("y")
        np.random.seed(0)
        stepsize = TT.scalar("stepsize")
        layer1 = SpatialConvolutionTheano(1,
                                          32,
                                          kernelshape=(3, 3),
                                          pad=(0, 0),
                                          weight_init=nn.IIDGaussian(std=.1))
        conv1 = nn.rectify(layer1(X))
        pool1 = theano.tensor.signal.downsample.max_pool_2d(conv1,
                                                            ds=(3, 3),
                                                            st=(2, 2))
        layer2 = SpatialConvolutionTheano(32,
                                          32,
                                          kernelshape=(3, 3),
                                          pad=(0, 0),
                                          weight_init=nn.IIDGaussian(std=.1))
        conv2 = nn.rectify(layer2(pool1))
        pool2 = theano.tensor.signal.downsample.max_pool_2d(conv2,
                                                            ds=(3, 3),
                                                            st=(2, 2))
        d0, d1, d2, d3 = pool2.shape
        flatlayer = pool2.reshape([d0, d1 * d2 * d3])
        nfeats = 800  # theano doesn't know how to calculate shapes before compiling
        # the function, so this needs to be computed by hand
        layer3 = AffineTheano(nfeats, 10)
        ip1 = layer3(flatlayer)
        logprobs = logsoftmax_theano(ip1)
        loss = -logprobs[TT.arange(X.shape[0]), y].mean()

        params = [
            layer1.weight, layer1.bias, layer2.weight, layer2.bias,
            layer3.weight, layer3.bias
        ]
        gparams = TT.grad(loss, params)
        updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
        return theano.function([X, y, stepsize],
                               loss,
                               updates=updates,
                               allow_input_downcast=True)
Example 12
def convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden):
    l1a = nn.rectify(nn.conv2d(X, w, kernelshape=(3,3), pad=(1,1)))
    l1 = nn.max_pool_2d(l1a, kernelshape=(2, 2), stride=(2,2))
    l1 = nn.dropout(l1, p_drop_conv)

    l2a = nn.rectify(nn.conv2d(l1, w2, kernelshape=(3,3), pad=(1,1)))
    l2 = nn.max_pool_2d(l2a, kernelshape=(2, 2), stride=(2,2))
    l2 = nn.dropout(l2, p_drop_conv)

    l3a = nn.rectify(nn.conv2d(l2, w3, kernelshape=(3,3), pad=(1,1)))
    l3b = nn.max_pool_2d(l3a, kernelshape=(2, 2), stride=(2,2))
    batchsize,channels,rows,cols = l3b.shape
    l3 = cgt.reshape(l3b, [batchsize, channels*rows*cols])
    l3 = nn.dropout(l3, p_drop_conv)

    l4 = nn.rectify(cgt.dot(l3, w4))
    l4 = nn.dropout(l4, p_drop_hidden)
    
    pyx = nn.softmax(cgt.dot(l4, w_o))
    return pyx
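A hypothetical parameter setup for convnet_model, not shown in the original snippet. It assumes nn.conv2d takes filter tensors shaped (n_out_channels, n_in_channels, rows, cols), 1-channel 28x28 inputs, and a 625-unit hidden layer; `init_weights` is an assumed helper as in the earlier sketch.

def init_weights(*shape):
    # assumed helper: trainable shared parameter with small random values
    return cgt.shared(np.random.randn(*shape) * 0.01)

X = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28))
w = init_weights(32, 1, 3, 3)         # 3x3 filters: 1 input channel -> 32 feature maps
w2 = init_weights(64, 32, 3, 3)
w3 = init_weights(128, 64, 3, 3)
w4 = init_weights(128 * 3 * 3, 625)   # 28 -> 14 -> 7 -> 3 after three 2x2 poolings
w_o = init_weights(625, 10)
py_x = convnet_model(X, w, w2, w3, w4, w_o, 0.2, 0.5)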
Example 13
def convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden):
    l1a = nn.rectify(nn.conv2d(X, w, kernelshape=(3, 3), pad=(1, 1)))
    l1 = nn.max_pool_2d(l1a, kernelshape=(2, 2), stride=(2, 2))
    l1 = nn.dropout(l1, p_drop_conv)

    l2a = nn.rectify(nn.conv2d(l1, w2, kernelshape=(3, 3), pad=(1, 1)))
    l2 = nn.max_pool_2d(l2a, kernelshape=(2, 2), stride=(2, 2))
    l2 = nn.dropout(l2, p_drop_conv)

    l3a = nn.rectify(nn.conv2d(l2, w3, kernelshape=(3, 3), pad=(1, 1)))
    l3b = nn.max_pool_2d(l3a, kernelshape=(2, 2), stride=(2, 2))
    batchsize, channels, rows, cols = l3b.shape
    l3 = cgt.reshape(l3b, [batchsize, channels * rows * cols])
    l3 = nn.dropout(l3, p_drop_conv)

    l4 = nn.rectify(cgt.dot(l3, w4))
    l4 = nn.dropout(l4, p_drop_hidden)

    pyx = nn.softmax(cgt.dot(l4, w_o))
    return pyx
Example 14
    def make_updater_fc_theano():
        X = TT.matrix("X")
        y = TT.ivector("y")
        np.random.seed(0)
        stepsize = TT.scalar("stepsize")
        layer1 = AffineTheano(28 * 28, 256, weight_init=nn.IIDGaussian(std=.1))
        h1 = nn.rectify(layer1(X))
        layer2 = AffineTheano(256, 256, weight_init=nn.IIDGaussian(std=.1))
        h2 = nn.rectify(layer2(h1))
        logprobs = logsoftmax_theano(
            AffineTheano(256, 10, weight_init=nn.IIDGaussian(std=.1))(h2))
        neglogliks = -logprobs[TT.arange(X.shape[0]), y]
        loss = neglogliks.mean()

        params = [layer1.weight, layer1.bias, layer2.weight, layer2.bias]
        gparams = TT.grad(loss, params)
        updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
        return theano.function([X, y, stepsize],
                               loss,
                               updates=updates,
                               allow_input_downcast=True)
Example 15
    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no",fixed_shape=(None,n_in))
        a_n = cgt.vector("a_n",dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        nhid, nhid2 = 64, 64
        h0 = (o_no - 128.0)/128.0
        d0 = nn.dropout(h0, .2)

        h1 = nn.rectify(nn.Affine(128,nhid,weight_init=nn.IIDGaussian(std=.1))(d0))
        d1 = nn.dropout(h1, .2)
        h2 = nn.rectify(nn.Affine(nhid,nhid2,weight_init=nn.IIDGaussian(std=.1))(d1))
        # d2 = nn.dropout(h2, .2)
        probs_na = nn.softmax(nn.Affine(nhid2,n_actions,weight_init=nn.IIDGaussian(std=0.01))(h2))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n*q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np/probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function([lam, oldpdist_np, o_no, a_n, q_n], 
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj,params)]))
        self.f_pdist = cgt.function([o_no], probs_na)

        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n], [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)
Example 16
 def build_convnet_return_loss(X, y):
     np.random.seed(0)
     conv1 = nn.rectify(
         nn.SpatialConvolution(1,
                               32,
                               kernelshape=(3, 3),
                               pad=(0, 0),
                               weight_init=nn.IIDGaussian(std=.1))(X))
     pool1 = nn.max_pool_2d(conv1, kernelshape=(3, 3), stride=(2, 2))
     conv2 = nn.rectify(
         nn.SpatialConvolution(32,
                               32,
                               kernelshape=(3, 3),
                               pad=(0, 0),
                               weight_init=nn.IIDGaussian(std=.1))(pool1))
     pool2 = nn.max_pool_2d(conv2, kernelshape=(3, 3), stride=(2, 2))
     d0, d1, d2, d3 = pool2.shape
     flatlayer = pool2.reshape([d0, d1 * d2 * d3])
     nfeats = cgt.infer_shape(flatlayer)[1]
     logprobs = nn.logsoftmax(nn.Affine(nfeats, 10)(flatlayer))
     loss = -logprobs[cgt.arange(X.shape[0]), y].mean()
     return loss
Example 17
File: rrnn.py Project: zoemcc/rrnn
def make_deep_rrnn_rot_relu(size_input, size_mem, n_layers, size_output,
                            size_batch_in, k_in, k_h):
    inputs = [cgt.matrix() for i_layer in xrange(n_layers + 1)]
    outputs = []
    print 'input_size: ', size_input
    for i_layer in xrange(n_layers):
        prev_h = inputs[
            i_layer +
            1]  # note that inputs[0] is the external input, so we add 1
        x = inputs[0] if i_layer == 0 else outputs[i_layer - 1]
        size_x = size_input if i_layer == 0 else size_mem
        size_batch = prev_h.shape[0]

        xform_h_param = nn.TensorParam((2 * k_h, size_mem), name="rotxform")
        xform_h_non = xform_h_param.weight
        xform_h_non.props["is_rotation"] = True

        xform_h_norm = cgt.norm(xform_h_non, axis=1, keepdims=True)
        xform_h = cgt.broadcast('/', xform_h_non, xform_h_norm, "xx,x1")

        add_in_lin = nn.Affine(size_x, size_mem)(x)
        add_in_relu = nn.rectify(add_in_lin)

        prev_h_scaled = nn.scale_mag(prev_h)

        h_in_added = prev_h_scaled + add_in_relu
        inters_h = [h_in_added]

        colon = slice(None, None, None)

        for i in xrange(2 * k_h):
            inter_in = inters_h[-1]
            r_cur = xform_h[i, :]
            #r_cur = cgt.subtensor(xform_h, [i, colon])
            r_cur_2_transpose = cgt.reshape(r_cur, (size_mem, 1))
            r_cur_2 = cgt.reshape(r_cur, (1, size_mem))
            ref_cur = cgt.dot(cgt.dot(inter_in, r_cur_2_transpose), r_cur_2)
            inter_out = inter_in - 2 * ref_cur
            inters_h.append(inter_out)
        next_h = inters_h[-1]
        outputs.append(next_h)

    category_activations = nn.Affine(size_mem, size_output,
                                     name="pred")(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)

    #print 'len outputs:', len(outputs)
    #print 'len inputs:', len(inputs)

    return nn.Module(inputs, outputs)
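The inner loop above is a chain of Householder reflections: with r normalized to unit length, each step maps the hidden state h to h(I - 2 r r^T), which preserves its norm. A small numpy check, illustrative only:

import numpy as np
size_mem = 8
r = np.random.randn(size_mem)
r /= np.linalg.norm(r)                              # unit-length reflection axis
h = np.random.randn(4, size_mem)                    # a batch of hidden states
ref = h - 2 * np.dot(np.dot(h, r.reshape(-1, 1)), r.reshape(1, -1))
H = np.eye(size_mem) - 2 * np.outer(r, r)           # Householder matrix
assert np.allclose(ref, np.dot(h, H))               # same operation as the loop body
assert np.allclose(np.linalg.norm(ref, axis=1), np.linalg.norm(h, axis=1))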
Example 18
File: rrnn.py Project: zobot/rrnn
def make_deep_rrnn_rot_relu(size_input, size_mem, n_layers, size_output, size_batch_in, k_in, k_h):
    inputs = [cgt.matrix() for i_layer in xrange(n_layers+1)]
    outputs = []
    print 'input_size: ', size_input
    for i_layer in xrange(n_layers):
        prev_h = inputs[i_layer+1] # note that inputs[0] is the external input, so we add 1
        x = inputs[0] if i_layer==0 else outputs[i_layer-1]
        size_x = size_input if i_layer==0 else size_mem
        size_batch = prev_h.shape[0]

        xform_h_param = nn.TensorParam((2 * k_h, size_mem), name="rotxform")
        xform_h_non = xform_h_param.weight
        xform_h_non.props["is_rotation"] = True

        xform_h_norm = cgt.norm(xform_h_non, axis=1, keepdims=True)
        xform_h = cgt.broadcast('/', xform_h_non, xform_h_norm, "xx,x1")

        add_in_lin = nn.Affine(size_x, size_mem)(x)
        add_in_relu = nn.rectify(add_in_lin)

        prev_h_scaled = nn.scale_mag(prev_h)


        h_in_added = prev_h_scaled + add_in_relu
        inters_h = [h_in_added]

        colon = slice(None, None, None)

        for i in xrange(2 * k_h):
            inter_in = inters_h[-1]
            r_cur = xform_h[i, :]
            #r_cur = cgt.subtensor(xform_h, [i, colon])
            r_cur_2_transpose = cgt.reshape(r_cur, (size_mem, 1))
            r_cur_2 = cgt.reshape(r_cur, (1, size_mem))
            ref_cur = cgt.dot(cgt.dot(inter_in, r_cur_2_transpose), r_cur_2)
            inter_out = inter_in - 2 * ref_cur
            inters_h.append(inter_out)
        next_h = inters_h[-1]
        outputs.append(next_h)


    category_activations = nn.Affine(size_mem, size_output,name="pred")(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)

    #print 'len outputs:', len(outputs)
    #print 'len inputs:', len(inputs)

    return nn.Module(inputs, outputs)
Example 19
 def __init__(self, num_features=None, num_hidden=100):
     stepsize = 0.01
     # with shape (batchsize, ncols)
     X = cgt.matrix("X", fixed_shape=(1, num_features))
     # y: a symbolic variable representing the reward (stored as float64)
     y = cgt.scalar("y", dtype='float64')
     
     hid1 = nn.rectify(
         nn.Affine(num_features, num_hidden, weight_init=nn.IIDGaussian(std=.1), bias_init=nn.Constant(1))(X)
     )
     # One final fully-connected layer, and then a linear activation output for reward
     output = nn.Affine(num_hidden, 1, weight_init=nn.IIDGaussian(std=.1), bias_init=nn.Constant(1))(hid1)
     abs_deviation = cgt.abs(output - y).mean()
     params = nn.get_parameters(abs_deviation)
     gparams = cgt.grad(abs_deviation, params)
     
     updates = [(p, p-stepsize*gp) for (p, gp) in zip(params, gparams)]
     self.predictor = cgt.function([X], output)
     self.updater = cgt.function([X, y], abs_deviation, updates=updates)
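A hypothetical usage of the predictor/updater pair built above; the enclosing class is not shown in this snippet, so `RewardRegressor` is a stand-in name.

model = RewardRegressor(num_features=128)
x = np.random.randn(1, 128)      # one observation, matching fixed_shape=(1, num_features)
dev = model.updater(x, 3.0)      # one gradient step on the absolute deviation from reward 3.0
print model.predictor(x)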
Example 20
    def __init__(self, num_features=None, num_hidden=100):
        stepsize = 0.01
        # with shape (batchsize, ncols)
        X = cgt.matrix("X", fixed_shape=(1, num_features))
        # y: a symbolic variable representing the reward (stored as float64)
        y = cgt.scalar("y", dtype='float64')

        hid1 = nn.rectify(
            nn.Affine(num_features,
                      num_hidden,
                      weight_init=nn.IIDGaussian(std=.1),
                      bias_init=nn.Constant(1))(X))
        # One final fully-connected layer, and then a linear activation output for reward
        output = nn.Affine(num_hidden,
                           1,
                           weight_init=nn.IIDGaussian(std=.1),
                           bias_init=nn.Constant(1))(hid1)
        abs_deviation = cgt.abs(output - y).mean()
        params = nn.get_parameters(abs_deviation)
        gparams = cgt.grad(abs_deviation, params)

        updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
        self.predictor = cgt.function([X], output)
        self.updater = cgt.function([X, y], abs_deviation, updates=updates)
Example 21
Xtrain, Xtest, ytrain, ytest = load_mnist(onehot=False)

# shuffle the data
np.random.seed(42)
sortinds = np.random.permutation(Xtrain.shape[0])
Xtrain = Xtrain[sortinds]
ytrain = ytrain[sortinds]

# Model:
# Two hidden affine layers, each followed by a ReLU activation,
# then a final affine layer with a softmax output.
X = cgt.matrix('X', fixed_shape=(None, 784))
y = cgt.vector('y', dtype='i8')

layer1 = nn.Affine(784, 400, weight_init=nn.XavierNormal())(X)
act1 = nn.rectify(layer1)
layer2 = nn.Affine(400, 400, weight_init=nn.XavierNormal())(act1)
act2 = nn.rectify(layer2)
probs = nn.softmax(nn.Affine(400, 10)(act2))

y_preds = cgt.argmax(probs, axis=1)
cost = -cgt.mean(categorical.loglik(y, probs))
err = cgt.cast(cgt.not_equal(y, y_preds), cgt.floatX).mean()

params = nn.get_parameters(cost)
updates = nn.sgd(cost, params, learning_rate) # train via sgd

# training function
f = cgt.function(inputs=[X, y], outputs=[], updates=updates)
# compute the cost and error
cost_and_err = cgt.function(inputs=[X, y], outputs=[cost, err])
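A hedged sketch of the training loop this setup implies; the batch size and epoch count are assumptions, not taken from the original file.

batch_size, n_epochs = 128, 10
for epoch in xrange(n_epochs):
    for start in xrange(0, Xtrain.shape[0], batch_size):
        f(Xtrain[start:start+batch_size], ytrain[start:start+batch_size])
    test_cost, test_err = cost_and_err(Xtest, ytest)
    print "epoch %d: test nll %.4f, test err %.4f" % (epoch, test_cost, test_err)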
Example 22
     if X.ndim == 4:
         X = cgt.reshape(X, [X.shape[0], X.shape[1]*X.shape[2]*X.shape[3]] )
     param = layer.inner_product_param
     nchanin = infer_shape(X)[1]
     Wshape = (param.num_output, nchanin)
     Wname = layer.param[0].name or layer.name+":W"
     Wval = np.empty(Wshape, dtype=cgt.floatX)
     W = name2node[Wname] = cgt.shared(Wval, name=Wname, fixed_shape_mask="all")
     bshape = (1, param.num_output)
     bname = layer.param[1].name or layer.name+":b"
     bval = np.empty(bshape, dtype=cgt.floatX)
     b = name2node[bname] = cgt.shared(bval, name=bname, fixed_shape_mask="all")
     yname = layer.top[0]
     output = [cgt.broadcast("+", X.dot(W), b, "xx,1x")]
 elif layer.type == "ReLU":
     output = [nn.rectify(inputs[0])]
 elif layer.type == "Softmax":
     output = [nn.softmax(inputs[0])]
 elif layer.type == "LRN":
     # XXX needs params
     param = layer.lrn_param
     output = [nn.lrn(inputs[0], param.alpha,param.beta, param.local_size)]
 elif layer.type == "Concat":
     param = layer.concat_param
     output = [cgt.concatenate(inputs, param.concat_dim)]
 elif layer.type == "Dropout":
     output = [nn.dropout(inputs[0])]
 elif layer.type == "SoftmaxWithLoss":
     output = [nn.loglik_softmax(inputs[0], inputs[1])]
 elif layer.type == "Accuracy":
     output = [nn.zero_one_loss(inputs[0], inputs[1])]
Example 23
ytrain = ytrain[sortinds]

# reshape for convnet
Xtrainimg = Xtrain.reshape(-1, 1, 28, 28)
Xtestimg = Xtest.reshape(-1, 1, 28, 28)

# Model:
# Make it VGG-like
# VGG nets use 3x3 kernels with padding of 1, and 2x2 max-pooling with stride 2.
#
# VGG is a large model, so here we'll just use a small part of it.
X = cgt.tensor4('X', fixed_shape=(None, 1, 28, 28))
y = cgt.vector('y', dtype='i8')

conv1 = nn.rectify(
        nn.SpatialConvolution(1, 32, kernelshape=(3,3), stride=(1,1), pad=(1,1), weight_init=nn.IIDGaussian(std=.1))(X)
        )
pool1 = nn.max_pool_2d(conv1, kernelshape=(2,2), stride=(2,2))

conv2 = nn.rectify(
        nn.SpatialConvolution(32, 32, kernelshape=(3,3), stride=(1,1), pad=(1,1), weight_init=nn.IIDGaussian(std=.1))(pool1)
        )
pool2 = nn.max_pool_2d(conv2, kernelshape=(2,2), stride=(2,2))
d0, d1, d2, d3 = pool2.shape

flat = pool2.reshape([d0, d1*d2*d3])
nfeats = cgt.infer_shape(flat)[1]
probs = nn.softmax(nn.Affine(nfeats, 10)(flat))
cost = -categorical.loglik(y, probs).mean()

y_preds = cgt.argmax(probs, axis=1)
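Continuing in the same way as the fully connected MNIST example above (a sketch, not part of this snippet), the error rate and compiled training/evaluation functions would look like:

err = cgt.cast(cgt.not_equal(y, y_preds), cgt.floatX).mean()
params = nn.get_parameters(cost)
updates = nn.sgd(cost, params, 1e-3)   # the 1e-3 learning rate is an assumption
f = cgt.function(inputs=[X, y], outputs=[], updates=updates)
cost_and_err = cgt.function(inputs=[X, y], outputs=[cost, err])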
Example 24
def build_fcn_action_cond_encoder_net(input_shapes, levels=None):
    x_shape, u_shape = input_shapes
    x_c_dim = x_shape[0]
    x1_c_dim = 16
    levels = levels or [3]
    levels = sorted(set(levels))

    X = cgt.tensor4('X', fixed_shape=(None, ) + x_shape)
    U = cgt.matrix('U', fixed_shape=(None, ) + u_shape)

    # encoding
    Xlevels = {}
    for level in range(levels[-1] + 1):
        if level == 0:
            Xlevel = X
        else:
            if level == 1:
                xlevelm1_c_dim = x_c_dim
                xlevel_c_dim = x1_c_dim
            else:
                xlevelm1_c_dim = xlevel_c_dim
                xlevel_c_dim = 2 * xlevel_c_dim
            Xlevel_1 = nn.rectify(
                nn.SpatialConvolution(xlevelm1_c_dim,
                                      xlevel_c_dim,
                                      kernelshape=(3, 3),
                                      pad=(1, 1),
                                      stride=(1, 1),
                                      name='conv%d_1' % level,
                                      weight_init=nn.IIDGaussian(std=0.01))(
                                          Xlevels[level - 1]))
            Xlevel_2 = nn.rectify(
                nn.SpatialConvolution(
                    xlevel_c_dim,
                    xlevel_c_dim,
                    kernelshape=(3, 3),
                    pad=(1, 1),
                    stride=(1, 1),
                    name='conv%d_2' % level,
                    weight_init=nn.IIDGaussian(std=0.01))(Xlevel_1))
            Xlevel = nn.max_pool_2d(Xlevel_2,
                                    kernelshape=(2, 2),
                                    pad=(0, 0),
                                    stride=(2, 2))
        Xlevels[level] = Xlevel

    # bilinear
    Xlevels_next_pred_0 = {}
    Ylevels = OrderedDict()
    Ylevels_diff_pred = OrderedDict()
    for level in levels:
        Xlevel = Xlevels[level]
        Xlevel_diff_pred = Bilinear(input_shapes,
                                    b=None,
                                    axis=2,
                                    name='bilinear%d' % level)(Xlevel, U)
        Xlevels_next_pred_0[level] = Xlevel + Xlevel_diff_pred
        Ylevels[level] = Xlevel.reshape(
            (Xlevel.shape[0], cgt.mul_multi(Xlevel.shape[1:])))
        Ylevels_diff_pred[level] = Xlevel_diff_pred.reshape(
            (Xlevel_diff_pred.shape[0],
             cgt.mul_multi(Xlevel_diff_pred.shape[1:])))

    # decoding
    Xlevels_next_pred = {}
    for level in range(levels[-1] + 1)[::-1]:
        if level == levels[-1]:
            Xlevel_next_pred = Xlevels_next_pred_0[level]
        else:
            if level == 0:
                xlevelm1_c_dim = x_c_dim
            elif level < levels[-1] - 1:
                xlevel_c_dim = xlevelm1_c_dim
                xlevelm1_c_dim = xlevelm1_c_dim // 2
            Xlevel_next_pred_2 = SpatialDeconvolution(
                xlevel_c_dim,
                xlevel_c_dim,
                kernelshape=(2, 2),
                pad=(0, 0),
                stride=(2, 2),
                name='upsample%d' % (level + 1),
                weight_init=nn.IIDGaussian(std=0.01))(Xlevels_next_pred[
                    level +
                    1])  # TODO initialize with bilinear # TODO should rectify?
            Xlevel_next_pred_1 = nn.rectify(
                SpatialDeconvolution(
                    xlevel_c_dim,
                    xlevel_c_dim,
                    kernelshape=(3, 3),
                    pad=(1, 1),
                    stride=(1, 1),
                    name='deconv%d_2' % (level + 1),
                    weight_init=nn.IIDGaussian(std=0.01))(Xlevel_next_pred_2))
            nonlinearity = nn.rectify if level > 0 else cgt.tanh
            Xlevel_next_pred = nonlinearity(
                SpatialDeconvolution(
                    xlevel_c_dim,
                    xlevelm1_c_dim,
                    kernelshape=(3, 3),
                    pad=(1, 1),
                    stride=(1, 1),
                    name='deconv%d_1' % (level + 1),
                    weight_init=nn.IIDGaussian(std=0.01))(Xlevel_next_pred_1))
            if level in Xlevels_next_pred_0:
                coefs = nn.parameter(nn.init_array(nn.Constant(0.5), (2, )),
                                     name='sum%d.coef' % level)
                Xlevel_next_pred = coefs[0] * Xlevel_next_pred + coefs[
                    1] * Xlevels_next_pred_0[level]
            # TODO: tanh should be after sum
        Xlevels_next_pred[level] = Xlevel_next_pred

    X_next_pred = Xlevels_next_pred[0]
    Y = cgt.concatenate(Ylevels.values(), axis=1)
    Y_diff_pred = cgt.concatenate(Ylevels_diff_pred.values(), axis=1)

    X_diff = cgt.tensor4('X_diff', fixed_shape=(None, ) + x_shape)
    X_next = X + X_diff
    loss = ((X_next - X_next_pred)**2).mean(axis=0).sum() / 2.

    net_name = 'FcnActionCondEncoderNet_levels' + ''.join(
        str(level) for level in levels)
    input_vars = OrderedDict([(var.name, var) for var in [X, U, X_diff]])
    pred_vars = OrderedDict([('Y_diff_pred', Y_diff_pred), ('Y', Y),
                             ('X_next_pred', X_next_pred)])
    return net_name, input_vars, pred_vars, loss
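Hypothetical usage (the observation and action shapes below are illustrative): build the network for 3x32x32 observations with 4-dimensional actions, then compile the loss over its input variables.

net_name, input_vars, pred_vars, loss = build_fcn_action_cond_encoder_net(
    ((3, 32, 32), (4,)), levels=[3])
f_loss = cgt.function(input_vars.values(), loss)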
Example 25
     Wshape = (param.num_output, nchanin)
     Wname = layer.param[0].name or layer.name + ":W"
     Wval = np.empty(Wshape, dtype=cgt.floatX)
     W = name2node[Wname] = cgt.shared(Wval,
                                       name=Wname,
                                       fixed_shape_mask="all")
     bshape = (1, param.num_output)
     bname = layer.param[1].name or layer.name + ":b"
     bval = np.empty(bshape, dtype=cgt.floatX)
     b = name2node[bname] = cgt.shared(bval,
                                       name=bname,
                                       fixed_shape_mask="all")
     yname = layer.top[0]
     output = [cgt.broadcast("+", X.dot(W), b, "xx,1x")]
 elif layer.type == "ReLU":
     output = [nn.rectify(inputs[0])]
 elif layer.type == "Softmax":
     output = [nn.softmax(inputs[0])]
 elif layer.type == "LRN":
     # XXX needs params
     param = layer.lrn_param
     output = [
         nn.lrn(inputs[0], param.alpha, param.beta, param.local_size)
     ]
 elif layer.type == "Concat":
     param = layer.concat_param
     output = [cgt.concatenate(inputs, param.concat_dim)]
 elif layer.type == "Dropout":
     output = [nn.dropout(inputs[0])]
 elif layer.type == "SoftmaxWithLoss":
     output = [nn.loglik_softmax(inputs[0], inputs[1])]