Example #1
 def __call__(self, x):
     # infer the input's spatial shape up front so width and height are
     # defined in both branches below
     _, _, width, height = cgt.infer_shape(x)
     if self.original_stride != 1:
         unstrided_width = width * self.original_stride[0]
         unstrided_height = height * self.original_stride[1]
         # workaround for this
         # cgt.inc_subtensor(upsampled, (slice(None), slice(None), slice(None, None, self.original_stride[0])), slice(None, None, self.original_stride[1])), x)
         placeholder = cgt.zeros((x.shape[0], x.shape[1], width,
                                  unstrided_height))  # (None, 64, 4, 8)
         # inc_subtensor builds a new expression rather than modifying its
         # argument in place, so keep the returned node
         placeholder = cgt.inc_subtensor(
             placeholder,
             (slice(None), slice(None), slice(None),
              slice(None, None, self.original_stride[1])), x)
         upsampled = cgt.zeros((x.shape[0], x.shape[1], unstrided_width,
                                unstrided_height))  # (None, 64, 8, 8)
         upsampled = cgt.inc_subtensor(
             upsampled,
             (slice(None), slice(None),
              slice(None, None, self.original_stride[0]), slice(None)),
             placeholder)
     else:
         upsampled = x
     # then we conv to deconv
     deconv = super(SpatialDeconvolution, self).__call__(upsampled)
     # lastly we cut off original padding
     pad = self.original_pad
     original_width = (
         (width - 1) * self.original_stride[0]
     ) - 2 * self.original_pad[0] + self.original_kernelshape[0]
     original_height = (
         (height - 1) * self.original_stride[1]
     ) - 2 * self.original_pad[1] + self.original_kernelshape[1]
     t = deconv[:, :, pad[0]:(pad[0] + original_width),
                pad[1]:(pad[1] + original_height)]
     return t
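# A small, self-contained NumPy sketch (not part of the original example) illustrating
# the zero-insertion above: the values of x are scattered into a zero array at
# stride-spaced positions along both spatial axes.
import numpy as np

stride = (2, 2)
x_np = np.arange(2 * 3 * 4 * 4, dtype="float64").reshape(2, 3, 4, 4)
n, c, w, h = x_np.shape
upsampled_np = np.zeros((n, c, w * stride[0], h * stride[1]))
upsampled_np[:, :, ::stride[0], ::stride[1]] = x_np
assert upsampled_np[:, :, ::stride[0], ::stride[1]].sum() == x_np.sum()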
Example #2
def denseLayer(nn_input, num_units, activation=rectify, w_init=XavierNormal(), bias_init=Constant(0)):
    """
    Batch by feature input.
    """
    if len(nn_input.shape) > 2:
        nn_input = cgt.reshape(nn_input, [nn_input.shape[0], reduce(lambda x, y: x*y, nn_input.shape[1:])])
    feature_dims = cgt.infer_shape(nn_input)[1]
    return activation(Affine(feature_dims, num_units, weight_init=w_init, bias_init=bias_init)(nn_input))
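# A minimal usage sketch (not from the original source), assuming cgt and the helpers
# used above (rectify, XavierNormal, Constant, Affine) are importable from this module.
# denseLayer flattens any trailing dimensions and applies Affine(feature_dims, num_units),
# so the output is expected to have shape (batch, num_units).
nn_in = cgt.matrix(fixed_shape=(32, 100))
dense_out = denseLayer(nn_in, num_units=64)
print cgt.infer_shape(dense_out)  # expected: (32, 64)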
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--epochs",type=int,default=10)
    args = parser.parse_args()

    batchsize = 64
    Xshape = (batchsize, 3, 32, 32)
    X = cgt.tensor4("X", fixed_shape = Xshape)
    y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4')

    conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=1e-4))(X)
    relu1 = nn.rectify(conv1)
    pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2))
    conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool1)
    relu2 = nn.rectify(conv2)
    pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2))
    conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool2)
    pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2))
    relu3 = nn.rectify(pool3)
    d0,d1,d2,d3 = relu3.shape
    flatlayer = relu3.reshape([d0,d1*d2*d3])
    nfeats = cgt.infer_shape(flatlayer)[1]
    ip1 = nn.Affine(nfeats, 10)(flatlayer)
    logprobs = nn.logsoftmax(ip1)
    loss = -logprobs[cgt.arange(batchsize), y].mean()

    params = nn.get_parameters(loss)
    updates = rmsprop_updates(loss, params, stepsize=1e-3)
    
    train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates)

    if args.profile: cgt.profiler.start()

    data = fetch_dataset("http://rll.berkeley.edu/cgt-data/cifar10.npz")
    Xtrain = data["X_train"]
    ytrain = data["y_train"]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(args.epochs):
        for start in xrange(0, Xtrain.shape[0], batchsize):
            tstart = time.time()
            end = start+batchsize
            print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart
            if start > batchsize*5: break
        # elapsed = time.time() - tstart
        # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        # testerr, testloss = computeloss(Xtest, ytest)
        # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
        if args.profile: 
            cgt.profiler.print_stats()
            return
        if args.unittest:
            break
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--epochs",type=int,default=10)
    args = parser.parse_args()

    batchsize = 64
    Xshape = (batchsize, 3, 32, 32)
    X = cgt.tensor4("X", fixed_shape = Xshape)
    y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4')

    conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=1e-4))(X)
    relu1 = nn.rectify(conv1)
    pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2))
    conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool1)
    relu2 = nn.rectify(conv2)
    pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2))
    conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool2)
    pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2))
    relu3 = nn.rectify(pool3)
    d0,d1,d2,d3 = relu3.shape
    flatlayer = relu3.reshape([d0,d1*d2*d3])
    nfeats = cgt.infer_shape(flatlayer)[1]
    ip1 = nn.Affine(nfeats, 10)(flatlayer)
    logprobs = nn.logsoftmax(ip1)
    loss = -logprobs[cgt.arange(batchsize), y].mean()

    params = nn.get_parameters(loss)
    updates = rmsprop_updates(loss, params, stepsize=1e-3)
    
    train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates)

    if args.profile: cgt.profiler.start()

    data = np.load("/Users/joschu/Data/cifar-10-batches-py/cifar10.npz")
    Xtrain = data["X_train"]
    ytrain = data["y_train"]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(args.epochs):
        for start in xrange(0, Xtrain.shape[0], batchsize):
            tstart = time.time()
            end = start+batchsize
            print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart
            if start > batchsize*5: break
        # elapsed = time.time() - tstart
        # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        # testerr, testloss = computeloss(Xtest, ytest)
        # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
        if args.profile: 
            cgt.profiler.print_stats()
            return
        if args.unittest:
            break
Example #5
def GRULayer(nn_input, num_units, activation=cgt.sigmoid, backwards=False,
             w_init=XavierNormal(), hid_out_init=IIDUniform(0, 1)):
        if len(nn_input.shape) > 3:
            nn_input = nn_input.reshape([nn_input.shape[0], nn_input.shape[1], reduce(lambda x, y: x*y, nn_input.shape[2:])])
        in_shape = cgt.infer_shape(nn_input)
        time_dim = in_shape[1]
        feature_dims = in_shape[2]
        return GRU(input_feature_size=feature_dims, input_time_size=time_dim, num_units=num_units,
                   weight_init=w_init, hid_out_init=hid_out_init, activation=activation, backwards=backwards)(nn_input)
Example #6
def LSTMLayer(nn_input, num_units, activation=rectify, backwards=False,
              w_init=XavierNormal(), hid_out_init=IIDUniform(0, 1), cell_out_init=IIDUniform(0, 1)):
        if len(nn_input.shape) > 3:
            nn_input = nn_input.reshape([nn_input.shape[0], nn_input.shape[1], reduce(lambda x, y: x*y, nn_input.shape[2:])])
        in_shape = cgt.infer_shape(nn_input)
        time_dim = in_shape[1]
        feature_dims = in_shape[2]
        return LSTM(input_feature_size=feature_dims, input_time_size=time_dim, num_units=num_units,
                    weight_init=w_init, hid_out_init=hid_out_init, cell_out_init=cell_out_init,
                    activation=activation, backwards=backwards)(nn_input)
Example #7
def recurrentLayer(nn_input, num_units, activation=rectify, w_init=XavierNormal(),
                   hid_out_init=IIDUniform(0, 1), backwards=False, mask=None):
    """
    Batch by time by features
    """
    if len(nn_input.shape) > 3:
        nn_input = nn_input.reshape([nn_input.shape[0], nn_input.shape[1], reduce(lambda x, y: x*y, nn_input.shape[2:])])
    in_shape = cgt.infer_shape(nn_input)
    time_dim = in_shape[1]
    feature_dims = in_shape[2]
    return Recurrent(input_feature_size=feature_dims, input_time_size=time_dim,
                     num_units=num_units, weight_init=w_init, hid_out_init=hid_out_init,
                     activation=activation, backwards=backwards)(nn_input)
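# A minimal usage sketch (not from the original source), assuming cgt and the Recurrent,
# XavierNormal, and IIDUniform helpers used above are importable from this module.
# recurrentLayer keeps the batch and time axes and maps features to num_units, so the
# output is expected to have shape (batch, time, num_units).
rnn_in = cgt.tensor3(fixed_shape=(32, 10, 50))
rnn_out = recurrentLayer(rnn_in, num_units=20)
print cgt.infer_shape(rnn_out)  # expected: (32, 10, 20)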
Example #8
def test_get_train_objective():
    batch_size = 32
    feat_t_steps = 5
    feat_num_features = 256
    max_label_length = 5
    num_out_classes = 27
    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    ground_labels_basis = cgt.tensor3(fixed_shape=(batch_size, max_label_length, num_out_classes))
    seq2seq = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes)
    train_objective = seq2seq.get_train_objective(max_label_length=max_label_length,
                                                  ground_labels_basis_btc=ground_labels_basis)
    train_shape = cgt.infer_shape(train_objective)
    assert train_shape == ()
    nn.get_parameters(train_objective)
Example #9
def pyramidLayer(nn_input, temporal_resolution_decrease=2):
    """
    Batch by time by features. Decreases temporal resolution and increases feature dimension by a resolution decrease factor.
    """
    t_steps = cgt.infer_shape(nn_input)[1]
    if t_steps % temporal_resolution_decrease != 0:
        raise ValueError('number of timesteps is not divisible by the resolution decrease factor')
    out_list = []
    for iter_step in range(0, t_steps, temporal_resolution_decrease):
        concentrate_list = []
        for sub_iter_step in range(0, temporal_resolution_decrease):
            concentrate_list.append(nn_input[:, iter_step + sub_iter_step, :])
        out_list.append(cgt.concatenate(concentrate_list, axis=1))
    return cgt.dimshuffle(cgt.stack(out_list), [1, 0, 2])
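# A shape sketch (not from the original source): with temporal_resolution_decrease=2,
# pairs of adjacent timesteps are concatenated along the feature axis, so an input of
# shape (batch, t, f) is expected to come out as (batch, t/2, 2*f).
pyr_in = cgt.tensor3(fixed_shape=(32, 10, 8))
pyr_out = pyramidLayer(pyr_in, temporal_resolution_decrease=2)
print cgt.infer_shape(pyr_out)  # expected: (32, 5, 16)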
Example #10
def test_im2col():
    for settings in [ ((4,4),(0,0),(1,1)), ((3,3),(1,1),(2,2)), ((3,3),(1,1),(3,3)) ]:
        xval = np.arange(2*1*28*28).reshape(2,1,28,28).astype(cgt.floatX)
        x = cgt.tensor4("x", fixed_shape=xval.shape)
        y = im2col(x, *settings)
        h = cgt.constant(np.random.randn(*cgt.infer_shape(y)))
        cost = (y*h).sum()

        fcost = cgt.function([x],cost)
        fgrad = cgt.function([x], cgt.grad(cost, [x])[0])

        from cgt.numeric_diff import numeric_grad
        gnum = numeric_grad(fcost, xval,eps=1e-5)
        gana = fgrad(xval)
        assert np.allclose(gnum, gana)
Example #11
def temporalDenseLayer(nn_input, num_units, activation=rectify, w_init=XavierNormal(), bias_init=Constant(0)):
    """
    Batch by time by features.
    """
    if len(nn_input.shape) > 3:
        nn_input = nn_input.reshape([nn_input.shape[0], nn_input.shape[1], reduce(lambda x, y: x*y, nn_input.shape[2:])])
    dims = cgt.infer_shape(nn_input)
    temporal_dims = dims[1]
    feature_dims = dims[2]
    affine_underbelly = Affine(feature_dims, num_units, weight_init=w_init, bias_init=bias_init)
    out_list = []
    for iter_step in range(0, temporal_dims):
        input_slice = nn_input[:, iter_step, :]
        out_list.append(activation(affine_underbelly(input_slice)))
    return cgt.dimshuffle(cgt.stack(out_list), [1, 0, 2])
Example #12
 def build_convnet_return_loss(X, y):
     np.random.seed(0)
     conv1 = nn.rectify(
         nn.SpatialConvolution(1, 32, kernelshape=(3, 3), pad=(0, 0), weight_init=nn.IIDGaussian(std=0.1))(X)
     )
     pool1 = nn.max_pool_2d(conv1, kernelshape=(3, 3), stride=(2, 2))
     conv2 = nn.rectify(
         nn.SpatialConvolution(32, 32, kernelshape=(3, 3), pad=(0, 0), weight_init=nn.IIDGaussian(std=0.1))(pool1)
     )
     pool2 = nn.max_pool_2d(conv2, kernelshape=(3, 3), stride=(2, 2))
     d0, d1, d2, d3 = pool2.shape
     flatlayer = pool2.reshape([d0, d1 * d2 * d3])
     nfeats = cgt.infer_shape(flatlayer)[1]
     logprobs = nn.logsoftmax(nn.Affine(nfeats, 10)(flatlayer))
     loss = -logprobs[cgt.arange(X.shape[0]), y].mean()
     return loss
Example #13
def test_im2col():
    for settings in [((4, 4), (0, 0), (1, 1)), ((3, 3), (1, 1), (2, 2)),
                     ((3, 3), (1, 1), (3, 3))]:
        xval = np.arange(2 * 1 * 28 * 28).reshape(2, 1, 28,
                                                  28).astype(cgt.floatX)
        x = cgt.tensor4("x", fixed_shape=xval.shape)
        y = im2col(x, *settings)
        h = cgt.constant(np.random.randn(*cgt.infer_shape(y)))
        cost = (y * h).sum()

        fcost = cgt.function([x], cost)
        fgrad = cgt.function([x], cgt.grad(cost, [x])[0])

        from cgt.numeric_diff import numeric_grad
        gnum = numeric_grad(fcost, xval, eps=1e-5)
        gana = fgrad(xval)
        assert np.allclose(gnum, gana)
Example #14
def test_cpu_pool(**kwargs):
    np.random.seed(0)
    x = cgt.tensor4("x", fixed_shape=(2, 3, 5, 7))
    y = max_pool_2d(x, (4, 4), (0, 0), (1, 1))
    xval = np.random.randn(2, 3, 5, 7)
    hval = np.random.randn(*cgt.infer_shape(y))
    h = cgt.constant(hval)

    cost = (y * h).sum()

    fcost = cgt.function([x], cost)
    fgrad = cgt.function([x], cgt.grad(cost, [x])[0])

    from cgt.numeric_diff import numeric_grad
    gnum = numeric_grad(fcost, xval)
    gana = fgrad(xval)
    assert np.allclose(gnum, gana)
Example #15
def test_pool(**kwargs):
    np.random.seed(0)
    x = cgt.tensor4("x", fixed_shape=(2,3,5,7))
    y = max_pool_2d(x, (4,4),(0,0),(1,1))
    xval = np.random.randn(2,3,5,7)
    hval = np.random.randn(*cgt.infer_shape(y))
    h = cgt.constant(hval)

    cost = (y*h).sum()

    fcost = cgt.function([x], cost)
    fgrad = cgt.function([x], cgt.grad(cost, [x])[0])

    from cgt.numeric_diff import numeric_grad
    gnum = numeric_grad(fcost, xval)
    gana = fgrad(xval)
    assert np.allclose(gnum,gana)
Example #16
def test_lrn():
    if not get_compile_info()["CGT_ENABLE_CUDA"]:
        raise SkipTest("Skipping because CUDA disabled")

    nr.seed(0)
    Xval = nr.randn(4,8,16,16)
    X = cgt.shared(Xval, name="X", fixed_shape_mask="all")
    # X = cgt.tensor4(name='X')
    y = cross_channel_lrn(X, localsize=4, alpha=.1, beta=.5)
    f = cgt.function([],y)
    print f().sum()
    print f().sum()
    print f().sum()
    assert np.isfinite(f().sum())
    # print f(Xval).sum()
    a = nr.rand(*cgt.infer_shape(y))
    loss = (y*a).sum()
    gradcheck_model(loss, [X],eps=1e-5)
Example #17
def test_lrn():
    if not get_compile_info()["CGT_ENABLE_CUDA"]:
        raise SkipTest("Skipping because CUDA disabled")

    nr.seed(0)
    Xval = nr.randn(4, 8, 16, 16)
    X = cgt.shared(Xval, name="X", fixed_shape_mask="all")
    # X = cgt.tensor4(name='X')
    y = cross_channel_lrn(X, localsize=4, alpha=.1, beta=.5)
    f = cgt.function([], y)
    print f().sum()
    print f().sum()
    print f().sum()
    assert np.isfinite(f().sum())
    # print f(Xval).sum()
    a = nr.rand(*cgt.infer_shape(y))
    loss = (y * a).sum()
    gradcheck_model(loss, [X], eps=1e-5)
Example #18
def test_get_context():
    batch_size = 32
    feat_t_steps = 3
    feat_num_features = 30
    state_num_features = 20
    num_out_classes = 28
    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    prev_out = cgt.matrix(fixed_shape=(batch_size, state_num_features))
    sigmoided = cgt.sigmoid(prev_out)
    s = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes, feature_size=feat_num_features, decoder_size=state_num_features)
    mm = cgt.infer_shape(s.features_post_mlp_btf)
    assert mm == (batch_size, feat_t_steps, feat_num_features)
    context_out = s.get_context(sigmoided)
    out_fun = cgt.function([feats, prev_out], [context_out])
    tau = np.reshape(np.random.normal(0.1, 0.2, batch_size*feat_t_steps*feat_num_features), (batch_size, feat_t_steps, feat_num_features))
    tau2 = np.reshape(np.random.normal(0.1, 0.2, batch_size*state_num_features), (batch_size, state_num_features))
    m = out_fun(tau, tau2)[0]
    assert m.shape == (batch_size, feat_num_features)
    assert np.mean(m) < 1
Example #19
    def __init__(self, nn_input_btf, num_out_classes, get_features_fun=None,
                 feature_size=40, decoder_size=40, w_init=IIDUniform(-0.1, 0.1)):

        self.start_token_index = num_out_classes
        self.end_token_index = self.start_token_index + 1
        self.true_number_classes = num_out_classes + 2  # add dims for start and end token.
        self.batch_size = cgt.infer_shape(nn_input_btf)[0]
        self.w_init = w_init
        self.feature_size = feature_size
        self.decoder_size = decoder_size

        if get_features_fun is not None:
            self.get_features_fun = get_features_fun
        else:
            self.get_features_fun = self.get_features_bengio

        features_btf = self.get_features_fun(nn_input_btf, num_units=self.feature_size)
        # Compute psi<h_u> over all u (timesteps), the features from the ground data.

        # This is for computing the context c_i. The features are put through a dense layer.
        self.features_post_mlp_btf = temporalDenseLayer(features_btf, self.feature_size, w_init=self.w_init,
                                                        activation=linear, bias_init=Constant(0.0))

        self.mixing_vec_w = parameter(init_array(w_init, (1, 1, self.feature_size,)), name=None)

        # These are for the decoder mechanism, which computes s_i.
        rnn_activation = cgt.sigmoid
        recurrence = Recurrent

        self.recurrent_decoder_one = recurrence(num_units=self.decoder_size, input_time_size=None,
                                                input_feature_size=self.feature_size + self.true_number_classes,
                                                weight_init=self.w_init, activation=rnn_activation).take_one_step
        self.recurrent_decoder_two = linear
        #self.recurrent_decoder_two = recurrence(num_units=self.decoder_size, input_time_size=None,
        #                                        input_feature_size=self.decoder_size,
        #                                        weight_init=self.w_init, activation=rnn_activation).take_one_step

        # Multiply s_i by V to make it have same dimension as h_u.
        self.states_mlp_bf = Affine(self.decoder_size, self.feature_size,
                                    weight_init=self.w_init, bias_init=Constant(0.0))
        # This is the final dense layer, which computes the class probs at the end of all things.
        self.final_out_dense = Affine(self.decoder_size + self.feature_size, self.true_number_classes,
                                      weight_init=w_init, bias_init=Constant(0.0))
Example #20
def test_cpu_pool():
    with cgt.scoped_update_config(precision="quad",backend="native"):
        print cgt.get_precision()
        ci = get_compile_info()

        np.random.seed(0)
        x = cgt.tensor4("x", fixed_shape=(2,3,5,7))
        y = max_pool_2d(x, (4,4),(0,0),(1,1))
        xval = np.random.randn(2,3,5,7)
        hval = np.random.randn(*cgt.infer_shape(y))
        h = cgt.constant(hval)

        cost = (y*h).sum()

        fcost = cgt.function([x], cost)
        fgrad = cgt.function([x], cgt.grad(cost, [x])[0])

        from cgt.numeric_diff import numeric_grad
        gnum = numeric_grad(fcost, xval)
        gana = fgrad(xval)

        assert np.allclose(gnum,gana)
Example #21
def test_cpu_pool():
    with cgt.scoped_update_config(precision="quad", backend="native"):
        print cgt.get_precision()
        ci = get_compile_info()

        np.random.seed(0)
        x = cgt.tensor4("x", fixed_shape=(2, 3, 5, 7))
        y = max_pool_2d(x, (4, 4), (0, 0), (1, 1))
        xval = np.random.randn(2, 3, 5, 7)
        hval = np.random.randn(*cgt.infer_shape(y))
        h = cgt.constant(hval)

        cost = (y * h).sum()

        fcost = cgt.function([x], cost)
        fgrad = cgt.function([x], cgt.grad(cost, [x])[0])

        from cgt.numeric_diff import numeric_grad
        gnum = numeric_grad(fcost, xval)
        gana = fgrad(xval)

        assert np.allclose(gnum, gana)
Example #22
 def build_convnet_return_loss(X, y):
     np.random.seed(0)
     conv1 = nn.rectify(
         nn.SpatialConvolution(1,
                               32,
                               kernelshape=(3, 3),
                               pad=(0, 0),
                               weight_init=nn.IIDGaussian(std=.1))(X))
     pool1 = nn.max_pool_2d(conv1, kernelshape=(3, 3), stride=(2, 2))
     conv2 = nn.rectify(
         nn.SpatialConvolution(32,
                               32,
                               kernelshape=(3, 3),
                               pad=(0, 0),
                               weight_init=nn.IIDGaussian(std=.1))(pool1))
     pool2 = nn.max_pool_2d(conv2, kernelshape=(3, 3), stride=(2, 2))
     d0, d1, d2, d3 = pool2.shape
     flatlayer = pool2.reshape([d0, d1 * d2 * d3])
     nfeats = cgt.infer_shape(flatlayer)[1]
     logprobs = nn.logsoftmax(nn.Affine(nfeats, 10)(flatlayer))
     loss = -logprobs[cgt.arange(X.shape[0]), y].mean()
     return loss
Example #23
                                         (hidden_size, hidden_size)),
                              name=name + ".W_hh")

        # hidden to output
        self.W_ho = parameter(init_array(weight_init,
                                         (hidden_size, hidden_size)),
                              name=name + ".W_ho")

    def __call__(self, x, prev_h):
        """
        x is the input
        prev_h is the input from the previous timestep

        Returns (out, next_h). Feed out into the next layer and
        next_h to the next timestep.
        """

        next_h = cgt.tanh(prev_h.dot(self.W_hh) + x.dot(self.W_xh))
        out = next_h.dot(self.W_ho)
        return out, next_h


# Make sure it compiles!

x = cgt.matrix()  # (batch_size, n_features)
h = cgt.matrix()  # this will later be the identity matrix

o, next_h = RNNCell(5, 10)(x, h)
print("Output:", o, cgt.infer_shape(o))
print("Next Hidden:", next_h, cgt.infer_shape(next_h))
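# A numeric check (not part of the original tutorial), assuming RNNCell(5, 10) creates
# W_xh of shape (5, 10) and W_hh, W_ho of shape (10, 10) as the usage above suggests.
import numpy as np
step = cgt.function([x, h], [o, next_h])
out_val, next_h_val = step(np.random.randn(2, 5), np.random.randn(2, 10))
print("Output value shape:", out_val.shape)          # expected: (2, 10)
print("Next hidden value shape:", next_h_val.shape)  # expected: (2, 10)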
Example #24
        Returns next_h. For the GRU the output to the next timestep
        and next layer is one and the same. Copy it first!
        """

        reset_gate = cgt.sigmoid(x.dot(self.W_xr) + prev_h.dot(self.W_hr))
        update_gate = cgt.sigmoid(x.dot(self.W_xz) + prev_h.dot(self.W_hz))

        # the elementwise multiplication here determines how much of the
        # previous hidden state we should forget.
        forget_gate = reset_gate * prev_h

        # this part is very similar to a vanilla RNN: the gated previous state is
        # projected by the hidden-to-candidate weights (assumed here to be self.W_hc,
        # defined in the cell's __init__, which is not shown in this snippet)
        h_candidate = cgt.tanh(x.dot(self.W_xc) + forget_gate.dot(self.W_hc))
        # this isn't super clear in the paper, but it's an elementwise interpolation
        # between the previous hidden state and the candidate
        next_h = (1. - update_gate) * prev_h + update_gate * h_candidate

        # In a standard GRU cell we only have 1 output.
        # However, it should be copied and fed to
        # both the next timestep and the next layer
        return next_h


# Make sure it compiles!

x = cgt.matrix()  # (batch_size, n_features)
h = cgt.matrix()  # this will later be the identity matrix

next_h = GRUCell(5, 10)(x, h)
print("Next Hidden:", next_h, cgt.infer_shape(next_h))
Example #25
    def take_one_step(self, nn_input_bf, hid_out):

        #PROBABLY BUGGED. SHOULD BE REWRITTEN.

        self.num_batches = cgt.infer_shape(nn_input_bf)[0]

        # (n_time_steps, n_batch, n_features)
        #input_bf = cgt.dimshuffle(nn_input_bf, [1, 0, 2])

        # Stack input weight matrices into a (num_inputs, 3*num_units)
        # matrix, which speeds up computation
        W_in_stacked = cgt.concatenate(
            [self.W_in_to_resetgate, self.W_in_to_updategate,
             self.W_in_to_hidden_update], axis=1)

        # Same for hidden weight matrices
        W_hid_stacked = cgt.concatenate(
            [self.W_hid_to_resetgate, self.W_hid_to_updategate,
             self.W_hid_to_hidden_update], axis=1)

        # Stack gate biases into a (3*num_units) vector
        b_stacked = cgt.concatenate(
            [self.b_resetgate, self.b_updategate,
             self.b_hidden_update], axis=1)


        # At each loop, input_n will be (n_time_steps, 3*num_units).
        # We define a slicing function that extracts the input to each GRU gate
        def slice_w(x, n):
            return x[:, n*self.num_units:(n+1)*self.num_units]

        # Create single recurrent computation step function
        # input_n is the n'th vector of the input
        def step(input_n, hid_previous, W_hid_stacked, W_in_stacked, b_stacked):
            # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1}, and W_{hc} h_{t - 1}
            hid_input = cgt.dot(hid_previous, W_hid_stacked)

            # Compute W_{xr}x_t + b_r, W_{xu}x_t + b_u, and W_{xc}x_t + b_c
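            # the "xx,1x" pattern tells cgt.broadcast that b_stacked's leading
            # (size-1) axis should be broadcast across the batch dimension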
            input_n = cgt.broadcast("+", input_n.dot(W_in_stacked), b_stacked, "xx,1x")

            # Reset and update gates
            resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0)
            updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
            resetgate = self.nonlinearity_resetgate(resetgate)
            updategate = self.nonlinearity_updategate(updategate)

            # Compute W_{xc}x_t + r_t \odot (W_{hc} h_{t - 1})
            hidden_update_in = slice_w(input_n, 2)
            hidden_update_hid = slice_w(hid_input, 2)
            hidden_update = hidden_update_in + resetgate*hidden_update_hid

            # Compute (1 - u_t)h_{t - 1} + u_t c_t
            hid = (1 - updategate)*hid_previous + updategate*hidden_update
            return self.nonlinearity_hid(hid)  # adding this non-linearity seems to help stability.
            #return hid

        if hid_out is None:
            if self.hid_out is None:
                self.hid_out = cgt.dot(cgt.ones((self.num_batches, 1)), self.hid_init)
            hid_out = self.hid_out



        # Retrieve the dimensionality of the incoming layer
        hid_out = step(nn_input_bf, hid_out, W_hid_stacked, W_in_stacked, b_stacked)

        # dimshuffle back to (n_batch, n_time_steps, n_features)
        # self.hid_out = cgt.dimshuffle(self.hid_out, [1, 0, 2])

        # if scan is backward reverse the output
        if self.backwards:
            hid_out = cgt.flip(hid_out, [1])

        self.hid_out = hid_out

        return hid_out
Example #26
    def __call__(self, input_btf):

        # (n_time_steps, n_batch, n_features)
        input_tbf = cgt.dimshuffle(input_btf, [1, 0, 2])
        self.num_batches = cgt.infer_shape(input_tbf)[1]

        # Stack input weight matrices into a (num_inputs, 3*num_units)
        # matrix, which speeds up computation
        W_in_stacked = cgt.concatenate(
            [self.W_in_to_resetgate, self.W_in_to_updategate,
             self.W_in_to_hidden_update], axis=1)

        # Same for hidden weight matrices
        W_hid_stacked = cgt.concatenate(
            [self.W_hid_to_resetgate, self.W_hid_to_updategate,
             self.W_hid_to_hidden_update], axis=1)

        # Stack gate biases into a (3*num_units) vector
        b_stacked = cgt.concatenate(
            [self.b_resetgate, self.b_updategate,
             self.b_hidden_update], axis=1)


        # At each loop, input_n will be (n_time_steps, 3*num_units).
        # We define a slicing function that extracts the input to each GRU gate
        def slice_w(x, n):
            return x[:, n*self.num_units:(n+1)*self.num_units]

        # Create single recurrent computation step function
        # input_n is the n'th vector of the input
        def step(input_n, hid_previous, W_hid_stacked, W_in_stacked, b_stacked):
            # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1}, and W_{hc} h_{t - 1}
            hid_input = cgt.dot(hid_previous, W_hid_stacked)

            # Compute W_{xr}x_t + b_r, W_{xu}x_t + b_u, and W_{xc}x_t + b_c
            input_n = cgt.broadcast("+", input_n.dot(W_in_stacked), b_stacked, "xx,1x")

            # Reset and update gates
            resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0)
            updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
            resetgate = self.nonlinearity_resetgate(resetgate)
            updategate = self.nonlinearity_updategate(updategate)

            # Compute W_{xc}x_t + r_t \odot (W_{hc} h_{t - 1})
            hidden_update_in = slice_w(input_n, 2)
            hidden_update_hid = slice_w(hid_input, 2)
            hidden_update = hidden_update_in + resetgate*hidden_update_hid

            # Compute (1 - u_t)h_{t - 1} + u_t c_t
            hid = (1 - updategate)*hid_previous + updategate*hidden_update
            return hid

        sequences = [input_tbf]
        step_fun = step
        hid_init = cgt.dot(cgt.ones((self.num_batches, 1)), self.hid_init)

        # The hidden-to-hidden weight matrix is always used in step
        non_seqs = [W_hid_stacked]
        # When we aren't precomputing the input outside of scan, we need to
        # provide the input weights and biases to the step function
        non_seqs += [W_in_stacked, b_stacked]
        # theano.scan only allows for positional arguments, so when
        # self.precompute_input is True, we need to supply fake placeholder
        # arguments for the input weights and biases.

        # Retrieve the dimensionality of the incoming layer
        hid_out = unroll_lstm(
            fn=step_fun,
            sequences=sequences,
            outputs_info=[hid_init],
            go_backwards=self.backwards,
            non_sequences=non_seqs,
            n_steps=self.timesteps)[0]

        # dimshuffle back to (n_batch, n_time_steps, n_features)
        hid_out = cgt.dimshuffle(hid_out, [1, 0, 2])

        # if scan is backward reverse the output
        if self.backwards:
            hid_out = cgt.flip(hid_out, [1])

        return hid_out
Example #27
        Returns (next_c, next_h).

        next_h should be cloned since it's fed into the next layer and
        the next timestep.
        """

        forget_gate = cgt.sigmoid(x.dot(self.W_xf) + prev_h.dot(self.W_hf))
        input_gate = cgt.sigmoid(x.dot(self.W_xi) + prev_h.dot(self.W_hi))
        output_gate = cgt.sigmoid(x.dot(self.W_xo) + prev_h.dot(self.W_ho))
        candidate_values = cgt.tanh(x.dot(self.W_xc) + prev_h.dot(self.W_hc))

        # new cell state
        next_c = forget_gate * prev_c + input_gate * candidate_values
        # input for next timestep
        next_h = output_gate * cgt.tanh(next_c)

        # NOTE: we feed next_h into the next layer and the next timestep
        # so we should clone the next_h output.
        return next_c, next_h


# Make sure it compiles!

x = cgt.matrix()  # (batch_size, n_features)
h = cgt.matrix()  # this will later be the identity matrix
c = cgt.matrix()  # this will later be the identity matrix

next_c, next_h = LSTMCell(5, 10)(x, c, h)
print("Next Cell State:", next_c, cgt.infer_shape(next_c))
print("Next Hidden:", next_h, cgt.infer_shape(next_h))
Example #28
import cgt
import numpy as np
from cgt.nn import parameter, init_array, HeUniform, Constant

# ignore bias for the sake of simplicity
class FeedforwardCell(object):
    def __init__(self, input_size, output_size, name="", weight_init=HeUniform(1.0), bias_init=Constant(0)):
        """
        Initialize a Feedforward cell.
        """

        self.W = parameter(init_array(weight_init, (input_size, output_size)), name=name + ".W")
        self.b = parameter(init_array(bias_init, (1, output_size)), name=name + ".b")

    def __call__(self, x):
        """
        x is the input

        Returns the output to feed as the input into the next layer.
        """

        return cgt.broadcast("+", x.dot(self.W), self.b, "xx,1x")


# Make sure it compiles!

# x is a matrix of size (batch_size, features_size)
x = cgt.matrix()
o = FeedforwardCell(5, 10)(x)
print("Output:", o, cgt.infer_shape(o))
Example #29
                 input_size,
                 output_size,
                 name="",
                 weight_init=HeUniform(1.0),
                 bias_init=Constant(0)):
        """
        Initialize a Feedforward cell.
        """

        self.W = parameter(init_array(weight_init, (input_size, output_size)),
                           name=name + ".W")
        self.b = parameter(init_array(bias_init, (1, output_size)),
                           name=name + '.b')

    def __call__(self, x):
        """
        x is the input

        Returns the output to feed as the input into the next layer.
        """

        return cgt.broadcast("+", x.dot(self.W), self.b, "xx,1x")


# Make sure it compiles!

# x is a matrix of size (batch_size, features_size)
x = cgt.matrix()
o = FeedforwardCell(5, 10)(x)
print("Output:", o, cgt.infer_shape(o))
Example #30
        # hidden to hidden
        self.W_hh = parameter(init_array(weight_init, (hidden_size, hidden_size)),
            name=name+".W_hh")

        # hidden to output
        self.W_ho = parameter(init_array(weight_init, (hidden_size, hidden_size)),
            name=name+".W_ho")

    def __call__(self, x, prev_h):
        """
        x is the input
        prev_h is the input from the previous timestep

        Returns (out, next_h). Feed out into the next layer and
        next_h to the next timestep.
        """

        next_h = cgt.tanh(prev_h.dot(self.W_hh) + x.dot(self.W_xh))
        out = next_h.dot(self.W_ho)
        return out, next_h

# Make sure it compiles!

x = cgt.matrix() # (batch_size, n_features)
h = cgt.matrix() # this will later be the identity matrix

o, next_h = RNNCell(5, 10)(x, h)
print("Output:", o, cgt.infer_shape(o))
print("Next Hidden:", next_h, cgt.infer_shape(next_h))
Example #31
X = cgt.tensor4('X', fixed_shape=(None, 1, 28, 28))
y = cgt.vector('y', dtype='i8')

conv1 = nn.rectify(
        nn.SpatialConvolution(1, 32, kernelshape=(3,3), stride=(1,1), pad=(1,1), weight_init=nn.IIDGaussian(std=.1))(X)
        )
pool1 = nn.max_pool_2d(conv1, kernelshape=(2,2), stride=(2,2))

conv2 = nn.rectify(
        nn.SpatialConvolution(32, 32, kernelshape=(3,3), stride=(1,1), pad=(1,1), weight_init=nn.IIDGaussian(std=.1))(pool1)
        )
pool2 = nn.max_pool_2d(conv2, kernelshape=(2,2), stride=(2,2))
d0, d1, d2, d3 = pool2.shape

flat = pool2.reshape([d0, d1*d2*d3])
nfeats = cgt.infer_shape(flat)[1]
probs = nn.softmax(nn.Affine(nfeats, 10)(flat))
cost = -categorical.loglik(y, probs).mean()

y_preds = cgt.argmax(probs, axis=1)
err = cgt.cast(cgt.not_equal(y, y_preds), cgt.floatX).mean()

params = nn.get_parameters(cost)
updates = nn.sgd(cost, params, 1e-3) 

# training function
f = cgt.function(inputs=[X, y], outputs=[], updates=updates)
# compute the cost and error
cost_and_err = cgt.function(inputs=[X, y], outputs=[cost, err])

for i in xrange(epochs):
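    # The original snippet is cut off here. A minimal sketch of one epoch's body,
    # assuming Xtrain, ytrain, batch_size, and epochs are defined elsewhere
    # (e.g. loaded the way the CIFAR-10 examples above load their data).
    for start in xrange(0, Xtrain.shape[0], batch_size):
        end = start + batch_size
        f(Xtrain[start:end], ytrain[start:end])
    print i, cost_and_err(Xtrain[:batch_size], ytrain[:batch_size])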
Example #32
        Returns (next_c, next_h).

        next_h should be cloned since it's fed into the next layer and
        the next timestep.
        """

        forget_gate = cgt.sigmoid(x.dot(self.W_xf) + prev_h.dot(self.W_hf))
        input_gate = cgt.sigmoid(x.dot(self.W_xi) + prev_h.dot(self.W_hi))
        output_gate = cgt.sigmoid(x.dot(self.W_xo) + prev_h.dot(self.W_ho))
        candidate_values = cgt.tanh(x.dot(self.W_xc) + prev_h.dot(self.W_hc))

        # new cell state
        next_c = forget_gate * prev_c + input_gate * candidate_values
        # input for next timestep
        next_h = output_gate * cgt.tanh(next_c)

        # NOTE: we feed next_h into the next layer and the next timestep
        # so we should clone the next_h output.
        return next_c, next_h

# Make sure it compiles!

x = cgt.matrix() # (batch_size, n_features)
h = cgt.matrix() # this will later be the identity matrix
c = cgt.matrix() # this will later be the identity matrix

next_c, next_h = LSTMCell(5, 10)(x, c, h)
print("Next Cell State:", next_c, cgt.infer_shape(next_c))
print("Next Hidden:", next_h, cgt.infer_shape(next_h))