예제 #1
0
    def _get_primary_net(self):
        """Build the primary network and store it in ``self.p_net`` / ``self.y``.

        The network consists of an input layer sized for ``self.dataset``,
        one ``stochasticConv2DLayer`` per ``(weight_shape, args)`` pair taken
        from ``self.weight_shapes`` and ``self.args``, ``self.num_mlp_layers``
        rectifier ``stochasticDenseLayer2`` layers, and a softmax
        ``stochasticDenseLayer2`` output layer.

        Convolution kernels are sliced row-wise from ``self.kernel_weights``;
        dense-layer parameters are sliced column-wise from ``self.weights``.

        Each ``args`` entry is read positionally as
        ``(num_filters, filter_size, stride, pad, nonlinearity, pool)``.
        When ``pool == 'max'`` a ``MaxPool2DLayer`` with pool size 2 follows
        the convolution.

        The output shape of the input layer and of every conv stage is
        printed while the network is being assembled.

        Raises:
            ValueError: if ``self.dataset`` is neither ``'mnist'`` nor
                ``'cifar10'``.
        """
        if self.dataset == 'mnist':
            input_shape = [None, 1, 28, 28]
        elif self.dataset == 'cifar10':
            input_shape = [None, 3, 32, 32]
        else:
            # Previously this fell through and failed later with a NameError
            # on ``p_net``; fail early with an actionable message instead.
            raise ValueError(
                "unsupported dataset %r; expected 'mnist' or 'cifar10'"
                % (self.dataset,))

        # t: running column offset into self.weights.
        # k: running row offset into self.kernel_weights.
        t = np.cast['int32'](0)
        k = np.cast['int32'](0)

        p_net = lasagne.layers.InputLayer(input_shape)
        print(p_net.output_shape)
        inputs = {p_net: self.input_var}

        for ws, args in zip(self.weight_shapes, self.args):
            num_param = np.prod(ws)
            num_kernel = ws[1]

            # Take this layer's rows, move them to the last axis and unfold
            # the remaining axis into self.kernel_shape.  The 4-axis
            # dimshuffle below requires self.kernel_shape to have 3 entries.
            weight = self.kernel_weights[k:k + num_kernel, :]
            weight = weight.dimshuffle(1, 0).reshape(
                self.kernel_shape + (num_kernel,))
            # Bring the per-kernel axis from last position to position 1.
            weight = weight.dimshuffle(0, 3, 1, 2)

            num_filters, filter_size, stride, pad, nonl = args[:5]
            p_net = stochasticConv2DLayer([p_net, weight],
                                          num_filters,
                                          filter_size,
                                          stride,
                                          pad,
                                          nonlinearity=nonl)

            if args[5] == 'max':
                p_net = lasagne.layers.MaxPool2DLayer(p_net, 2)
            print(p_net.output_shape)

            # t is advanced by the conv parameter count even though the
            # kernels come from self.kernel_weights, so the dense-layer
            # slices of self.weights below start after that offset.
            t += num_param
            k += num_kernel

        # NOTE(review): only a single hidden dense layer is supported here;
        # the reason is not visible in this method -- confirm with the
        # code that sizes self.weights.
        assert self.num_mlp_layers == 1
        for layer in range(self.num_mlp_layers):
            # The dense layer receives its parameters through a second
            # input layer whose value is supplied via ``inputs``.
            w_layer = lasagne.layers.InputLayer((None, self.num_hids))
            weight = self.weights[:, t:t + self.num_hids].reshape(
                (self.wd1, self.num_hids))
            inputs[w_layer] = weight
            p_net = stochasticDenseLayer2([p_net, w_layer],
                                          self.num_hids,
                                          nonlinearity=nonlinearities.rectify)
            t += self.num_hids

        w_layer = lasagne.layers.InputLayer((None, self.num_classes))
        weight = self.weights[:, t:t + self.num_classes].reshape(
            (self.wd1, self.num_classes))
        inputs[w_layer] = weight
        p_net = stochasticDenseLayer2([p_net, w_layer],
                                      self.num_classes,
                                      nonlinearity=nonlinearities.softmax)

        # Clip the softmax output away from 0 and 1 for numerical stability.
        y = T.clip(get_output(p_net, inputs), 0.001, 0.999)

        self.p_net = p_net
        self.y = y
예제 #2
0
    def _get_primary_net(self):
        """Assemble the primary conv net and store it on the instance.

        An MNIST-shaped input layer (``[None, 1, 28, 28]``) is followed by
        one ``stochasticConv2DLayer`` per ``(weight_shape, args)`` pair from
        ``self.weight_shapes`` / ``self.args`` and a softmax
        ``stochasticDenseLayer2`` with ``self.num_classes`` units.  Every
        layer's parameters are consecutive column slices of ``self.weights``.

        Each ``args`` entry is read positionally as
        ``(num_filters, filter_size, stride, pad, nonlinearity)``.

        Side effects: prints each conv layer's output shape, and sets
        ``self.p_net`` (final layer) and ``self.y`` (clipped network output).
        """
        n_out = self.num_classes
        # Running column offset into self.weights.
        offset = np.cast['int32'](0)

        p_net = lasagne.layers.InputLayer([None, 1, 28, 28])
        inputs = {p_net: self.input_var}

        for shape, layer_args in zip(self.weight_shapes, self.args):
            n_weights = np.prod(shape)
            filters = self.weights[:, offset:offset + n_weights].reshape(shape)
            p_net = stochasticConv2DLayer(
                [p_net, filters],
                layer_args[0],  # num_filters
                layer_args[1],  # filter_size
                layer_args[2],  # stride
                layer_args[3],  # pad
                nonlinearity=layer_args[4])
            print(p_net.output_shape)
            offset += n_weights

        # The output layer's parameters enter through their own input layer,
        # whose value is bound in ``inputs``.
        out_params = lasagne.layers.InputLayer((None, n_out))
        inputs[out_params] = self.weights[:, offset:offset + n_out].reshape(
            (self.wd1, n_out))
        p_net = stochasticDenseLayer2([p_net, out_params],
                                      n_out,
                                      nonlinearity=nonlinearities.softmax)

        # Keep the output away from 0 and 1 for numerical stability.
        self.p_net = p_net
        self.y = T.clip(get_output(p_net, inputs), 0.001, 0.999)
예제 #3
0
def main():
    """
    MNIST example: convolutional primary net with hypernetwork-generated
    parameters.

    A stochastic hypernetwork (a ``LinearFlowLayer``, plus two
    ``CoupledDenseLayer`` blocks separated by a ``PermuteLayer`` when
    ``--coupling`` is given) transforms Gaussian noise into one parameter
    vector.  That vector is sliced into the filters of three stride-2
    convolution layers and the 10 parameters fed to the softmax output
    layer.  The model is trained with Adam on ``-(logpyx - kl / dataset_size)``
    and afterwards the accuracy of the Monte-Carlo-averaged prediction on
    the first 1000 training and validation examples is printed.

    Command-line flags:
        --coupling  add the coupling layers to the hypernetwork
        --size      number of training examples used (clamped to [10, 50000])
        --lrdecay   forwarded to ``train_model``
        --lr0       initial learning rate forwarded to ``train_model``
        --lbda      prior hyper-parameter used in ``logpw``
        --bs        batch size forwarded to ``train_model``
        --filename  path of the MNIST pickle passed to ``load_mnist``
    """

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--coupling', action='store_true')
    parser.add_argument('--size', default=10000, type=int)
    parser.add_argument('--lrdecay', action='store_true')
    parser.add_argument('--lr0', default=0.1, type=float)
    parser.add_argument('--lbda', default=0.01, type=float)
    parser.add_argument('--bs', default=50, type=int)
    # Previously hard-coded; the default keeps the original location.
    parser.add_argument('--filename', default='/data/lisa/data/mnist.pkl.gz',
                        type=str)
    args = parser.parse_args()
    print(args)

    coupling = args.coupling
    lr0 = args.lr0
    lrdecay = args.lrdecay
    lbda = np.cast[floatX](args.lbda)
    bs = args.bs
    size = max(10, min(50000, args.size))  # clamp to [10, 50000]
    clip_grad = 5
    max_norm = 10

    num_classes = 10
    n_mc = 100  # Monte-Carlo forward passes used for evaluation
    n_eval = 1000  # examples per split used for evaluation
    n_show = 15  # Monte-Carlo samples printed at the end

    # load dataset
    filename = args.filename
    train_x, train_y, valid_x, valid_y, test_x, test_y = load_mnist(filename)
    train_x = train_x.reshape(50000, 1, 28, 28)
    valid_x = valid_x.reshape(10000, 1, 28, 28)
    test_x = test_x.reshape(10000, 1, 28, 28)

    input_var = T.tensor4('input_var')
    target_var = T.matrix('target_var')
    dataset_size = T.scalar('dataset_size')
    lr = T.scalar('lr')

    # Three 5x5 conv layers with 16 filters each (applied below with
    # stride 2 and 'same' padding), followed by a 10-way softmax layer.
    weight_shapes = [
        (16, 1, 5, 5),  # -> (None, 16, 14, 14)
        (16, 16, 5, 5),  # -> (None, 16,  7,  7)
        (16, 16, 5, 5)
    ]  # -> (None, 16,  4,  4)

    # All conv filters plus the parameters of the output layer.
    num_params = sum(np.prod(ws) for ws in weight_shapes) + num_classes
    wd1 = 1

    # stochastic hypernet
    ep = srng.normal(std=0.01, size=(wd1, num_params), dtype=floatX)
    logdets_layers = []
    h_layer = lasagne.layers.InputLayer([None, num_params])

    # Each flow layer yields two outputs: index 0 is passed on to the next
    # layer, index 1 is collected and summed into ``logdets`` below.
    layer_temp = LinearFlowLayer(h_layer)
    h_layer = IndexLayer(layer_temp, 0)
    logdets_layers.append(IndexLayer(layer_temp, 1))

    if coupling:
        layer_temp = CoupledDenseLayer(h_layer, 200)
        h_layer = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))

        h_layer = PermuteLayer(h_layer, num_params)

        layer_temp = CoupledDenseLayer(h_layer, 200)
        h_layer = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))

    weights = lasagne.layers.get_output(h_layer, ep)

    # primary net
    t = np.cast['int32'](0)  # running column offset into ``weights``
    layer = lasagne.layers.InputLayer([None, 1, 28, 28])
    inputs = {layer: input_var}
    for ws in weight_shapes:
        num_param = np.prod(ws)
        weight = weights[:, t:t + num_param].reshape(ws)
        num_filters = ws[0]
        filter_size = ws[2]
        stride = 2
        pad = 'same'
        layer = stochasticConv2DLayer([layer, weight], num_filters,
                                      filter_size, stride, pad)
        print(layer.output_shape)
        t += num_param

    # The output layer receives its parameters through a second input
    # layer whose value is bound in ``inputs``.
    w_layer = lasagne.layers.InputLayer((None, num_classes))
    weight = weights[:, t:t + num_classes].reshape((wd1, num_classes))
    inputs[w_layer] = weight
    layer = stochasticDenseLayer2([layer, w_layer],
                                  num_classes,
                                  nonlinearity=nonlinearities.softmax)

    # Clip the softmax output away from 0 and 1.
    y = T.clip(get_output(layer, inputs), 0.001, 0.999)

    # loss terms
    logdets = sum([get_output(logdet, ep) for logdet in logdets_layers])
    # NOTE(review): ``ep`` is drawn with std=0.01 while this term has the
    # form of a unit-variance Gaussian log-density -- confirm this is
    # intended.
    logqw = -(0.5 *
              (ep**2).sum(1) + 0.5 * T.log(2 * np.pi) * num_params + logdets)
    # NOTE(review): presumably the third argument of ``log_normal`` is a
    # log-variance -- verify against its definition.
    logpw = log_normal(weights, 0., -T.log(lbda)).sum(1)
    # Alternative prior: log_stdnormal(weights).sum(1)
    kl = (logqw - logpw).mean()
    logpyx = -cc(y, target_var).mean()
    loss = -(logpyx - kl / T.cast(dataset_size, floatX))

    params = lasagne.layers.get_all_params([layer])[1:]  # excluding rand state
    grads = T.grad(loss, params)

    # Rescale by total norm first, then clip element-wise.
    mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=max_norm)
    cgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]
    updates = lasagne.updates.adam(cgrads, params, learning_rate=lr)

    train = theano.function([input_var, target_var, dataset_size, lr],
                            loss,
                            updates=updates)
    predict = theano.function([input_var], y.argmax(1))

    # The return value was never used, so it is no longer bound.
    train_model(train, predict, train_x[:size], train_y[:size],
                valid_x, valid_y, lr0, lrdecay, bs)

    # Monte-Carlo evaluation: every call draws fresh hypernetwork noise.
    output_probs = theano.function([input_var], y)
    MCt = np.zeros((n_mc, n_eval, num_classes))
    MCv = np.zeros((n_mc, n_eval, num_classes))
    for i in range(n_mc):
        MCt[i] = output_probs(train_x[:n_eval])
        MCv[i] = output_probs(valid_x[:n_eval])

    # Average the class probabilities over samples before taking the argmax.
    tr = np.equal(MCt.mean(0).argmax(-1), train_y[:n_eval].argmax(-1)).mean()
    va = np.equal(MCv.mean(0).argmax(-1), valid_y[:n_eval].argmax(-1)).mean()
    print("train perf= %s" % (tr,))
    print("valid perf= %s" % (va,))

    # Show how the prediction for the first training example varies across
    # the first few Monte-Carlo samples (probabilities scaled by 1000).
    for ii in range(n_show):
        print(np.round(MCt[ii][0] * 1000))