Example #1
def main():
    # -- top-level parameters of this script
    dtype = 'float32'  # XXX
    n_examples = 50000
    online_batch_size = 1
    online_epochs = 2
    batch_epochs = 30
    lbfgs_m = 20

    # -- load and prepare the data set
    data_view = mnist.views.OfficialVectorClassification(x_dtype=dtype)
    n_classes = 10
    x = data_view.train.x[:n_examples]
    y = data_view.train.y[:n_examples]
    y1 = -1 * ones((len(y), n_classes)).astype(dtype)
    y1[arange(len(y)), y] = 1

    # --initialize the SVM model
    w = zeros((x.shape[1], n_classes), dtype=dtype)
    b = zeros(n_classes, dtype=dtype)

    def svm(ww, bb, xx=x, yy=y1):
        # -- one vs. all linear SVM loss
        margin = yy * (dot(xx, ww) + bb)
        hinge = maximum(0, 1 - margin)
        cost = hinge.mean(axis=0).sum()
        return cost

    # -- stage-1 optimization by stochastic gradient descent
    print 'Starting SGD'
    n_batches = n_examples / online_batch_size
    w, b = fmin_sgd(
        svm,
        (w, b),
        streams={
            'xx': x.reshape((n_batches, online_batch_size, x.shape[1])),
            'yy': y1.reshape((n_batches, online_batch_size, y1.shape[1]))
        },
        loops=online_epochs,
        stepsize=0.001,
        print_interval=10000,
    )

    print 'SGD complete, about to start L-BFGS'
    show_filters(w.T, (28, 28), (2, 5))

    # -- stage-2 optimization by L-BFGS
    print 'Starting L-BFGS'
    w, b = fmin_l_bfgs_b(svm, (w, b), maxfun=batch_epochs, iprint=1, m=lbfgs_m)

    print 'L-BFGS complete'
    show_filters(w.T, (28, 28), (2, 5))
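
To make the one-vs-all label encoding and the hinge loss computed by `svm()` above concrete, here is a small standalone NumPy check; the shapes and numbers are illustrative only, not part of the example:

import numpy as np

n_examples, n_features, n_classes = 4, 3, 10
rng = np.random.RandomState(0)
xx = rng.rand(n_examples, n_features).astype('float32')
y = rng.randint(0, n_classes, size=n_examples)

# -- one-vs-all targets: +1 for the true class, -1 everywhere else
yy = -np.ones((n_examples, n_classes), dtype='float32')
yy[np.arange(n_examples), y] = 1

ww = np.zeros((n_features, n_classes), dtype='float32')
bb = np.zeros(n_classes, dtype='float32')

margin = yy * (np.dot(xx, ww) + bb)   # -- shape (n_examples, n_classes)
hinge = np.maximum(0, 1 - margin)     # -- per-example, per-class hinge
cost = hinge.mean(axis=0).sum()       # -- mean over examples, sum over classes
print cost                            # -- 10.0 at the zero initialization
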
Example #2
    def fit(self, x, y, xw=None):
        """
        x - n_examples x n_features design matrix.
        y - vector of integer labels
        xw - matrix of real-valued incoming biases obtained
            by multiplying the existing weight vectors by x
        """
        assert set(y) <= set([-1, 1])

        if x.shape[0] != y.shape[0]:
            raise ValueError('length mismatch between x and y')
        n_examples, n_features = x.shape
        if n_features != self.n_features:
            raise ValueError('n_feature mismatch', (n_features,
                self.n_features))

        weights = self.weights
        bias = self.bias
        alpha = self.alpha

        x = x.astype(self.dtype)
        y = y.astype(self.dtype)

        xw = self.as_xw(x, xw)
        print 'WARNING: IncrementalSVM should use alpha0, n_sgd_iters'

        # -- warm up with some sgd
        weights, bias, alpha, = autodiff.fmin_sgd(
                lambda w, b, a, xi, yi, xwi:
                    binary_svm_hinge_loss(xi, yi, w, b, a, None,
                        None,
                        self.l2_regularization),
                (weights, bias, alpha),
                streams={
                    'xi': x.reshape((n_examples, 1, x.shape[1])),
                    'yi': y.reshape((n_examples, 1)),
                    },
                stepsize=0.01,
                loops=max(1, 100000 // len(x)),
                )

        # -- fine-tune without alpha by L-BFGS
        weights, bias, alpha, = autodiff.fmin_l_bfgs_b(
                lambda w, b, a:
                    binary_svm_hinge_loss(x, y,
                        w, b, a, None, None,
                        self.l2_regularization),
                (weights, bias, alpha),
                # -- the graph is tiny, time spent optimizing it is wasted.
                theano_mode=theano.Mode(linker='cvm', optimizer='fast_run'),
                **self.bfgs_kwargs)


        self.weights = weights
        self.bias = bias
        self.alpha = alpha
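
`binary_svm_hinge_loss` is not shown in this excerpt. A hypothetical stand-in with the call signature used by the lambdas above might look like the sketch below; it ignores the `a` (alpha) and partial-fit arguments, whose roles are not visible here:

import numpy as np

def binary_svm_hinge_loss_sketch(xi, yi, w, b, a, pxw, pw_l2_sqr, l2_regularization):
    # -- hypothetical stand-in, not the library's actual implementation:
    #    plain binary hinge loss plus an L2 penalty on the weights
    margin = yi * (np.dot(xi, w) + b)   # -- yi in {-1, +1}
    hinge = np.maximum(0, 1 - margin)
    l2_cost = 0.5 * l2_regularization * np.dot(w, w)
    return hinge.mean() + l2_cost
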
Example #3
def main():
    # -- top-level parameters of this script
    dtype = 'float32'  # XXX
    n_examples = 50000
    online_batch_size = 1
    online_epochs = 2
    batch_epochs = 30
    lbfgs_m = 20
    n_mlp_hiddens = [200]  # -- one entry per hidden layer

    # -- load and prepare the data set
    data_view = mnist.views.OfficialVectorClassification(x_dtype=dtype)
    n_classes = 10
    x = data_view.train.x[:n_examples]
    y = data_view.train.y[:n_examples]
    y1 = -1 * ones((len(y), n_classes)).astype(dtype)
    y1[arange(len(y)), y] = 1

    # -- allocate the model by running one example through it
    init_params = {}
    mlp_svm(init_params, x[:1], y[:1], n_mlp_hiddens, n_classes)

    if online_epochs:
        # -- stage-1 optimization by stochastic gradient descent
        print 'Starting SGD'
        n_batches = n_examples / online_batch_size
        stage1_params, = fmin_sgd(mlp_svm, (init_params,),
                streams={
                    'x': x.reshape((n_batches, online_batch_size, x.shape[1])),
                    'y1': y1.reshape((n_batches, online_batch_size, y1.shape[1]))},
                loops=online_epochs,
                stepsize=0.001,
                print_interval=10000,
                )

        print 'SGD complete, about to start L-BFGS'
        show_filters(stage1_params['mlp']['weights'][0].T, (28, 28), (8, 25,))
    else:
        print 'Skipping stage-1 SGD'
        stage1_params = init_params

    # -- stage-2 optimization by L-BFGS
    if batch_epochs:
        def batch_mlp_svm(p):
            return mlp_svm(p, x, y1)

        print 'Starting L-BFGS'
        stage2_params, = fmin_l_bfgs_b(batch_mlp_svm,
                args=(stage1_params,),
                maxfun=batch_epochs,
                iprint=1,
                m=lbfgs_m)

        print 'L-BFGS complete'
        show_filters(stage2_params['mlp']['weights'][0].T, (28, 28), (8, 25,))
Example #4
def fit_l_bfgs_b(weights, bias, alpha, x, y, l2reg,
        pxw, pw_l2_sqr, pl2reg, bfgs_kwargs,
        return_after_one_fit=False):
    """
    Refine `weights, bias, alpha` by l_bfgs_b
    """
    n_features, n_classes = weights.shape
    n_prev, n_classes = alpha.shape

    alpha_orig = alpha
    # -- the inplace alpha2 scaling modifies not-yet-fit weights
    #    as the while loop below works its way across
    weights = weights.copy()

    low = 0
    high = n_features

    # -- keep trying to train on less and less of the data until it works
    while True:
        x0 = x[:, low:high]

        x2 = x[:, high:]
        pxw2 = append_xw(pxw, x2, weights[high:])
        pl2reg2 = append_l2_regularization(pl2reg, l2reg)
        alpha2 = append_alpha(alpha)
        pw_l2_sqr2 = append_w_l2_sqr(pw_l2_sqr, weights[high:])

        def fn(w, b, a):
            return multi_svm_hinge_loss(x0, y, w, b, a,
                    pxw2, pw_l2_sqr2, l2reg, pl2reg2)
        try:
            if l_bfgs_b_debug_feature_limit is not None:
                # -- this mechanism is used by unit tests
                if (high - low) > l_bfgs_b_debug_feature_limit:
                    raise MemoryError()
            (weights_, bias, alpha2), info = autodiff.fmin_l_bfgs_b(fn,
                    (weights[low:high], bias, alpha2),
                    return_info=True,
                    borrowable=[x0],
                    floatX=x.dtype,
                    **bfgs_kwargs)
            info['feature_high'] = high
            info['feature_low'] = low
            gc.collect()
            logger.info('fitting successful for %i features' % high)
            break
        except (MemoryError, RuntimeError):
            high /= 2
            if low == high:
                raise
            gc.collect()
            logger.info('fitting required too much memory, falling back to %i' % high)
            continue
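
The `try`/`except` loop above is a memory fallback: if fitting the full feature range runs out of memory, it halves the range and retries. A minimal standalone sketch of the same retry pattern, with a hypothetical `fit_fn`:

import logging

logger = logging.getLogger(__name__)

def fit_with_memory_fallback(fit_fn, n_features):
    # -- keep halving the number of features handed to fit_fn until it fits in memory
    high = n_features
    while True:
        try:
            return fit_fn(high)
        except (MemoryError, RuntimeError):
            high //= 2
            if high == 0:
                raise
            logger.info('fitting required too much memory, falling back to %i', high)
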
Example #5
def main():
    # -- top-level parameters of this script
    dtype = "float32"  # XXX
    n_examples = 50000
    online_batch_size = 1
    online_epochs = 2
    batch_epochs = 30
    lbfgs_m = 20

    # -- load and prepare the data set
    data_view = mnist.views.OfficialVectorClassification(x_dtype=dtype)
    n_classes = 10
    x = data_view.train.x[:n_examples]
    y = data_view.train.y[:n_examples]
    y1 = -1 * ones((len(y), n_classes)).astype(dtype)
    y1[arange(len(y)), y] = 1

    # --initialize the SVM model
    w = zeros((x.shape[1], n_classes), dtype=dtype)
    b = zeros(n_classes, dtype=dtype)

    def svm(ww, bb, xx=x, yy=y1):
        # -- one vs. all linear SVM loss
        margin = yy * (dot(xx, ww) + bb)
        hinge = maximum(0, 1 - margin)
        cost = hinge.mean(axis=0).sum()
        return cost

    # -- stage-1 optimization by stochastic gradient descent
    print "Starting SGD"
    n_batches = n_examples / online_batch_size
    w, b = fmin_sgd(
        svm,
        (w, b),
        streams={
            "xx": x.reshape((n_batches, online_batch_size, x.shape[1])),
            "yy": y1.reshape((n_batches, online_batch_size, y1.shape[1])),
        },
        loops=online_epochs,
        stepsize=0.001,
        print_interval=10000,
    )

    print "SGD complete, about to start L-BFGS"
    show_filters(w.T, (28, 28), (2, 5))

    # -- stage-2 optimization by L-BFGS
    print "Starting L-BFGS"
    w, b = fmin_l_bfgs_b(svm, (w, b), maxfun=batch_epochs, iprint=1, m=lbfgs_m)

    print "L-BFGS complete"
    show_filters(w.T, (28, 28), (2, 5))
Example #6
    def fit(self, x, y, xw=None):
        """
        x - n_examples x n_features design matrix.
        y - vector of integer labels
        xw - matrix of real-valued incoming biases obtained
            by multiplying the existing weight vectors by x
        """
        assert set(y) <= set([-1, 1])

        if x.shape[0] != y.shape[0]:
            raise ValueError('length mismatch between x and y')
        n_examples, n_features = x.shape
        if n_features != self.n_features:
            raise ValueError('n_feature mismatch',
                             (n_features, self.n_features))

        weights = self.weights
        bias = self.bias
        alpha = self.alpha

        x = x.astype(self.dtype)
        y = y.astype(self.dtype)

        xw = self.as_xw(x, xw)
        print 'WARNING: IncrementalSVM should use alpha0, n_sgd_iters'

        # -- warm up with some sgd
        weights, bias, alpha, = autodiff.fmin_sgd(
            lambda w, b, a, xi, yi, xwi: binary_svm_hinge_loss(
                xi, yi, w, b, a, None, None, self.l2_regularization),
            (weights, bias, alpha),
            streams={
                'xi': x.reshape((n_examples, 1, x.shape[1])),
                'yi': y.reshape((n_examples, 1)),
            },
            stepsize=0.01,
            loops=max(1, 100000 // len(x)),
        )

        # -- fine-tune without alpha by L-BFGS
        weights, bias, alpha, = autodiff.fmin_l_bfgs_b(
            lambda w, b, a: binary_svm_hinge_loss(x, y, w, b, a, None, None,
                                                  self.l2_regularization),
            (weights, bias, alpha),
            # -- the graph is tiny, time spent optimizing it is wasted.
            theano_mode=theano.Mode(linker='cvm', optimizer='fast_run'),
            **self.bfgs_kwargs)

        self.weights = weights
        self.bias = bias
        self.alpha = alpha
Example #7
def test_svm():
    """
    This test case should match examples/linear_svm.py
    """

    rng = np.random.RandomState(1)

    # -- create some fake data
    x = rng.rand(10, 5)
    y = 2 * (rng.rand(10) > 0.5) - 1
    l2_regularization = 1e-4

    def loss_fn(weights, bias):
        margin = y * (np.dot(x, weights) + bias)
        loss = np.maximum(0, 1 - margin) ** 2
        l2_cost = 0.5 * l2_regularization * np.dot(weights, weights)
        loss = np.mean(loss) + l2_cost
        print 'ran loss_fn(), returning', loss
        return loss

    w, b = fmin_l_bfgs_b(loss_fn, (np.zeros(5), np.zeros(())))
    final_loss = loss_fn(w, b)
    assert np.allclose(final_loss, 0.7229)
Example #8
def main():
    # -- top-level parameters of this script
    n_hidden1 = n_hidden2 = 25
    dtype = "float32"
    n_examples = 10000
    online_batch_size = 1
    online_epochs = 3

    # -- TIP: partial creates a new function with some parameters filled in
    # algo = partial(denoising_autoencoder_binary_x, noise_level=0.3)
    algo = logistic_autoencoder_binary_x

    batch_epochs = 10
    lbfgs_m = 20

    n_hidden = n_hidden1 * n_hidden2
    rng = np.random.RandomState(123)

    data_view = mnist.views.OfficialVectorClassification(x_dtype=dtype)
    x = data_view.train.x[:n_examples]
    n_examples, n_visible = x.shape
    x_img_res = 28, 28

    # -- uncomment this line to see sample images from the data set
    # show_filters(x[:100], x_img_res, (10, 10))

    # -- create a new model  (w, visbias, hidbias)
    w = rng.uniform(
        low=-4 * np.sqrt(6.0 / (n_hidden + n_visible)),
        high=4 * np.sqrt(6.0 / (n_hidden + n_visible)),
        size=(n_visible, n_hidden),
    ).astype(dtype)
    visbias = np.zeros(n_visible).astype(dtype)
    hidbias = np.zeros(n_hidden).astype(dtype)

    # show_filters(w.T, x_img_res, (n_hidden1, n_hidden2))
    x_stream = x.reshape((n_examples / online_batch_size, online_batch_size, x.shape[1]))

    def train_criterion(ww, hbias, vbias, x_i=x):
        cost, hid = algo(x_i, ww, hbias, vbias)
        l1_cost = abs(ww).sum() * 0.0  # -- raise 0.0 to enforce l1 penalty
        l2_cost = (ww ** 2).sum() * 0.0  # -- raise 0.0 to enforce l2 penalty
        return cost.mean() + l1_cost + l2_cost

    # -- ONLINE TRAINING
    for epoch in range(online_epochs):
        t0 = time.time()
        w, hidbias, visbias = autodiff.fmin_sgd(
            train_criterion,
            args=(w, hidbias, visbias),
            stream=x_stream,  # -- fmin_sgd will loop through this once
            stepsize=0.005,  # -- QQ: you should always tune this
            print_interval=1000,
        )
        print "Online training epoch %i took %f seconds" % (epoch, time.time() - t0)
        show_filters(w.T, x_img_res, (n_hidden1, n_hidden2))

    # -- BATCH TRAINING
    w, hidbias, visbias = autodiff.fmin_l_bfgs_b(
        train_criterion,
        args=(w, hidbias, visbias),
        # -- scipy.fmin_l_bfgs_b kwargs follow
        maxfun=batch_epochs,
        iprint=1,  # -- 1 for verbose, 0 for normal, -1 for quiet
        m=lbfgs_m,  # -- how well to approximate the Hessian
    )

    show_filters(w.T, x_img_res, (n_hidden1, n_hidden2))
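
`logistic_autoencoder_binary_x` comes from the example's codebase and is not shown here; judging by the call `cost, hid = algo(x_i, ww, hbias, vbias)`, it returns a per-example reconstruction cost and the hidden code. A hypothetical tied-weight sketch:

import numpy as np

def logistic_autoencoder_binary_x_sketch(x, w, hidbias, visbias):
    # -- hypothetical stand-in: sigmoid encoder, tied-weight sigmoid decoder,
    #    per-example cross-entropy reconstruction cost on inputs in [0, 1]
    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))
    hid = sigmoid(np.dot(x, w) + hidbias)
    x_rec = sigmoid(np.dot(hid, w.T) + visbias)
    eps = 1e-7  # -- avoid log(0)
    cost = -(x * np.log(x_rec + eps) +
             (1 - x) * np.log(1 - x_rec + eps)).sum(axis=1)
    return cost, hid
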
Example #9
#            streams={
#                'x': x.reshape((n_batches, online_batch_size, x.shape[1])),
#                'y1': y1.reshape((n_batches, online_batch_size, y1.shape[1]))},
#            loops=n_online_epochs,
#            stepsize=0.01,
#            print_interval=n_examples,
#            )
#print 'SGD took %.2f seconds' % (time.time() - t0)
#show_filters(W.T, img_shape, (2, 5))

# -- L-BFGS optimization of our SVM cost.

def batch_criterion(W, b):
    return ova_svm_cost(W, b, x, y1)

W, b = autodiff.fmin_l_bfgs_b(batch_criterion, (W, b), maxfun=20, m=20, iprint=1)

print 'final_cost', batch_criterion(W, b)
# -- N. B. the output from this command comes from Fortran, so iPython does not see it.
#    To monitor progress, look at the terminal from which you launched ipython
#show_filters(W.T, img_shape, (2, 5))

train_predictions = ova_svm_prediction(W, b, x)
train_errors = y != train_predictions
print 'Current train set error rate', np.mean(train_errors)

test_predictions = ova_svm_prediction(W, b, iris.data[:,:2])
test_errors = iris.target != test_predictions
print 'Current test set error rate', np.mean(test_errors)
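
`ova_svm_cost` and `ova_svm_prediction` are defined earlier in the script this fragment comes from; hypothetical stand-ins consistent with how they are called here:

import numpy as np

def ova_svm_prediction_sketch(W, b, x):
    # -- hypothetical stand-in: predict the class with the largest one-vs-all score
    return np.argmax(np.dot(x, W) + b, axis=1)

def ova_svm_cost_sketch(W, b, x, y1):
    # -- hypothetical stand-in: the same one-vs-all hinge loss used in the
    #    earlier linear SVM examples
    margin = y1 * (np.dot(x, W) + b)
    return np.maximum(0, 1 - margin).mean(axis=0).sum()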

Example #10
def main():
    # -- top-level parameters of this script
    n_hidden1 = n_hidden2 = 25
    dtype = 'float32'
    n_examples = 10000
    online_batch_size = 1
    online_epochs = 3

    # -- TIP: partial creates a new function with some parameters filled in
    # algo = partial(denoising_autoencoder_binary_x, noise_level=0.3)
    algo = logistic_autoencoder_binary_x

    batch_epochs = 10
    lbfgs_m = 20

    n_hidden = n_hidden1 * n_hidden2
    rng = np.random.RandomState(123)

    data_view = mnist.views.OfficialVectorClassification(x_dtype=dtype)
    x = data_view.train.x[:n_examples]
    n_examples, n_visible = x.shape
    x_img_res = 28, 28

    # -- uncomment this line to see sample images from the data set
    # show_filters(x[:100], x_img_res, (10, 10))

    # -- create a new model  (w, visbias, hidbias)
    w = rng.uniform(low=-4 * np.sqrt(6. / (n_hidden + n_visible)),
                    high=4 * np.sqrt(6. / (n_hidden + n_visible)),
                    size=(n_visible, n_hidden)).astype(dtype)
    visbias = np.zeros(n_visible).astype(dtype)
    hidbias = np.zeros(n_hidden).astype(dtype)

    # show_filters(w.T, x_img_res, (n_hidden1, n_hidden2))
    x_stream = x.reshape(
        (n_examples / online_batch_size, online_batch_size, x.shape[1]))

    def train_criterion(ww, hbias, vbias, x_i=x):
        cost, hid = algo(x_i, ww, hbias, vbias)
        l1_cost = abs(ww).sum() * 0.0  # -- raise 0.0 to enforce l1 penalty
        l2_cost = (ww**2).sum() * 0.0  # -- raise 0.0 to enforce l2 penalty
        return cost.mean() + l1_cost + l2_cost

    # -- ONLINE TRAINING
    for epoch in range(online_epochs):
        t0 = time.time()
        w, hidbias, visbias = autodiff.fmin_sgd(
            train_criterion,
            args=(w, hidbias, visbias),
            stream=x_stream,  # -- fmin_sgd will loop through this once
            stepsize=0.005,  # -- QQ: you should always tune this
            print_interval=1000,
        )
        print 'Online training epoch %i took %f seconds' % (epoch,
                                                            time.time() - t0)
        show_filters(w.T, x_img_res, (n_hidden1, n_hidden2))

    # -- BATCH TRAINING
    w, hidbias, visbias = autodiff.fmin_l_bfgs_b(
        train_criterion,
        args=(w, hidbias, visbias),
        # -- scipy.fmin_l_bfgs_b kwargs follow
        maxfun=batch_epochs,
        iprint=1,  # -- 1 for verbose, 0 for normal, -1 for quiet
        m=lbfgs_m,  # -- how well to approximate the Hessian
    )

    show_filters(w.T, x_img_res, (n_hidden1, n_hidden2))
Example #11
# -- create some fake data
x = np.random.rand(10, 5)
y = 2 * (np.random.rand(10) > 0.5) - 1
l2_regularization = 1e-4

def loss_fn(weights, bias):
    margin = y * (np.dot(x, weights) + bias)
    loss = np.maximum(0, 1 - margin) ** 2
    l2_cost = 0.5 * l2_regularization * np.dot(weights, weights)
    loss = np.mean(loss) + l2_cost
    print 'ran loss_fn(), returning', loss
    return loss

# -- Run loss_fn once to trace computations.
w, b = fmin_l_bfgs_b(loss_fn, [np.zeros(5), np.zeros(())])

# -- run loss_fn as usual
final_loss = loss_fn(w, b)

print 'Best-fit SVM:'
print ' -> cost:', final_loss
print ' -> weights:', w
print ' -> bias:', b

# Program output:
#
# ran loss_fn(), returning 1.0
# ran loss_fn(), returning 0.722904977725
# Best-fit SVM:
# -> cost: 0.722904977725
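
A quick check of the two loss values printed above: at the zero initialization the margin is 0 everywhere, so each squared hinge term is (1 - 0)**2 = 1 and the L2 penalty is 0, giving exactly 1.0; L-BFGS then drives the cost down to about 0.7229, the same value asserted in the earlier test case. The first value can be verified directly:

import numpy as np

# -- zero weights and bias give margin 0, squared hinge 1 per example,
#    and no L2 penalty, hence a loss of exactly 1.0
x0 = np.random.rand(10, 5)
y0 = 2 * (np.random.rand(10) > 0.5) - 1
margin0 = y0 * (np.dot(x0, np.zeros(5)) + np.zeros(()))
print np.mean(np.maximum(0, 1 - margin0) ** 2)   # -- 1.0
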
Example #12
        pl2reg1 = append_l2_regularization(pl2reg, l2reg)
        alpha = append_alpha(alpha)
        pw_l2_sqr1 = append_w_l2_sqr(pw_l2_sqr, weights_)

        x2 = x[:, high:]
        pxw2 = append_xw(pxw1, x2, weights[high:])
        pl2reg2 = append_l2_regularization(pl2reg1, l2reg)
        alpha2 = append_alpha(alpha)
        pw_l2_sqr2 = append_w_l2_sqr(pw_l2_sqr1, weights[high:])

        def fn(w, b, a):
            return multi_svm_hinge_loss(x1, y, w, b, a,
                    pxw2, pw_l2_sqr2, l2reg, pl2reg2)
        (weights_, bias, alpha2), info = autodiff.fmin_l_bfgs_b(fn,
                (weights[low:high], bias, alpha2),
                return_info=True,
                borrowable=[x1],
                floatX=x.dtype,
                **bfgs_kwargs)

        info['feature_high'] = high
        info['feature_low'] = low

        # -- pop off the alpha we just added
        weights[high:] *= alpha2[-1]
        alpha = alpha2[:-1].copy()

        w0s.append(weights_)
        costs.append(info['fopt'])
        infos.append(info)
        x0 = x1
        pxw = pxw1