Example 1
File: lr.py Project: zphilip/climin
def main():
    # Hyper parameters.
    optimizer = 'lbfgs'        # or use: gd, ncg, rmsprop, rprop
    batch_size = 10000

    flat, (w, b) = climin.util.empty_with_views(tmpl)
    climin.initialize.randomize_normal(flat, 0, 0.1)

    datafile = 'mnist.pkl.gz'
    # Load data.
    with gzip.open(datafile, 'rb') as f:
        train_set, val_set, test_set = cPickle.load(f)

    X, Z = train_set
    VX, VZ = val_set
    TX, TZ = test_set

    def one_hot(arr):
        result = np.zeros((arr.shape[0], 10))
        result[xrange(arr.shape[0]), arr] = 1.
        return result

    Z = one_hot(Z)
    VZ = one_hot(VZ)
    TZ = one_hot(TZ)

    if batch_size is None:
        args = itertools.repeat(([X, Z], {}))
        batches_per_pass = 1
    else:
        args = climin.util.iter_minibatches([X, Z], batch_size, [0, 0])
        args = ((i, {}) for i in args)
        batches_per_pass = X.shape[0] / batch_size

    if optimizer == 'gd':
        opt = climin.GradientDescent(flat, d_loss_wrt_pars, steprate=0.1,
                                     momentum=.95, args=args)
    elif optimizer == 'lbfgs':
        opt = climin.Lbfgs(flat, loss, d_loss_wrt_pars, args=args)
    elif optimizer == 'ncg':
        opt = climin.NonlinearConjugateGradient(flat, loss, d_loss_wrt_pars,
                                                args=args)
    elif optimizer == 'rmsprop':
        opt = climin.RmsProp(flat, d_loss_wrt_pars, steprate=1e-4, decay=0.9,
                             args=args)
    elif optimizer == 'rprop':
        opt = climin.Rprop(flat, d_loss_wrt_pars, args=args)
    else:
        print 'unknown optimizer'
        return 1

    for info in opt:
        if info['n_iter'] % batches_per_pass == 0:
            print '%i/%i test loss: %g' % (
                info['n_iter'], batches_per_pass * 10, loss(flat, VX, VZ))
        if info['n_iter'] >= 10 * batches_per_pass:
            break
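The snippet above uses tmpl, loss and d_loss_wrt_pars without defining them; they come from the rest of lr.py. A minimal sketch of what they might look like for softmax regression on MNIST follows; the shapes and the helper name predict are assumptions for illustration, not taken from the original file.

import numpy as np

# A (784, 10) weight matrix and a 10-dimensional bias for softmax regression.
tmpl = [(784, 10), 10]

def predict(parameters, inputs):
    w = parameters[:784 * 10].reshape((784, 10))
    b = parameters[784 * 10:]
    scores = np.dot(inputs, w) + b
    scores -= scores.max(axis=1, keepdims=True)   # subtract row max for numerical stability
    e = np.exp(scores)
    return e / e.sum(axis=1, keepdims=True)

def loss(parameters, inputs, targets):
    # mean cross-entropy against one-hot targets
    return -(targets * np.log(predict(parameters, inputs) + 1e-12)).sum(axis=1).mean()

def d_loss_wrt_pars(parameters, inputs, targets):
    p = predict(parameters, inputs)
    d_w = np.dot(inputs.T, p - targets) / inputs.shape[0]
    d_b = (p - targets).mean(axis=0)
    return np.concatenate([d_w.flatten(), d_b])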
Example 2
def test_mlp(learning_rate=0.01,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=500,
             dataset='mnist.pkl.gz',
             batch_size=20,
             n_hidden=500,
             optimizer='gd',
             activation=T.tanh):

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    tmpl = [(28 * 28, n_hidden), n_hidden, (n_hidden, 10), 10]
    flat, (Weights_1, bias_1, Weights_2,
           bias_2) = climin.util.empty_with_views(tmpl)

    # Initialize weights with a uniform distribution, following the tutorial
    rng = numpy.random.RandomState(1234)
    Weights_1_init = rng.uniform(low=-numpy.sqrt(6. / (28 * 28 + n_hidden)),
                                 high=numpy.sqrt(6. / (28 * 28 + n_hidden)),
                                 size=(28 * 28, n_hidden))

    Weights_2_init = rng.uniform(low=-numpy.sqrt(6. / (n_hidden + 10)),
                                 high=numpy.sqrt(6. / (n_hidden + 10)),
                                 size=(n_hidden, 10))

    bias_1_init = numpy.zeros((n_hidden, ), dtype=theano.config.floatX)
    bias_2_init = numpy.zeros((10, ), dtype=theano.config.floatX)

    if activation == T.nnet.sigmoid:
        Weights_1_init *= 4
        Weights_2_init *= 4

    def initialize_in_place(array, values):
        for j in range(0, len(values)):
            array[j] = values[j]

    initialize_in_place(Weights_1, Weights_1_init)
    initialize_in_place(Weights_2, Weights_2_init)
    initialize_in_place(bias_1, bias_1_init)
    initialize_in_place(bias_2, bias_2_init)

    if batch_size is None:
        args = itertools.repeat(([train_set_x, train_set_y], {}))
        n_train_batches = 1
    else:
        args = cli.util.iter_minibatches([train_set_x, train_set_y],
                                         batch_size, [0, 0])
        args = ((i, {}) for i in args)
        n_train_batches = train_set_x.shape[0] // batch_size

    print('... building the model')

    x = T.matrix('x')
    y = T.ivector('y')

    rng = numpy.random.RandomState(1234)

    classifier = MLP(rng=rng,
                     input=x,
                     n_in=28 * 28,
                     n_hidden=n_hidden,
                     n_out=10,
                     Weights_1=theano.shared(value=Weights_1,
                                             name='W',
                                             borrow=True),
                     bias_1=theano.shared(value=bias_1, name='b', borrow=True),
                     Weights_2=theano.shared(value=Weights_2,
                                             name='W',
                                             borrow=True),
                     bias_2=theano.shared(value=bias_2, name='b', borrow=True),
                     activation=activation)

    #cost with regularisation terms
    cost = theano.function(inputs=[x, y],
                           outputs=classifier.negative_log_likelihood(y) +
                           L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr,
                           allow_input_downcast=True)

    # gradients with regularisation terms
    gradients = theano.function(
        inputs=[x, y],
        outputs=[
            T.grad(
                classifier.negative_log_likelihood(y) +
                L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr,
                classifier.hiddenLayer.W),
            T.grad(
                classifier.negative_log_likelihood(y) +
                L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr,
                classifier.hiddenLayer.b),
            T.grad(
                classifier.negative_log_likelihood(y) +
                L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr,
                classifier.logRegressionLayer.W),
            T.grad(
                classifier.negative_log_likelihood(y) +
                L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr,
                classifier.logRegressionLayer.b)
        ],
        allow_input_downcast=True)

    def loss(parameters, input, target):
        return cost(input, target)

    def d_loss_wrt_pars(parameters, inputs, targets):
        g_W_1, g_b_1, g_W_2, g_b_2 = gradients(inputs, targets)

        return numpy.concatenate(
            [g_W_1.flatten(), g_b_1,
             g_W_2.flatten(), g_b_2])

    zero_one_loss = theano.function(inputs=[x, y],
                                    outputs=classifier.errors(y),
                                    allow_input_downcast=True)

    if optimizer == 'gd':
        print('... using gradient descent')
        opt = cli.GradientDescent(flat,
                                  d_loss_wrt_pars,
                                  step_rate=learning_rate,
                                  momentum=.95,
                                  args=args)
    elif optimizer == 'bfgs':
        print('... using quasi-Newton BFGS')
        opt = cli.Bfgs(flat, loss, d_loss_wrt_pars, args=args)
    elif optimizer == 'lbfgs':
        print('... using quasi-Newton L-BFGS')
        opt = cli.Lbfgs(flat, loss, d_loss_wrt_pars, args=args)
    elif optimizer == 'nlcg':
        print('... using nonlinear conjugate gradient')
        opt = cli.NonlinearConjugateGradient(flat,
                                             loss,
                                             d_loss_wrt_pars,
                                             min_grad=1e-03,
                                             args=args)
    elif optimizer == 'rmsprop':
        print('... using rmsprop')
        opt = cli.RmsProp(flat,
                          d_loss_wrt_pars,
                          step_rate=1e-4,
                          decay=0.9,
                          args=args)
    elif optimizer == 'rprop':
        print('... using resilient propagation')
        opt = cli.Rprop(flat, d_loss_wrt_pars, args=args)
    elif optimizer == 'adam':
        print('... using adaptive momentum estimation optimizer')
        opt = cli.Adam(flat,
                       d_loss_wrt_pars,
                       step_rate=0.0002,
                       decay=0.99999999,
                       decay_mom1=0.1,
                       decay_mom2=0.001,
                       momentum=0,
                       offset=1e-08,
                       args=args)
    elif optimizer == 'adadelta':
        print('... using adadelta')
        opt = cli.Adadelta(flat,
                           d_loss_wrt_pars,
                           step_rate=1,
                           decay=0.9,
                           momentum=.95,
                           offset=0.0001,
                           args=args)
    else:
        print('unknown optimizer')
        return 1

    print('... training')

    # early stopping parameters
    if batch_size is None:
        patience = 250
    else:
        patience = 10000  # look at this many samples regardless

    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = numpy.inf
    test_loss = 0.

    valid_losses = []
    train_losses = []
    test_losses = []

    epoch = 0

    start_time = timeit.default_timer()

    for info in opt:
        iter = info['n_iter']
        epoch = iter // n_train_batches
        minibatch_index = iter % n_train_batches

        if iter % validation_frequency == 0:
            validation_loss = zero_one_loss(valid_set_x, valid_set_y)
            valid_losses.append(validation_loss)
            train_losses.append(zero_one_loss(train_set_x, train_set_y))
            test_losses.append(zero_one_loss(test_set_x, test_set_y))

            print(
                'epoch %i, minibatch %i/%i, validation error % f %%, iter/patience %i/%i'
                % (epoch, minibatch_index + 1, n_train_batches,
                   validation_loss * 100, iter, patience))
            # if we got the best validation score until now
            if validation_loss < best_validation_loss:
                # improve patience if loss improvement is good enough
                if validation_loss < best_validation_loss * improvement_threshold:
                    patience = max(patience, iter * patience_increase)
                best_validation_loss = validation_loss
                # test it on the test set
                test_loss = zero_one_loss(test_set_x, test_set_y)

                print(
                    '    epoch %i, minibatch %i/%i, test error of best model %f %%'
                    % (epoch, minibatch_index + 1, n_train_batches,
                       test_loss * 100))

        if patience <= iter or epoch >= n_epochs:
            break

    end_time = timeit.default_timer()
    print((
        'Optimization complete. Best validation score of %f %% with test performance %f %%'
    ) % (best_validation_loss * 100., test_loss * 100.))
    print(
        ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' %
         ((end_time - start_time) / 60.)),
        file=sys.stderr)

    losses = (train_losses, valid_losses, test_losses)

    return classifier, losses
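A hypothetical call to test_mlp; the keyword values below are illustrative only, and any of the optimizer keys handled above ('gd', 'bfgs', 'lbfgs', 'nlcg', 'rmsprop', 'rprop', 'adam', 'adadelta') may be passed.

classifier, (train_losses, valid_losses, test_losses) = test_mlp(
    optimizer='adam', n_epochs=100, batch_size=20, n_hidden=500)
print('final validation error: %f %%' % (valid_losses[-1] * 100))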
Example 3
def sgd_optimization_mnist(learning_rate=0.01,
                           n_epochs=1000,
                           dataset='mnist.pkl.gz',
                           batch_size=600,
                           optimizer='gd'):

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    tmpl = [(28 * 28, 10), 10]
    flat, (Weights, bias) = climin.util.empty_with_views(tmpl)

    cli.initialize.randomize_normal(flat, 0, 1)

    if batch_size is None:
        args = itertools.repeat(([train_set_x, train_set_y], {}))
        n_train_batches = 1
    else:
        args = cli.util.iter_minibatches([train_set_x, train_set_y], batch_size, [0, 0])
        args = ((i, {}) for i in args)
        n_train_batches = train_set_x.shape[0] // batch_size

    print('... building the model')

    x = T.matrix('x')
    y = T.ivector('y')

    classifier = LogisticRegression(
            input = x,
            n_in = 28 * 28,
            n_out = 10,
            W = theano.shared(value = Weights, name = 'W', borrow = True),
            b = theano.shared(value = bias, name = 'b', borrow = True)
            )

    gradients = theano.function(
            inputs = [x, y],
            outputs = [
                T.grad(classifier.negative_log_likelihood(y), classifier.W),
                T.grad(classifier.negative_log_likelihood(y), classifier.b)
                ],
            allow_input_downcast = True
            )

    cost = theano.function(
        inputs=[x, y],
        outputs=classifier.negative_log_likelihood(y),
        allow_input_downcast=True
    )

    def loss(parameters, input, target):
        return cost(input, target)

    def d_loss_wrt_pars(parameters, inputs, targets):
        g_W, g_b = gradients(inputs, targets)

        return np.concatenate([g_W.flatten(), g_b])

    zero_one_loss = theano.function(
            inputs = [x, y],
            outputs = classifier.errors(y),
            allow_input_downcast = True
            )

    if optimizer == 'gd':
        print('... using gradient descent')
        opt = cli.GradientDescent(flat, d_loss_wrt_pars, step_rate=learning_rate, momentum=.95, args=args)
    elif optimizer == 'rmsprop':
        print('... using rmsprop')
        opt = cli.RmsProp(flat, d_loss_wrt_pars, step_rate=1e-4, decay=0.9, args=args)
    elif optimizer == 'rprop':
        print('... using resilient propagation')
        opt = cli.Rprop(flat, d_loss_wrt_pars, args=args)
    elif optimizer == 'adam':
        print('... using adaptive momentum estimation optimizer')
        opt = cli.Adam(flat, d_loss_wrt_pars, step_rate = 0.0002, decay = 0.99999999,
                       decay_mom1 = 0.1, decay_mom2 = 0.001, momentum = 0, offset = 1e-08, args=args)
    elif optimizer == 'adadelta':
        print('... using adadelta')
        opt = cli.Adadelta(flat, d_loss_wrt_pars, step_rate=1, decay = 0.9,
                           momentum = .95, offset = 0.0001, args=args)
    else:
        print('unknown optimizer')
        return 1

    print('... training the model')

    # early stopping parameters
    if batch_size is None:
        patience = 250
    else:
        patience = 5000 # look at this many samples regardless

    patience_increase = 2 # wait this much longer when a new best is found
    improvement_threshold = 0.995 # a relative improvement of this much is considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = np.inf
    test_loss = 0.

    valid_losses = []
    train_losses = []
    test_losses = []

    epoch = 0

    start_time = timeit.default_timer()
    for info in opt:
        iter = info['n_iter']
        epoch = iter // n_train_batches
        minibatch_index = iter % n_train_batches

        if iter % validation_frequency == 0:
            # compute zero-one loss on validation set
            validation_loss = zero_one_loss(valid_set_x, valid_set_y)
            valid_losses.append(validation_loss)
            train_losses.append(zero_one_loss(train_set_x, train_set_y))
            test_losses.append(zero_one_loss(test_set_x, test_set_y))

            print(
                    'epoch %i, minibatch %i/%i, validation error % f %%, iter/patience %i/%i' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        validation_loss * 100,
                        iter,
                        patience
                        )
                    )
            # if we got the best validation score until now
            if validation_loss < best_validation_loss:
               # improve patience if loss improvement is good enough
                if validation_loss < best_validation_loss * improvement_threshold:
                    patience = max(patience, iter * patience_increase)
                best_validation_loss = validation_loss
                # test it on the test set
                test_loss = zero_one_loss(test_set_x, test_set_y)

                print(
                        '    epoch %i, minibatch %i/%i, test error of best model %f %%' %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_loss * 100
                            )
                        )

        if patience <= iter or epoch >= n_epochs:
            break

    end_time = timeit.default_timer()

    print('Optimization complete with best validation score of %f %%, with test performance %f %%' % (best_validation_loss * 100., test_loss * 100.))
    print('The code ran for %d epochs, with %f epochs/sec' % (epoch, 1. * epoch / (end_time - start_time)))
    print(('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time))), file=sys.stderr)

    losses = (train_losses, valid_losses, test_losses)

    return classifier, losses
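A note on the pattern shared by these examples: climin.util.empty_with_views returns views into a single flat parameter array, and wrapping those views in theano.shared(..., borrow=True) means the optimizer's in-place updates to flat are immediately visible to the Theano graph, which is why loss and d_loss_wrt_pars can ignore their parameters argument. A small stand-alone check of that aliasing (the shapes are just for illustration):

import numpy as np
import climin.util

flat, (W, b) = climin.util.empty_with_views([(784, 10), 10])
flat[:] = 0.
W[0, 0] = 1.
assert flat[0] == 1.            # W is a view into flat, not a copy
b[:] = 0.5
assert flat[-10:].sum() == 5.   # the last 10 entries of flat back the bias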
Example 4
def run_devise(image_vecs,
               image_labels,
               word_vecs,
               n_epochs,
               checkpoint_file,
               iters_per_checkpoint,
               iters_per_eval,
               validation_inds,
               logfile,
               step_rate=1e-4,
               decay=0.9,
               dm_thresh=0.1):
    # TODO:
    n_samples = len(image_labels)
    n_minibatch = 1
    n_iters = int(np.ceil(n_epochs * n_samples / n_minibatch))

    word_dim = word_vecs.shape[1]
    image_dim = image_vecs.shape[1]

    # Initialize M
    m_flat = np.random.randn(word_dim * image_dim)
    #    m_flat = np.zeros(word_dim * image_dim)
    #    m_flat = np.random.randn(word_dim * image_dim)

    # Beware momentum, as it can cause nonconvergence.
    devise_args = make_minibatch_iterator(image_vecs,
                                          image_labels,
                                          word_vecs,
                                          n_minibatch=1)
    #opt = climin.RmsProp(m_flat, devise_loss_one_sample, step_rate=step_rate, decay=decay, args=devise_args)
    opt = climin.GradientDescent(m_flat,
                                 devise_loss_one_sample,
                                 step_rate=step_rate,
                                 momentum=.95,
                                 args=devise_args)

    old_m_flat = np.copy(m_flat)

    last_validation_loss = np.nan

    lf = open(logfile, 'w')

    for info in opt:
        if info["n_iter"] % iters_per_checkpoint == 0:
            save.save(checkpoint_file,
                      info=info,
                      m_flat=m_flat,
                      last_validation_loss=last_validation_loss)

        # No validation set yet
        if info["n_iter"] % iters_per_eval == 0:
            dm = np.linalg.norm(m_flat - old_m_flat, 1)

            if dm < dm_thresh:
                print("Optimization converged at %d iters: dm = %g < %g." %
                      (info["n_iter"], dm, dm_thresh))
                M = np.reshape(m_flat, (word_dim, image_dim))
                lf.close()
                return (M, info)

            old_m_flat = np.copy(m_flat)
            last_validation_loss = validation_loss(m_flat, image_vecs,
                                                   image_labels, word_vecs,
                                                   validation_inds)
            print("Iter %d, dM (1-norm) = %g, validation loss = %g" %
                  (info["n_iter"], dm, last_validation_loss))
            lf.write("Iter %d, dM (1-norm) = %g, validation loss = %g\n" %
                     (info["n_iter"], dm, last_validation_loss))
            lf.flush()

        if info["n_iter"] == n_iters:
            M = np.reshape(m_flat, (word_dim, image_dim))
            lf.close()
            return (M, info)
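make_minibatch_iterator, devise_loss_one_sample, validation_loss and save.save are defined elsewhere in this project. climin optimizers consume args as an iterator of (positional_args, kwargs) pairs that are forwarded to the gradient function after the parameter vector, so a sketch of the iterator might look like the following; the exact argument layout expected by devise_loss_one_sample is an assumption.

import numpy as np

def make_minibatch_iterator(image_vecs, image_labels, word_vecs, n_minibatch=1):
    # Yield (args, kwargs) pairs forever; climin passes them to the gradient
    # function after the flat parameter vector m_flat.
    n = len(image_labels)
    rng = np.random.RandomState(0)
    while True:
        idx = rng.randint(0, n, size=n_minibatch)
        yield (image_vecs[idx], image_labels[idx], word_vecs), {}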
Example 5
def climin_wrapper(oracle,
                   w0,
                   train_points,
                   train_targets,
                   options,
                   method='AdaDelta'):
    default_options = {
        'maxiter': 1000,
        'print_freq': 1,
        'verbose': False,
        'g_tol': 1e-5,
        'batch_size': 10,
        'step_rate': 0.1
    }
    if options is not None:
        default_options.update(options)
        if 'print_freq' in options:
            default_options['verbose'] = True
    options = default_options

    w = w0.copy()
    data = ((i, {}) for i in iter_minibatches([train_points, train_targets],
                                              options['batch_size'], [1, 0]))

    if method == 'AdaDelta':
        opt = climin.Adadelta(wrt=w,
                              fprime=oracle,
                              args=data,
                              step_rate=options['step_rate'])
    elif method == 'SG':
        opt = climin.GradientDescent(wrt=w,
                                     fprime=oracle,
                                     args=data,
                                     step_rate=options['step_rate'])
    else:
        raise ValueError('Unknown optimizer')

    w_lst = [w.copy()]
    time_lst = [0.]
    start = time.time()
    n_epochs = options['maxiter']
    n_iterations = int(n_epochs * train_targets.size / options['batch_size'])
    print_freq = int(options['print_freq'] * train_targets.size /
                     options['batch_size'])

    if options['verbose']:
        print('Using ' + method + ' optimizer')
    for info in opt:
        i = info['n_iter']
        if i > n_iterations:
            break
        if not (i % print_freq) and options['verbose']:
            grad = info['gradient']
            print("Iteration ",
                  int(i * options['batch_size'] / train_targets.size), ":")
            print("\tGradient norm", np.linalg.norm(grad))
        if not i % int(train_targets.size / options['batch_size']):
            w_lst.append(w.copy())
            time_lst.append(time.time() - start)

    return w.copy(), w_lst, time_lst
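A hypothetical use of climin_wrapper with a plain least-squares gradient standing in for the oracle. Note that the [1, 0] slicing above means train_points is expected to carry samples along axis 1; the data and oracle below are illustrative only.

import numpy as np

rng = np.random.RandomState(0)
train_points = rng.randn(5, 1000)                       # (n_features, n_samples)
train_targets = train_points.T.dot(rng.randn(5)) + 0.1 * rng.randn(1000)

def oracle(w, points, targets):
    # gradient of the mean squared error 0.5 * ||points.T w - targets||^2 / n
    residual = points.T.dot(w) - targets
    return points.dot(residual) / targets.size

w_opt, w_history, times = climin_wrapper(oracle, np.zeros(5), train_points, train_targets,
                                         {'maxiter': 20, 'batch_size': 50}, method='AdaDelta')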
Example 6
def run_dA(learning_rate=0.1,
           n_epochs=5,
           optimizer='gd',
           n_hidden=500,
           dataset='mnist.pkl.gz',
           batch_size=20,
           n_in=28 * 28,
           corruption=0.0,
           l1_penalty=0.0,
           print_reconstructions=False,
           print_filters=False):

    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]

    n_train_batches = train_set_x.shape[0] // batch_size

    x = T.matrix('x')
    rng = np.random.RandomState(1234)
    theano_rng = RandomStreams(rng.randint(2**30))

    print('...building model')
    dims = [(n_in, n_hidden), n_hidden, n_in]
    flat, (vis_W, hidden_b, vis_b) = climin.util.empty_with_views(dims)

    # initialize with values
    Weights_1_init = rng.uniform(low=-4 * np.sqrt(6. / (n_hidden + n_in)),
                                 high=4 * np.sqrt(6. / (n_hidden + n_in)),
                                 size=(n_in, n_hidden))

    bias_1_init = np.zeros((n_hidden, ), dtype=theano.config.floatX)
    bias_2_init = np.zeros((n_in, ), dtype=theano.config.floatX)

    def initialize_in_place(array, values):
        for j in range(0, len(values)):
            array[j] = values[j]

    initialize_in_place(vis_W, Weights_1_init)
    initialize_in_place(hidden_b, bias_1_init)
    initialize_in_place(vis_b, bias_2_init)

    params = [
        theano.shared(value=vis_W, name='W', borrow=True),
        theano.shared(value=hidden_b, name='b', borrow=True),
        theano.shared(value=vis_b, name='b_prime', borrow=True)
    ]

    da = dA(numpy_rng=rng,
            parameters=params,
            theano_rng=theano_rng,
            input=x,
            n_visible=n_in,
            n_hidden=n_hidden,
            corruption=corruption,
            l1_penalty=l1_penalty)

    def d_loss(parameters, inputs, targets):
        g_W, g_hidden_b, g_vis_b = da.gradients(inputs)

        return np.concatenate([g_W.flatten(), g_hidden_b, g_vis_b])

    if not batch_size:
        args = itertools.repeat(([train_set_x, train_set_y], {}))
    else:
        args = ((i, {}) for i in climin.util.iter_minibatches(
            [train_set_x, train_set_y], batch_size, [0, 0]))

    if optimizer == 'gd':
        print('... using gradient descent')
        opt = climin.GradientDescent(flat,
                                     d_loss,
                                     step_rate=learning_rate,
                                     momentum=0.95,
                                     args=args)
    elif optimizer == 'rmsprop':
        print('... using rmsprop')
        opt = climin.rmsprop.RmsProp(flat, d_loss, step_rate=0.01, args=args)
    else:
        raise ValueError('unknown optimizer')

    print('...encoding')
    epoch = 0
    start_time = timeit.default_timer()

    for info in opt:
        iter = info['n_iter']
        if iter % n_train_batches == 1:
            epoch += 1
            this_loss = da.loss(train_set_x)
            print('\nTraining epoch %d, cost ' % epoch, this_loss)
            if epoch >= n_epochs:
                break

    end_time = timeit.default_timer()

    training_time = (end_time - start_time)

    if print_filters:
        print(
            ('The code for file ' + os.path.split(__file__)[1] +
             ' ran for %.2fm' % ((training_time) / 60.)),
            file=sys.stderr)
        image = Image.fromarray(
            tile_raster_images(X=da.W.get_value(borrow=True).T,
                               img_shape=(28, 28),
                               tile_shape=(int(math.sqrt(n_hidden)),
                                           int(math.sqrt(n_hidden))),
                               tile_spacing=(1, 1)))
        image.save('filters_' + optimizer + ' n_hidden=' + str(n_hidden) +
                   'corruption=' + str(corruption) + ' and l1_pen=' +
                   str(l1_penalty) + '.png',
                   dpi=(300, 300))

    if print_reconstructions:
        data = train_set_x[:100]
        reconstruction = da.reconstructed_input(data)
        image = Image.fromarray(
            tile_raster_images(X=reconstruction,
                               img_shape=(28, 28),
                               tile_shape=(10, 10),
                               tile_spacing=(1, 1)))
        image.save('reconstructions of first 100_' + optimizer + ' n_hidden=' +
                   str(n_hidden) + 'corruption=' + str(corruption) +
                   ' and l1_pen=' + str(l1_penalty) + '.png',
                   dpi=(300, 300))
Example 7
def test_mlp(learning_rate=0.01,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=200,
             batch_size=100,
             n_hidden=300,
             optimizer='GradientDescent',
             activation=T.tanh,
             a=(1, -0.98),
             b=(1, -1)):

    #---- Configure ----
    participant = 1
    series = 1
    no_series = 1
    datatype = 'eeg'
    trials_from = 1
    trials_to = 'end'
    normalize_data = False
    normalize_per_trial = True
    keep_test_unshuffled = False
    #-------------------
    """error_lists = [None, None]
    for i in [ 1]:
        if(i == 0):
            normalize_data = True
            normalize_per_trial = False
        else:
            normalize_data = False
            normalize_per_trial = True"""

    # Get data
    ws = get_ws(participant=participant, series=series)
    windows = ws.get('win')
    (data, trials, led) = get_data(windows,
                                   datatype=datatype,
                                   trials_from=trials_from,
                                   trials_to=trials_to,
                                   normalize_per_trial=normalize_per_trial)
    for i in range(no_series - 1):
        ws = get_ws(participant=participant, series=series + i + 1)
        windows = ws.get('win')
        (data_temp, trials_temp,
         led_temp) = get_data(windows,
                              datatype=datatype,
                              trials_from=trials_from,
                              trials_to=trials_to,
                              normalize_per_trial=normalize_per_trial)
        data = np.vstack((data, data_temp))
        trials = np.concatenate((trials, trials_temp + trials[-1]))
        led = np.concatenate((led, led_temp))

    #Convert led vector to contain 0 for LEDoff and 1 for LEDon
    led_temp = np.zeros((data.shape[0], ))
    led_temp[led] = 1
    led = led_temp

    #For classifying LEDon / LEDoff uncomment following line
    trials = led + 1

    # Filtering
    #a = (1, -0.98)
    #b = (1, -1)
    #data = signal.filtfilt(b, a, data)

    n = data.shape[0]
    n_train = 4 * n // 9
    n_valid = 2 * n // 9
    n_test = n - n_train - n_valid

    if normalize_data:
        data[...] = normalize(data)
    if keep_test_unshuffled:
        (temp, undo_shuffle) = shuffle(np.c_[data[:n_train + n_valid],
                                             trials[:n_train + n_valid] - 1])
        test_set_x, test_set_y = [
            data[n_train + n_valid:], trials[n_train + n_valid:] - 1
        ]
    else:
        (temp, undo_shuffle) = shuffle(np.c_[data, trials - 1])
        test_set_x, test_set_y = (temp[n_train + n_valid:, :data.shape[1]],
                                  temp[n_train + n_valid:, data.shape[1]:])

    train_set_x, train_set_y = (temp[:n_train, :data.shape[1]],
                                temp[:n_train, data.shape[1]:])
    valid_set_x, valid_set_y = (temp[n_train:n_train +
                                     n_valid, :data.shape[1]],
                                temp[n_train:n_train + n_valid,
                                     data.shape[1]:])

    #Use following line for NOT shuffled test data
    #test_set_x, test_set_y = (data[n_train + n_valid:, :data.shape[1]], data[n_train + n_valid:, data.shape[1]:])

    # Reshaping data from (n,1) to (n,)
    train_set_y = train_set_y.reshape(train_set_y.shape[0], )
    valid_set_y = valid_set_y.reshape(valid_set_y.shape[0], )
    test_set_y = test_set_y.reshape(test_set_y.shape[0], )

    n_train_batches = train_set_x.shape[0] // batch_size
    print('Building the Model...')

    x = T.matrix('x')
    y = T.ivector('y')

    rng = np.random.RandomState(1234)

    n_in = data.shape[1]
    n_out = np.unique(trials).shape[0]
    dims = [(n_in, n_hidden), n_hidden, (n_hidden, n_out), n_out]
    flat, (hidden_W, hidden_b, logreg_W,
           logreg_b) = climin.util.empty_with_views(dims)
    climin.initialize.randomize_normal(flat, loc=0, scale=0.1)
    #hidden_W[...] = np.asarray(rng.uniform(low=-4*np.sqrt(6. / (n_in + n_hidden)), high=4*np.sqrt(6. / (n_in + n_hidden)), size=(n_in, n_hidden)))
    parameters = [
        theano.shared(value=hidden_W, name='W', borrow=True),
        theano.shared(value=hidden_b, name='b', borrow=True),
        theano.shared(value=logreg_W, name='W', borrow=True),
        theano.shared(value=logreg_b, name='b', borrow=True)
    ]

    classifier = MLP(rng=rng,
                     input=x,
                     n_in=n_in,
                     n_hidden=n_hidden,
                     n_out=n_out,
                     activation=activation,
                     parameters=parameters)

    cost = classifier.negative_log_likelihood(
        y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr
    gparams = [T.grad(cost, param) for param in classifier.params]
    """ Theano functions """
    grad_W = theano.function([x, y], gparams, allow_input_downcast=True)

    #print('Setting up Climin...')
    """ Setting up Climin """
    def d_loss(parameters, inputs, targets):
        g_hl_W, g_hl_b, g_lr_W, g_lr_b = grad_W(inputs, targets)

        return np.concatenate(
            [g_hl_W.flatten(), g_hl_b,
             g_lr_W.flatten(), g_lr_b])

    minibatch = True
    if not minibatch:
        args = itertools.repeat(([train_set_x, train_set_y], {}))
    else:
        args = ((i, {}) for i in climin.util.iter_minibatches(
            [train_set_x, train_set_y], batch_size, [0, 0]))

    if optimizer == 'GradientDescent':
        print('Running GradientDescent')
        opt = climin.GradientDescent(flat,
                                     d_loss,
                                     step_rate=0.01,
                                     momentum=0.95,
                                     args=args)
    elif optimizer == 'RmsProp':
        print('Running RmsProp')
        opt = climin.rmsprop.RmsProp(flat, d_loss, step_rate=0.01, args=args)
    #elif optimizer == 'NonlinearConjugateGradient':
    #    opt = climin.cg.NonlinearConjugateGradient(d_loss, loss, d_loss, min_grad=1e-06, args=args)
    elif optimizer == 'Adadelta':
        print('Running Adadelta')
        opt = climin.adadelta.Adadelta(flat,
                                       d_loss,
                                       step_rate=0.01,
                                       decay=0.9,
                                       momentum=0,
                                       offset=0.001,
                                       args=args)
    elif optimizer == 'Adam':
        print('Running Adam')
        opt = climin.adam.Adam(flat,
                               d_loss,
                               step_rate=0.001,
                               decay=0.3,
                               decay_mom1=0.1,
                               decay_mom2=0.001,
                               momentum=0,
                               offset=1e-08,
                               args=args)
    elif optimizer == 'Rprop':
        print('Running Rprop')
        opt = climin.rprop.Rprop(flat,
                                 d_loss,
                                 step_shrink=0.5,
                                 step_grow=1.2,
                                 min_step=1e-06,
                                 max_step=1,
                                 changes_max=0.1,
                                 args=args)
    else:
        raise ValueError('Optimizer not available!')

    zero_one_loss = theano.function(
        inputs=[x, y],
        outputs=classifier.logRegressionLayer.errors(y),
        allow_input_downcast=True)

    p_y_given_x = theano.function(
        inputs=[x],
        outputs=classifier.logRegressionLayer.p_y_given_x,
        allow_input_downcast=True)

    print('Running Optimization...\n')
    print('Classifying %d classes' % n_out)

    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = n_train_batches  #min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    train_error_list = []
    valid_error_list = []
    test_error_list = []

    #model = Model(classifier.params)

    train_score = zero_one_loss(train_set_x, train_set_y) * 100
    this_validation_loss = zero_one_loss(valid_set_x, valid_set_y) * 100
    test_score = zero_one_loss(test_set_x, test_set_y) * 100

    train_error_list.append(train_score)
    valid_error_list.append(this_validation_loss)
    test_error_list.append(test_score)

    for info in opt:
        iter = info['n_iter']
        """if (iter % 1)==0:
            stdout.write("\r%f%% of Epoch %d" % (float(iter * 100)/n_train_batches - epoch * 100, epoch))
            stdout.flush()"""

        if (iter + 1) % validation_frequency == 1:
            epoch += 1

            train_score = zero_one_loss(train_set_x, train_set_y) * 100
            this_validation_loss = zero_one_loss(valid_set_x,
                                                 valid_set_y) * 100
            test_score = zero_one_loss(test_set_x, test_set_y) * 100

            train_error_list.append(train_score)
            valid_error_list.append(this_validation_loss)
            test_error_list.append(test_score)

            print('\nEpoch %i, Validation Error:\t %f%%' %
                  (epoch, this_validation_loss))

            if this_validation_loss < best_validation_loss:
                if (this_validation_loss <
                        best_validation_loss * improvement_threshold):
                    patience = max(patience, iter * patience_increase)

                best_validation_loss = this_validation_loss
                best_test_score = test_score
                best_iter = iter

                print(('Epoch %i, Test Error:\t %f%% \t NEW MODEL') %
                      (epoch, test_score))
                p_LEDon = p_y_given_x(test_set_x)[:, 1]
                #with open('model.pkl', 'wb') as f:
                #print('Dump Model')
                #    pickle.dump(model, f)

            if (epoch >= n_epochs) or done_looping:
                break

            print('')

        if patience <= iter:
            done_looping = True
            break

    #scores = Scores(train_error_list, valid_error_list, test_error_list, [best_validation_loss, test_score])
    #with open('scores.pkl', 'wb') as f:
    #    pickle.dump(scores, f)

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss, best_iter + 1, best_test_score))
    print(
        ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' %
         ((end_time - start_time) / 60.)),
        file=sys.stderr)

    #error_lists[i] = (train_error_list, valid_error_list, test_error_list)
    return (train_error_list, valid_error_list,
            test_error_list), (best_validation_loss,
                               best_test_score), (test_set_y, p_LEDon)
Example 8
    def build_and_train_rbf(self, X, Y):

        y_onehot = self.class_to_onehot(Y)
        n_dims = y_onehot.shape[1]
        centers = self.compute_centers(X)

        x = T.dmatrix()
        y = T.imatrix()

        #bias, centers, sigmas, weights
        template = [
            n_dims, centers.shape, self.l1_size, (self.l1_size, n_dims)
        ]

        #initialize the RBF network; training happens below
        model = theano_rbfnet(input=x,
                              n_cents=self.l1_size,
                              centers=centers,
                              n_dims=n_dims,
                              reg=self.penalty)

        cost = model.neg_log_likelihood(y)

        g_b = T.grad(cost, model.b)
        g_c = T.grad(cost, model.c)
        g_s = T.grad(cost, model.s)
        g_w = T.grad(cost, model.w)

        g_params = T.concatenate(
            [g_b.flatten(),
             g_c.flatten(),
             g_s.flatten(),
             g_w.flatten()])

        getcost = theano.function([x, y], outputs=cost)
        getdcost = theano.function([x, y], outputs=g_params)

        def cost_fcn(params, inputs, targets):
            model.set_params(params, template)
            x = inputs
            y = targets
            return getcost(x, y)

        def cost_grad(params, inputs, targets):
            model.set_params(params, template)
            x = inputs
            y = targets
            return getdcost(x, y)

        args = climin.util.iter_minibatches([X, y_onehot], self.batch_size,
                                            [0, 0])
        batch_args = itertools.repeat(([X, y_onehot], {}))
        args = ((i, {}) for i in args)
        init_params = model.get_params(template)

        opt_sgd = climin.GradientDescent(init_params,
                                         cost_fcn,
                                         cost_grad,
                                         steprate=0.1,
                                         momentum=0.99,
                                         args=args,
                                         momentum_type="nesterov")

        opt_ncg = climin.NonlinearConjugateGradient(init_params,
                                                    cost_fcn,
                                                    cost_grad,
                                                    args=batch_args)

        opt_lbfgs = climin.Lbfgs(init_params,
                                 cost_fcn,
                                 cost_grad,
                                 args=batch_args)
        #choose the optimizer
        if self.optimizer == 'sgd':
            optimizer = opt_sgd
        elif self.optimizer == 'ncg':
            optimizer = opt_ncg
        else:
            optimizer = opt_lbfgs

        #do the actual training.
        costs = []
        for itr_info in optimizer:
            if itr_info['n_iter'] > self.max_iters: break
            costs.append(itr_info['loss'])

        model.set_params(init_params, template)
        return model, costs
Example 9
    def build_and_train_nnet(self, X, Y):

        y_onehot = self.class_to_onehot(Y)
        n_in = X.shape[1]
        n_nodes = self.l1_size
        n_out = y_onehot.shape[1]

        x = T.dmatrix()
        y = T.imatrix()

        #bias1, bias2, weights1, weights2
        template = [(n_nodes, ), (n_out, ), (n_in, n_nodes), (n_nodes, n_out)]

        #initialize nnet
        model = nnet(input=x, n_in=n_in, n_nodes=n_nodes, n_out=n_out)
        cost = model.neg_log_likelihood(y)

        g_b1 = T.grad(cost, model.b1)
        g_b2 = T.grad(cost, model.b2)
        g_w1 = T.grad(cost, model.w1)
        g_w2 = T.grad(cost, model.w2)

        g_params = T.concatenate(
            [g_b1.flatten(),
             g_b2.flatten(),
             g_w1.flatten(),
             g_w2.flatten()])

        getcost = theano.function([x, y], outputs=cost)
        getdcost = theano.function([x, y], outputs=g_params)

        def cost_fcn(params, inputs, targets):
            model.set_params(params, template)
            x = inputs
            y = targets
            return getcost(x, y)

        def cost_grad(params, inputs, targets):
            model.set_params(params, template)
            x = inputs
            y = targets
            return getdcost(x, y)

        args = climin.util.iter_minibatches([X, y_onehot], self.batch_size,
                                            [0, 0])
        batch_args = itertools.repeat(([X, y_onehot], {}))
        args = ((i, {}) for i in args)
        init_params = model.get_params(template)

        opt_sgd = climin.GradientDescent(init_params,
                                         cost_fcn,
                                         cost_grad,
                                         steprate=0.01,
                                         momentum=0.99,
                                         args=args,
                                         momentum_type="nesterov")

        opt_ncg = climin.NonlinearConjugateGradient(init_params,
                                                    cost_fcn,
                                                    cost_grad,
                                                    args=batch_args)

        opt_lbfgs = climin.Lbfgs(init_params,
                                 cost_fcn,
                                 cost_grad,
                                 args=batch_args)
        #choose the optimizer
        if self.optimizer == 'sgd':
            optimizer = opt_sgd
        elif self.optimizer == 'ncg':
            optimizer = opt_ncg
        else:
            optimizer = opt_lbfgs

        #do the actual training.
        costs = []
        for itr_info in optimizer:
            if itr_info['n_iter'] > self.max_iters: break
            costs.append(itr_info['loss'])

        model.set_params(init_params, template)
        return model, costs
Example 10
        if method == 'adam':
            opt = climin.Adam(model.optimizer_array,
                              model.stochastic_grad,
                              step_rate=0.005,
                              decay_mom1=1 - 0.9,
                              decay_mom2=1 - 0.999)
            ELBO.append(model.log_likelihood())
            #NLPD.append(model.negative_log_predictive(Xtest, Ytest, num_samples=1000))
            start = time.time()
            myTimes.append(start)
            print('Running Adam...')
            info = opt.minimize_until(callback)

        elif method == 'sgd':
            opt = climin.GradientDescent(model.optimizer_array,
                                         model.stochastic_grad,
                                         step_rate=1e-15,
                                         momentum=0.0)
            ELBO.append(model.log_likelihood())
            #NLPD.append(model.negative_log_predictive(Xtest, Ytest, num_samples=1000))
            start = time.time()
            myTimes.append(start)
            print('Running SGD...')
            info = opt.minimize_until(callback)
        elif method == 'adad':
            opt = climin.Adadelta(model.optimizer_array,
                                  model.stochastic_grad,
                                  step_rate=0.005,
                                  momentum=0.9)
            ELBO.append(model.log_likelihood())
            #NLPD.append(model.negative_log_predictive(Xtest, Ytest, num_samples=1000))
            start = time.time()