Example #1
def train(model, batch_size=100, algorithm="adam", max_iter=1000):
    """
    Train a george-based Gaussian process model.
    """

    def callback(p):
        print('{}\t{}'.format(np.exp(p),  model.log_evidence(p, n=batch_size)[0]))

    def nll(k):
        ll = model.log_evidence(k, n=batch_size)[0]
        return -ll if np.isfinite(ll) else 1e25

    def grad_nll(k):
        return -model.log_evidence(k, n=batch_size)[1]

    def grad_ll(k):
        return model.log_evidence(k, n=batch_size)[1]

    # Get the default value of the hyperparameters as the initial point for the optimisation
    p0 = model.gp.get_parameter_vector()

    model.train()
    
    if batch_size is not None:
        if algorithm == "adam":
            # Optimise using the Adam algorithm on mini-batch estimates.
            import climin
            opt = climin.Adam(p0, grad_nll)
        else:
            raise ValueError("Unknown algorithm: {}".format(algorithm))

        for info in opt:
            if info['n_iter'] % 10 == 0:
                k = model.gp.get_parameter_vector()
                print("{} - {} - {}".format(info['n_iter'],
                                            model.log_evidence(k, n=batch_size)[0],
                                            np.exp(k)))
            if info['n_iter'] > max_iter:
                break
        results = model.gp.get_parameter_vector()

    else:
        # Full-batch optimisation with scipy's L-BFGS-B.
        import scipy.optimize as op
        results = op.minimize(nll, p0, jac=grad_nll, method="L-BFGS-B", callback=callback)
        model.gp.set_parameter_vector(results.x)

    model.eval()

    return results
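
The snippet above only relies on a small model interface: `gp.get_parameter_vector()` / `gp.set_parameter_vector()`, `log_evidence(p, n=...)` returning a `(value, gradient)` pair, and `train()` / `eval()` mode switches. Below is a minimal sketch of a stand-in object satisfying that contract, so `train` can be exercised without a real george model; the `ToyModel` class and its quadratic log-evidence are hypothetical, not part of the original code.

import numpy as np

class _ToyGP:
    # Hypothetical stand-in for model.gp: just stores a flat parameter vector.
    def __init__(self, p):
        self._p = np.asarray(p, dtype=float)

    def get_parameter_vector(self):
        return self._p.copy()

    def set_parameter_vector(self, p):
        self._p = np.asarray(p, dtype=float)

class ToyModel:
    # Hypothetical model whose log-evidence is a quadratic peaking at p = [1, -2].
    def __init__(self):
        self.gp = _ToyGP([0.0, 0.0])
        self._target = np.array([1.0, -2.0])

    def log_evidence(self, p, n=None):
        # Assumption: the real model keeps gp in sync when evaluated at p,
        # so the Adam branch's prints and final result reflect progress.
        self.gp.set_parameter_vector(p)
        diff = np.asarray(p) - self._target
        return -0.5 * np.sum(diff ** 2), -diff   # (value, gradient)

    def train(self):   # mode switches are no-ops for the toy model
        pass

    def eval(self):
        pass

# Example call (requires climin for the "adam" path):
# results = train(ToyModel(), batch_size=64, algorithm="adam", max_iter=200)
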
Example #2
def vem_algorithm(model,
                  vem_iters=None,
                  maxIter_perVEM=None,
                  step_rate=None,
                  verbose=False,
                  optZ=True,
                  verbose_plot=False,
                  non_chained=True):
    if vem_iters is None:
        vem_iters = 5
    if maxIter_perVEM is None:
        maxIter_perVEM = 100

    model['.*.kappa'].fix()  # must be always fixed
    #model.elbo = np.empty((vem_iters,1))

    if model.batch_size is None:

        for i in range(vem_iters):
            # VARIATIONAL E-STEP
            model['.*.lengthscale'].fix()
            model['.*.variance'].fix()
            model.Z.fix()
            model['.*.W'].fix()

            model.q_u_means.unfix()
            model.q_u_chols.unfix()
            model.optimize(messages=verbose, max_iters=maxIter_perVEM)
            print('iteration (' + str(i + 1) + ') VE step, log_likelihood=' +
                  str(model.log_likelihood().flatten()))

            # VARIATIONAL M-STEP
            model['.*.lengthscale'].unfix()
            model['.*.variance'].unfix()
            if optZ:
                model.Z.unfix()
            if non_chained:
                model['.*.W'].unfix()

            model.q_u_means.fix()
            model.q_u_chols.fix()
            model.optimize(messages=verbose, max_iters=maxIter_perVEM)
            print('iteration (' + str(i + 1) + ') VM step, log_likelihood=' +
                  str(model.log_likelihood().flatten()))

    else:

        if step_rate is None:
            step_rate = 0.01

        # Here the E step runs maxIter_perVEM iterations (100 by default) and
        # the M step also runs maxIter_perVEM iterations (100 by default)
        model.elbo = np.empty((2 * maxIter_perVEM * vem_iters + 2, 1))
        model.elbo[0, 0] = model.log_likelihood()
        c_full = partial(model.callback,
                         max_iter=maxIter_perVEM,
                         verbose=verbose,
                         verbose_plot=verbose_plot)

        for i in range(vem_iters):
            # VARIATIONAL E-STEP
            model['.*.lengthscale'].fix()
            model['.*.variance'].fix()
            model.Z.fix()
            model['.*.W'].fix()

            model.q_u_means.unfix()
            model.q_u_chols.unfix()
            optimizer = climin.Adam(model.optimizer_array,
                                    model.stochastic_grad,
                                    step_rate=step_rate,
                                    decay_mom1=1 - 0.9,
                                    decay_mom2=1 - 0.999)
            model.index_VEM = 2 * i * maxIter_perVEM
            optimizer.minimize_until(c_full)
            # vo.variational_opt_HetMOGP(model=model, max_iters=maxIter_perVEM, step_size=step_rate, momentum=0.0,prior_lambda=1.0e-1,MC=1)

            print('iteration (' + str(i + 1) +
                  ') VE step, mini-batch log_likelihood=' +
                  str(model.log_likelihood().flatten()))
            # VARIATIONAL M-STEP
            model['.*.lengthscale'].unfix()
            model['.*.variance'].unfix()
            if optZ:
                model.Z.unfix()
            if non_chained:
                model['.*.W'].unfix()

            model.q_u_means.fix()
            model.q_u_chols.fix()
            optimizer = climin.Adam(model.optimizer_array,
                                    model.stochastic_grad,
                                    step_rate=step_rate,
                                    decay_mom1=1 - 0.9,
                                    decay_mom2=1 - 0.999)
            model.index_VEM = (2 * i + 1) * maxIter_perVEM
            optimizer.minimize_until(c_full)
            # vo.variational_opt_HetMOGP(model=model, max_iters=maxIter_perVEM, step_size=step_rate, momentum=0.0,prior_lambda=1.0e-1,MC=1)
            print('iteration (' + str(i + 1) +
                  ') VM step, mini-batch log_likelihood=' +
                  str(model.log_likelihood().flatten()))

    return model
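
A note on the mini-batch branch: climin.Adam updates model.optimizer_array in place, and minimize_until keeps iterating until the supplied callback returns True. Below is a minimal, self-contained sketch of that same pattern on a made-up quadratic objective; it assumes climin is installed, and every name in it is illustrative rather than part of the model above.

import numpy as np
import climin
from functools import partial

# Toy objective: f(w) = 0.5 * ||w - target||^2, driven by the same
# Adam + minimize_until(callback) pattern as the mini-batch branch above.
target = np.array([3.0, -1.0, 0.5])
w = np.zeros(3)                        # climin updates this array in place

def grad(wrt):
    return wrt - target

def stop_after(info, max_iter, verbose=False):
    # Returning True terminates minimize_until.
    if verbose and info['n_iter'] % 50 == 0:
        print(info['n_iter'], 0.5 * np.sum((w - target) ** 2))
    return info['n_iter'] >= max_iter

opt = climin.Adam(w, grad, step_rate=0.05,
                  decay_mom1=1 - 0.9, decay_mom2=1 - 0.999)
opt.minimize_until(partial(stop_after, max_iter=200, verbose=True))
print('final w:', w)
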
Example #3
def test_mlp(learning_rate=0.01,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=500,
             dataset='mnist.pkl.gz',
             batch_size=20,
             n_hidden=500,
             optimizer='gd',
             activation=T.tanh):

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    tmpl = [(28 * 28, n_hidden), n_hidden, (n_hidden, 10), 10]
    flat, (Weights_1, bias_1, Weights_2,
           bias_2) = climin.util.empty_with_views(tmpl)

    # Initialize weights with a uniform distribution, following the tutorial
    rng = numpy.random.RandomState(1234)
    Weights_1_init = rng.uniform(low=-numpy.sqrt(6. / (28 * 28 + n_hidden)),
                                 high=numpy.sqrt(6. / (28 * 28 + n_hidden)),
                                 size=(28 * 28, n_hidden))

    Weights_2_init = rng.uniform(low=-numpy.sqrt(6. / (n_hidden + 10)),
                                 high=numpy.sqrt(6. / (n_hidden + 10)),
                                 size=(n_hidden, 10))

    bias_1_init = numpy.zeros((n_hidden, ), dtype=theano.config.floatX)
    bias_2_init = numpy.zeros((10, ), dtype=theano.config.floatX)

    if activation == T.nnet.sigmoid:
        Weights_1_init *= 4
        Weights_2_init *= 4

    def initialize_in_place(array, values):
        for j in range(0, len(values)):
            array[j] = values[j]

    initialize_in_place(Weights_1, Weights_1_init)
    initialize_in_place(Weights_2, Weights_2_init)
    initialize_in_place(bias_1, bias_1_init)
    initialize_in_place(bias_2, bias_2_init)

    if batch_size is None:
        args = itertools.repeat(([train_set_x, train_set_y], {}))
        n_train_batches = 1
    else:
        args = cli.util.iter_minibatches([train_set_x, train_set_y],
                                         batch_size, [0, 0])
        args = ((i, {}) for i in args)
        n_train_batches = train_set_x.shape[0] // batch_size

    print('... building the model')

    x = T.matrix('x')
    y = T.ivector('y')

    rng = numpy.random.RandomState(1234)

    classifier = MLP(rng=rng,
                     input=x,
                     n_in=28 * 28,
                     n_hidden=n_hidden,
                     n_out=10,
                     Weights_1=theano.shared(value=Weights_1,
                                             name='W',
                                             borrow=True),
                     bias_1=theano.shared(value=bias_1, name='b', borrow=True),
                     Weights_2=theano.shared(value=Weights_2,
                                             name='W',
                                             borrow=True),
                     bias_2=theano.shared(value=bias_2, name='b', borrow=True),
                     activation=T.tanh)

    #cost with regularisation terms
    cost = theano.function(inputs=[x, y],
                           outputs=classifier.negative_log_likelihood(y) +
                           L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr,
                           allow_input_downcast=True)

    # gradients with regularisation terms
    gradients = theano.function(
        inputs=[x, y],
        outputs=[
            T.grad(
                classifier.negative_log_likelihood(y) +
                L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr,
                classifier.hiddenLayer.W),
            T.grad(
                classifier.negative_log_likelihood(y) +
                L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr,
                classifier.hiddenLayer.b),
            T.grad(
                classifier.negative_log_likelihood(y) +
                L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr,
                classifier.logRegressionLayer.W),
            T.grad(
                classifier.negative_log_likelihood(y) +
                L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr,
                classifier.logRegressionLayer.b)
        ],
        allow_input_downcast=True)

    def loss(parameters, input, target):
        return cost(input, target)

    def d_loss_wrt_pars(parameters, inputs, targets):
        g_W_1, g_b_1, g_W_2, g_b_2 = gradients(inputs, targets)

        return numpy.concatenate(
            [g_W_1.flatten(), g_b_1,
             g_W_2.flatten(), g_b_2])

    zero_one_loss = theano.function(inputs=[x, y],
                                    outputs=classifier.errors(y),
                                    allow_input_downcast=True)

    if optimizer == 'gd':
        print('... using gradient descent')
        opt = cli.GradientDescent(flat,
                                  d_loss_wrt_pars,
                                  step_rate=learning_rate,
                                  momentum=.95,
                                  args=args)
    elif optimizer == 'bfgs':
        print('... using quasi-Newton BFGS')
        opt = cli.Bfgs(flat, loss, d_loss_wrt_pars, args=args)
    elif optimizer == 'lbfgs':
        print('... using quasi-Newton L-BFGS')
        opt = cli.Lbfgs(flat, loss, d_loss_wrt_pars, args=args)
    elif optimizer == 'nlcg':
        print('... using nonlinear conjugate gradient')
        opt = cli.NonlinearConjugateGradient(flat,
                                             loss,
                                             d_loss_wrt_pars,
                                             min_grad=1e-03,
                                             args=args)
    elif optimizer == 'rmsprop':
        print('... using rmsprop')
        opt = cli.RmsProp(flat,
                          d_loss_wrt_pars,
                          step_rate=1e-4,
                          decay=0.9,
                          args=args)
    elif optimizer == 'rprop':
        print('... using resilient propagation')
        opt = cli.Rprop(flat, d_loss_wrt_pars, args=args)
    elif optimizer == 'adam':
        print('... using adaptive momentum estimation optimizer')
        opt = cli.Adam(flat,
                       d_loss_wrt_pars,
                       step_rate=0.0002,
                       decay=0.99999999,
                       decay_mom1=0.1,
                       decay_mom2=0.001,
                       momentum=0,
                       offset=1e-08,
                       args=args)
    elif optimizer == 'adadelta':
        print('... using adadelta')
        opt = cli.Adadelta(flat,
                           d_loss_wrt_pars,
                           step_rate=1,
                           decay=0.9,
                           momentum=.95,
                           offset=0.0001,
                           args=args)
    else:
        print('unknown optimizer')
        return 1

    print('... training')

    # early stopping parameters
    if batch_size is None:
        patience = 250
    else:
        patience = 10000  # look at this many samples regardless

    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = numpy.inf
    test_loss = 0.

    valid_losses = []
    train_losses = []
    test_losses = []

    epoch = 0

    start_time = timeit.default_timer()

    for info in opt:
        iter = info['n_iter']
        epoch = iter // n_train_batches
        minibatch_index = iter % n_train_batches

        if iter % validation_frequency == 0:
            validation_loss = zero_one_loss(valid_set_x, valid_set_y)
            valid_losses.append(validation_loss)
            train_losses.append(zero_one_loss(train_set_x, train_set_y))
            test_losses.append(zero_one_loss(test_set_x, test_set_y))

            print(
                'epoch %i, minibatch %i/%i, validation error % f %%, iter/patience %i/%i'
                % (epoch, minibatch_index + 1, n_train_batches,
                   validation_loss * 100, iter, patience))
            # if we got the best validation score until now
            if validation_loss < best_validation_loss:
                # improve patience if loss improvement is good enough
                if validation_loss < best_validation_loss * improvement_threshold:
                    patience = max(patience, iter * patience_increase)
                best_validation_loss = validation_loss
                # test it on the test set
                test_loss = zero_one_loss(test_set_x, test_set_y)

                print(
                    '    epoch %i, minibatch %i/%i, test error of best model %f %%'
                    % (epoch, minibatch_index + 1, n_train_batches,
                       test_loss * 100))

        if patience <= iter or epoch >= n_epochs:
            break

    end_time = timeit.default_timer()
    print((
        'Optimization complete. Best validation score of %f %% with test performance %f %%'
    ) % (best_validation_loss * 100., test_loss * 100.))
    print(
        ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' %
         ((end_time - start_time) / 60.)),
        file=sys.stderr)

    losses = (train_losses, valid_losses, test_losses)

    return classifier, losses
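
Two climin utilities carry most of the setup above: climin.util.empty_with_views allocates one flat parameter array with shaped views into it, and climin.util.iter_minibatches turns the training arrays into the endless (arguments, kwargs) stream the optimizers consume. Below is a minimal, self-contained sketch of that pattern on synthetic linear-regression data; it assumes climin is installed, and the shapes and names are made up for illustration.

import numpy as np
import climin
import climin.util

# Synthetic linear-regression data: y = X.dot(w_true) + noise.
rng = np.random.RandomState(0)
X = rng.randn(1000, 5)
w_true = rng.randn(5)
y = X.dot(w_true) + 0.01 * rng.randn(1000)

# One flat parameter array with named views, as in test_mlp above.
tmpl = [5, 1]                          # weight vector and a one-element bias
flat, (w, b) = climin.util.empty_with_views(tmpl)
flat[:] = 0.0

def d_loss_wrt_pars(parameters, inputs, targets):
    # Mean-squared-error gradient; w and b are views into flat, which climin
    # updates in place, so the closure always sees the current parameters.
    resid = inputs.dot(w) + b - targets
    g_w = inputs.T.dot(resid) / len(targets)
    g_b = np.array([resid.mean()])
    return np.concatenate([g_w, g_b])

# Endless mini-batch stream in the (arguments, kwargs) form climin expects.
batches = climin.util.iter_minibatches([X, y], 50, [0, 0])
args = ((batch, {}) for batch in batches)

opt = climin.GradientDescent(flat, d_loss_wrt_pars, step_rate=0.1,
                             momentum=0.95, args=args)
for info in opt:
    if info['n_iter'] >= 500:
        break
print('recovered weights:', w, 'bias:', b)
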
Example #4
def optimise_HetMOGP(model,
                     Xval=None,
                     Yval=None,
                     max_iters=1000,
                     step_rate=0.01,
                     decay_mom1=1 - 0.9,
                     decay_mom2=1 - 0.999,
                     fng=False,
                     q_s_ini=0.0,
                     prior_lamb_or_offset=None):
    if prior_lamb_or_offset is None:
        prior_lamb_or_offset = 1e-8
    global mk_ant, mk_aux, mk, V_i, Vk, Lk, Vki_ant

    def natural_grad_qu(model, n_iter=1, step_size=step_rate, momentum=0.0):
        global mk_ant, mk_aux, mk, V_i, Vk, Lk, Vki_ant
        # Initialise the step sizes
        beta2_k = step_size  #use step_size*0.1 for Convolutional MOGP
        gamma2_k = momentum
        alpha2_k = step_size
        N_posteriors = model.q_u_means.shape[1]

        if n_iter == 1:
            V_i = choleskies.multiple_dpotri(
                choleskies.flat_to_triang(model.q_u_chols.values)).copy()
            Vk = np.zeros_like(V_i)
            for i in range(N_posteriors):
                Vk[i, :, :] = 0.5 * (model.posteriors[i].covariance.copy() +
                                     model.posteriors[i].covariance.T.copy())

            Lk = np.zeros_like(Vk)
            mk = model.q_u_means.values.copy()

            Vki_ant = V_i.copy()
            mk_aux = mk.copy()

        dL_dm, dL_dV = compute_stoch_grads_for_qu_HetMOGP(model=model)

        mk_ant = mk_aux.copy()
        mk_aux = mk.copy()

        if not model.q_u_means.is_fixed and not model.q_u_chols.is_fixed:
            mk_ant = mk_aux.copy()
            mk_aux = mk.copy()

            for i in range(N_posteriors):
                try:
                    V_i[i, :, :] = V_i[i, :, :] + 2 * beta2_k * dL_dV[
                        i]  #+ 1.0e-6*np.eye(*Vk[i,:,:].shape)
                    Vk[i, :, :] = np.linalg.inv(V_i[i, :, :])
                    Vk[i, :, :] = 0.5 * (np.array(Vk[i, :, :]) +
                                         np.array(Vk[i, :, :].T))
                    Lk[i, :, :] = np.linalg.cholesky(Vk[i, :, :])
                    mk[:, i] = mk[:, i] - alpha2_k * np.dot(
                        Vk[i, :, :], dL_dm[i]) + gamma2_k * np.dot(
                            np.dot(Vk[i, :, :], Vki_ant[i, :, :]),
                            (mk[:, i] - mk_ant[:, i]))
                except LinAlgError:
                    print("Overflow")
                    Vk[i, :, :] = np.linalg.inv(V_i[i, :, :])
                    Vk[i, :, :] = 1.0e-1 * np.eye(
                        *Vk[i, :, :].shape
                    )  #nearestPD(Vk[i,:,:]) # + 1.0e-3*np.eye(*Vk[i,:,:].shape)
                    Lk[i, :, :] = linalg.jitchol(Vk[i, :, :])
                    V_i[i, :, :] = np.linalg.inv(Vk[i, :, :])
                    mk[:, i] = mk[:, i] * 0.0

            Vki_ant = V_i.copy()

            model.L_u.setfield(choleskies.triang_to_flat(Lk.copy()),
                               np.float64)
            model.m_u.setfield(mk.copy(), np.float64)

    global ELBO, myTimes, sched, NLPD
    ELBO = []
    NLPD = []
    myTimes = []
    sched = step_rate

    def callhybrid(i):
        global start
        global ELBO, myTimes, sched, NLPD

        if i['n_iter'] > max_iters:
            model.q_u_means.unfix()
            model.q_u_chols.unfix()
            return True
        model.update_model(False)
        model.q_u_means.unfix()
        model.q_u_chols.unfix()
        mom = 0.9 if fng else 0.0
        natural_grad_qu(model,
                        n_iter=i['n_iter'],
                        step_size=step_rate,
                        momentum=mom)

        model.update_model(True)
        model.q_u_means.fix()
        model.q_u_chols.fix()
        #model.update_model(True)

        ELBO.append(model.log_likelihood())
        myTimes.append(time.time())

        if (i['n_iter']) % 50 == 0:
            print(i['n_iter'])
            print(model.log_likelihood())
            if Xval is not None and Yval is not None:
                NLPD.append(
                    model.negative_log_predictive(Xval, Yval,
                                                  num_samples=1000))

        return False

    model.q_u_means.fix()
    model.q_u_chols.fix()
    if fng is True:
        print('Running Fully NG, check s_ini:', q_s_ini, ' and prior_lamb:',
              prior_lamb_or_offset)
        opt = climin.VarOpt(model.optimizer_array,
                            model.stochastic_grad,
                            step_rate=step_rate,
                            s_ini=q_s_ini,
                            decay_mom1=decay_mom1,
                            decay_mom2=decay_mom2,
                            prior_lambda=prior_lamb_or_offset)
    else:
        print('Running Hybrid (NG+Adam), check offset:', prior_lamb_or_offset)
        opt = climin.Adam(model.optimizer_array,
                          model.stochastic_grad,
                          step_rate=step_rate,
                          decay_mom1=decay_mom1,
                          decay_mom2=decay_mom2,
                          offset=prior_lamb_or_offset)

    ELBO.append(model.log_likelihood())
    if Xval is not None and Yval is not None:
        NLPD.append(model.negative_log_predictive(Xval, Yval,
                                                  num_samples=1000))
    start = time.time()
    myTimes.append(start)
    info = opt.minimize_until(callhybrid)
    return np.array(ELBO).flatten(), np.array(NLPD), np.array(myTimes) - start
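
The update inside natural_grad_qu is a natural-gradient step on a Gaussian q(u): the precision is moved by 2*beta2_k times the covariance gradient, and the mean by alpha2_k times the covariance-preconditioned mean gradient. Below is a minimal numpy-only sketch of that update on a toy objective (minimising a KL divergence); all names and values are illustrative, not taken from the HetMOGP code.

import numpy as np

# Toy objective: minimise F(m, V) = KL( N(m, V) || N(mu_p, S_p) ), whose
# gradients are dF/dm = S_p^{-1}(m - mu_p) and dF/dV = 0.5 (S_p^{-1} - V^{-1}).
mu_p = np.array([1.0, -1.0])
S_p = np.array([[2.0, 0.3], [0.3, 1.0]])
S_p_inv = np.linalg.inv(S_p)

m = np.zeros(2)                 # variational mean (mk in the code above)
V = np.eye(2)                   # variational covariance (Vk)
Lam = np.linalg.inv(V)          # its precision (V_i)

alpha, beta = 0.1, 0.1          # step sizes, playing the role of alpha2_k / beta2_k
for _ in range(300):
    dF_dm = S_p_inv.dot(m - mu_p)
    dF_dV = 0.5 * (S_p_inv - np.linalg.inv(V))
    Lam = Lam + 2.0 * beta * dF_dV          # precision update, as for V_i above
    V = np.linalg.inv(Lam)
    V = 0.5 * (V + V.T)                     # re-symmetrise, as in the try-block
    m = m - alpha * V.dot(dF_dm)            # mean update (momentum term omitted)

print('mean  ->', m)    # converges to mu_p
print('cov   ->', V)    # converges to S_p
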
Example #5
def sgd_optimization_mnist(learning_rate=0.01, n_epochs=1000, dataset='mnist.pkl.gz', batch_size=600, optimizer='gd'):

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    tmpl = [(28 * 28, 10), 10]
    flat, (Weights, bias) = climin.util.empty_with_views(tmpl)

    cli.initialize.randomize_normal(flat, 0, 1)

    if batch_size is None:
        args = itertools.repeat(([train_set_x, train_set_y], {}))
        n_train_batches = 1
    else:
        args = cli.util.iter_minibatches([train_set_x, train_set_y], batch_size, [0, 0])
        args = ((i, {}) for i in args)
        n_train_batches = train_set_x.shape[0] // batch_size

    print('... building the model')

    x = T.matrix('x')
    y = T.ivector('y')

    classifier = LogisticRegression(
            input = x,
            n_in = 28 * 28,
            n_out = 10,
            W = theano.shared(value = Weights, name = 'W', borrow = True),
            b = theano.shared(value = bias, name = 'b', borrow = True)
            )

    gradients = theano.function(
            inputs = [x, y],
            outputs = [
                T.grad(classifier.negative_log_likelihood(y), classifier.W),
                T.grad(classifier.negative_log_likelihood(y), classifier.b)
                ],
            allow_input_downcast = True
            )

    cost = theano.function(
        inputs=[x, y],
        outputs=classifier.negative_log_likelihood(y),
        allow_input_downcast=True
    )

    def loss(parameters, input, target):
        return cost(input, target)

    def d_loss_wrt_pars(parameters, inputs, targets):
        g_W, g_b = gradients(inputs, targets)

        return np.concatenate([g_W.flatten(), g_b])

    zero_one_loss = theano.function(
            inputs = [x, y],
            outputs = classifier.errors(y),
            allow_input_downcast = True
            )

    if optimizer == 'gd':
        print('... using gradient descent')
        opt = cli.GradientDescent(flat, d_loss_wrt_pars, step_rate=learning_rate, momentum=.95, args=args)
    elif optimizer == 'rmsprop':
        print('... using rmsprop')
        opt = cli.RmsProp(flat, d_loss_wrt_pars, step_rate=1e-4, decay=0.9, args=args)
    elif optimizer == 'rprop':
        print('... using resilient propagation')
        opt = cli.Rprop(flat, d_loss_wrt_pars, args=args)
    elif optimizer == 'adam':
        print('... using adaptive momentum estimation optimizer')
        opt = cli.Adam(flat, d_loss_wrt_pars, step_rate = 0.0002, decay = 0.99999999, decay_mom1 = 0.1, decay_mom2 = 0.001, momentum = 0, offset = 1e-08, args=args)
    elif optimizer == 'adadelta':
        print('... using adadelta')
        opt = cli.Adadelta(flat, d_loss_wrt_pars, step_rate=1, decay = 0.9, momentum = .95, offset = 0.0001, args=args)
    else:
        print('unknown optimizer')
        return 1

    print('... training the model')

    # early stopping parameters
    if batch_size is None:
        patience = 250
    else:
        patience = 5000 # look at this many samples regardless

    patience_increase = 2 # wait this much longer when a new best is found
    improvement_threshold = 0.995 # a relative improvement of this much is considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = np.inf
    test_loss = 0.

    valid_losses = []
    train_losses = []
    test_losses = []

    epoch = 0

    start_time = timeit.default_timer()
    for info in opt:
        iter = info['n_iter']
        epoch = iter // n_train_batches
        minibatch_index = iter % n_train_batches

        if iter % validation_frequency == 0:
            # compute zero-one loss on validation set
            validation_loss = zero_one_loss(valid_set_x, valid_set_y)
            valid_losses.append(validation_loss)
            train_losses.append(zero_one_loss(train_set_x, train_set_y))
            test_losses.append(zero_one_loss(test_set_x, test_set_y))

            print(
                    'epoch %i, minibatch %i/%i, validation error % f %%, iter/patience %i/%i' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        validation_loss * 100,
                        iter,
                        patience
                        )
                    )
            # if we got the best validation score until now
            if validation_loss < best_validation_loss:
               # improve patience if loss improvement is good enough
                if validation_loss < best_validation_loss * improvement_threshold:
                    patience = max(patience, iter * patience_increase)
                best_validation_loss = validation_loss
                # test it on the test set
                test_loss = zero_one_loss(test_set_x, test_set_y)

                print(
                        '    epoch %i, minibatch %i/%i, test error of best model %f %%' %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_loss * 100
                            )
                        )

        if patience <= iter or epoch >= n_epochs:
            break

    end_time = timeit.default_timer()

    print('Optimization complete with best validation score of %f %%, with test performance %f %%' % (best_validation_loss * 100., test_loss * 100.))
    print('The code ran for %d epochs, with %f epochs/sec' % (epoch, 1. * epoch / (end_time - start_time)))
    print(('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time))), file=sys.stderr)

    losses = (train_losses, valid_losses, test_losses)

    return classifier, losses
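
climin optimizers are plain infinite iterators, so the training loop above is just `for info in opt:` plus the caller's own early-stopping rule. Below is a minimal, self-contained sketch of that driver loop on a made-up quadratic; it assumes climin is installed, and the patience numbers are illustrative.

import numpy as np
import climin

# Toy quadratic, to show the "for info in opt: ... break" driver loop used above.
target = np.array([2.0, -3.0])
w = np.zeros(2)

def grad(wrt):
    return wrt - target

opt = climin.GradientDescent(w, grad, step_rate=0.1, momentum=0.9)

patience, n_train_batches, max_epochs = 50, 10, 100
best_loss = np.inf
for info in opt:                       # climin optimizers iterate forever
    it = info['n_iter']
    epoch = it // n_train_batches
    if it % n_train_batches == 0:
        loss = 0.5 * np.sum((w - target) ** 2)
        if loss < best_loss * 0.995:   # significant improvement: extend patience
            patience = max(patience, it * 2)
            best_loss = loss
    if patience <= it or epoch >= max_epochs:
        break
print('stopped at iteration', info['n_iter'], 'w =', w)
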
Example #6
        #model.kern_list[0].fix()
        model['.*.kappa'].fix()

        """"""""""""""""""""""""""""""""""""""""""""

        """"""""""""""""""""""""""""""""""""""""""""

        for q in range(Q):
            model['B_q' + str(q) + '.W'] = 0.1 * np.random.randn(len(model['B_q0.W']))[:, None]
            model.kern_list[q].variance.fix()
        """"""""""""""""""""""""""""""""""""""""""""""""""""""
        print(model['B'])
        print('Initial Log Likelihood:\n',model.log_likelihood())

        if method == 'adam':
            opt = climin.Adam(model.optimizer_array, model.stochastic_grad, step_rate=0.005, decay_mom1=1 - 0.9, decay_mom2=1 - 0.999)
            ELBO.append(model.log_likelihood())
            #NLPD.append(model.negative_log_predictive(Xtest, Ytest, num_samples=1000))
            start = time.time()
            myTimes.append(start)
            print('Running Adam...')
            info = opt.minimize_until(callback)

        elif method == 'adad':
            opt = climin.Adadelta(model.optimizer_array, model.stochastic_grad, step_rate=0.005, momentum=0.9)
            ELBO.append(model.log_likelihood())
            #NLPD.append(model.negative_log_predictive(Xtest, Ytest, num_samples=1000))
            start = time.time()
            myTimes.append(start)
            print('Running Adadelta...')
            info = opt.minimize_until(callback)
Example #7
def vem_algorithm(model,
                  vem_iters=None,
                  maxIter_perVEM=None,
                  step_rate=None,
                  verbose=False,
                  optZ=True,
                  verbose_plot=False,
                  non_chained=True):
    if vem_iters is None:
        vem_iters = 5
    if maxIter_perVEM is None:
        maxIter_perVEM = 100

    model['.*.kappa'].fix()  # must be always fixed!
    if model.batch_size is None:

        for i in range(vem_iters):
            #######  VARIATIONAL E-STEP  #######
            model['.*.lengthscale'].fix()
            model['.*.variance'].fix()
            model.Z.fix()
            model['.*.W'].fix()

            model.q_u_means.unfix()
            model.q_u_chols.unfix()
            model.optimize(messages=verbose, max_iters=maxIter_perVEM)
            print('iteration (' + str(i + 1) + ') VE step, log_likelihood=' +
                  str(model.log_likelihood().flatten()))

            #######  VARIATIONAL M-STEP   #######
            model['.*.lengthscale'].unfix()
            model['.*.variance'].unfix()
            if optZ:
                model.Z.unfix()
            if non_chained:
                model['.*.W'].unfix()

            model.q_u_means.fix()
            model.q_u_chols.fix()
            model.optimize(messages=verbose, max_iters=maxIter_perVEM)
            print('iteration (' + str(i + 1) + ') VM step, log_likelihood=' +
                  str(model.log_likelihood().flatten()))

    else:

        if step_rate is None:
            step_rate = 0.01

        model.elbo = np.empty((maxIter_perVEM * vem_iters + 2, 1))
        model.elbo[0, 0] = model.log_likelihood()
        c_full = partial(model.callback,
                         max_iter=maxIter_perVEM,
                         verbose=verbose,
                         verbose_plot=verbose_plot)

        for i in range(vem_iters):
            #######  VARIATIONAL E-STEP  #######
            model['.*.lengthscale'].fix()
            model['.*.variance'].fix()
            model.Z.fix()
            model['.*.W'].fix()

            model.q_u_means.unfix()
            model.q_u_chols.unfix()
            optimizer = climin.Adam(model.optimizer_array,
                                    model.stochastic_grad,
                                    step_rate=step_rate,
                                    decay_mom1=1 - 0.9,
                                    decay_mom2=1 - 0.999)
            optimizer.minimize_until(c_full)
            print('iteration (' + str(i + 1) +
                  ') VE step, mini-batch log_likelihood=' +
                  str(model.log_likelihood().flatten()))

            #######  VARIATIONAL M-STEP  #######
            model['.*.lengthscale'].unfix()
            model['.*.variance'].unfix()
            if optZ:
                model.Z.unfix()
            if non_chained:
                model['.*.W'].unfix()

            model.q_u_means.fix()
            model.q_u_chols.fix()
            optimizer = climin.Adam(model.optimizer_array,
                                    model.stochastic_grad,
                                    step_rate=step_rate,
                                    decay_mom1=1 - 0.9,
                                    decay_mom2=1 - 0.999)
            optimizer.minimize_until(c_full)
            print('iteration (' + str(i + 1) +
                  ') VM step, mini-batch log_likelihood=' +
                  str(model.log_likelihood().flatten()))

    return model