Example #1
def maximizeVariationalLowerBound(x_train, y_train, qPrior, taskId):
    for x_train_batch, y_train_batch in getBatch(x_train, y_train):
        # Initialize the posterior from the current prior
        qPosterior = ParametersDistribution(sharedDim, headDim, headCount)
        qPosterior.overwrite(qPrior)
        # Optimize only the parameters belonging to the current task head
        parameters = qPosterior.getFlattenedParameters(taskId)
        optimizer = torch.optim.Adam(parameters, lr=0.001)
        lossArgs = (x_train_batch, y_train_batch, qPosterior, qPrior, taskId,
                    numSamples)
        minimizeLoss(1, optimizer, computeCost, lossArgs)
        # The optimized posterior becomes the prior for the next batch
        qPrior.overwrite(qPosterior)
    return qPosterior
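Note: minimizeLoss and computeCost are used by every example here but are not defined in any of them. The sketch below shows one way a compatible minimizeLoss could be written, inferred only from how it is called (iteration count, optimizer, loss function, loss arguments); treat it as an assumption rather than the project's actual implementation.

def minimizeLoss(numIterations, optimizer, lossFunction, lossArgs):
    # Sketch only: signature and return value inferred from the call sites in these examples
    loss = None
    for _ in range(numIterations):
        optimizer.zero_grad()           # clear gradients from the previous step
        loss = lossFunction(*lossArgs)  # e.g. computeCost(...) or kl.computeKL(...)
        loss.backward()                 # backpropagate through the loss
        optimizer.step()                # update the flattened parameters
    return loss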
Example #2
def maximizeVariationalLowerBound(model, x_train, y_train, qPrior, taskId):
    qPosterior = ParametersDistribution(sharedDim, headDim, headCount)
    # Note: the optimizer is given the parameters of head 2, not of head taskId
    parameters = qPosterior.getFlattenedParameters(2)
    optimizer = torch.optim.Adam(parameters, lr=0.001)
    for i, (x_train_batch, y_train_batch) in enumerate(getBatch(x_train,
                                                                y_train)):
        lossArgs = (model, x_train_batch, y_train_batch, qPosterior, qPrior,
                    taskId)
        minimizeLoss(1, optimizer, computeCost, lossArgs)
        # Stop after 100 batches
        if i >= 99:
            break
    return qPosterior
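For context, a hypothetical driver loop around this function might look like the following; taskData, model, and feeding the returned posterior back in as the next prior are assumptions based on how overwrite is used in the other examples, not code from the source repository.

# Hypothetical usage sketch (taskData and model are assumed names)
qPrior = ParametersDistribution(sharedDim, headDim, headCount)
for taskId, (x_train, y_train) in enumerate(taskData):
    qPosterior = maximizeVariationalLowerBound(model, x_train, y_train,
                                               qPrior, taskId)
    # Carry the optimized posterior forward as the next task's prior
    qPrior.overwrite(qPosterior)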
Example #3
    def maximizeVariationalLowerBound(self,
                                      posterior,
                                      x_train,
                                      y_train,
                                      headId,
                                      t,
                                      isCoreset=False):
        # Copy the current posterior into a fixed prior for this task
        prior = ParametersDistribution(self.sharedWeightDim,
                                       self.headWeightDim, self.numHeads)
        prior.overwrite(posterior, True)
        if not isCoreset:
            posterior.initializeHeads(headId)

        # Overwrite the posterior with itself to detach it from the old graph
        posterior.overwrite(posterior)

        parameters = posterior.getFlattenedParameters(headId)
        optimizer = torch.optim.Adam(parameters, lr=0.001)
        num_train_samples = 10
        for epoch in range(self.numEpochs):
            # Reshuffle the training data at the start of every epoch
            idx = torch.randperm(x_train.shape[0])
            x_train, y_train = x_train[idx], y_train[idx]
            for batchId, train_batch in enumerate(self.getBatch(x_train,
                                                                y_train)):
                x_train_batch, y_train_batch = train_batch
                lossArgs = (x_train_batch, y_train_batch, posterior, prior,
                            headId, num_train_samples, self.alpha)
                loss = minimizeLoss(1, optimizer, computeCost, lossArgs)
                if batchId % 100 == 0:
                    print('Max Variational ELBO: #epoch: [{}/{}], #batch: [{}/{}], loss: {}'
                          .format(epoch + 1, self.numEpochs, batchId + 1,
                                  self.getNumBatches(x_train), loss))
        return posterior
Example #4
import sys
sys.path.append('../')
sys.path.append('../src/')

import torch
import torch.autograd as autograd
from KL import KL
from optimizer import minimizeLoss
from parameters_distribution import ParametersDistribution
from constants import FloatTensor, MEAN, VARIANCE, WEIGHT, BIAS

sharedDim = (3, 3, 3)
headDim = (2, 3, 1)
headCount = 3
qPrior = ParametersDistribution(sharedDim, headDim, headCount)
qPosterior1 = ParametersDistribution(sharedDim, headDim, headCount)
qPosterior2 = ParametersDistribution(sharedDim, headDim, headCount)

kl = KL()

# Fit the first posterior to the prior by minimizing the KL divergence on head 2
parameters = qPosterior1.getFlattenedParameters(2)
optimizer = torch.optim.Adam(parameters, lr=0.001)
lossArgs = (qPosterior1, qPrior, 2)
minimizeLoss(1000, optimizer, kl.computeKL, lossArgs)

print('\n--------- Change initialization ---------\n')

# Repeat from a different initialization to check the result does not depend on it
parameters = qPosterior2.getFlattenedParameters(2)
optimizer = torch.optim.Adam(parameters, lr=0.001)
lossArgs = (qPosterior2, qPrior, 2)
minimizeLoss(1000, optimizer, kl.computeKL, lossArgs)
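Assuming kl.computeKL returns a scalar tensor (as its use as a loss function suggests), a final check like the one below could be appended to confirm that both runs drive the divergence toward zero; it is an illustrative addition, not part of the original script.

# Illustrative check (assumes computeKL returns a scalar tensor)
print('Final KL, run 1: {}'.format(kl.computeKL(qPosterior1, qPrior, 2)))
print('Final KL, run 2: {}'.format(kl.computeKL(qPosterior2, qPrior, 2)))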
Example #5

def printParameterInfo(qPrior, qPosterior, title):
    # Print the current cost and the parameter sums (head 1) for both distributions
    topBanner = '--------------- {} ---------------'.format(title)
    print('\n' + topBanner)
    print('Cost: {}'.format(computeCost(qPrior, qPosterior)))
    print('Sum for qPosterior: {}'.format(
        sumAllParameters(qPosterior.getFlattenedParameters(1))))
    print('Sum for qPrior: {}'.format(
        sumAllParameters(qPrior.getFlattenedParameters(1))))
    print('-' * len(topBanner) + '\n')


sharedWeightDim = (LAYER_COUNT - 1, INPUT_SIZE, HIDDEN_SIZE)
headWeightDim = (1, HIDDEN_SIZE, CLASS_COUNT)

vanillaNN = VanillaNN(INPUT_SIZE, HIDDEN_SIZE, LAYER_COUNT, CLASS_COUNT)
qPrior = ParametersDistribution(sharedWeightDim, headWeightDim, HEAD_COUNT)
qPosterior = ParametersDistribution(sharedWeightDim, headWeightDim, HEAD_COUNT)
# qPosterior.overwrite(qPrior)

printParameterInfo(qPrior, qPosterior, 'Before')

# Optimize head 1 of the posterior against the prior, then compare before/after
parameters = qPosterior.getFlattenedParameters(1)
optimizer = torch.optim.Adam(parameters, lr=0.01)

lossArgs = (qPosterior, qPrior)
minimizeLoss(1000, optimizer, computeCost, lossArgs)

printParameterInfo(qPrior, qPosterior, 'After')
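This script also relies on a sumAllParameters helper that is not shown. A minimal sketch consistent with how it is called on the flattened parameter list, offered purely as an assumption about its behaviour:

def sumAllParameters(parameters):
    # Assumed behaviour: total of every entry across the flattened parameter tensors
    return sum(float(p.sum()) for p in parameters)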