def maximizeVariationalLowerBound(x_train, y_train, qPrior, taskId):
    for x_train_batch, y_train_batch in getBatch(x_train, y_train):
        qPosterior = ParametersDistribution(sharedDim, headDim, headCount)
        qPosterior.overwrite(qPrior)
        parameters = qPosterior.getFlattenedParameters(taskId)
        optimizer = torch.optim.Adam(parameters, lr=0.001)
        lossArgs = (x_train_batch, y_train_batch, qPosterior, qPrior, taskId, numSamples)
        minimizeLoss(1, optimizer, computeCost, lossArgs)
        qPrior.overwrite(qPosterior)
    return qPosterior
def maximizeVariationalLowerBound(model, x_train, y_train, qPrior, taskId):
    qPosterior = ParametersDistribution(sharedDim, headDim, headCount)
    parameters = qPosterior.getFlattenedParameters(2)
    optimizer = torch.optim.Adam(parameters, lr=0.001)
    i = 0
    for x_train_batch, y_train_batch in getBatch(x_train, y_train):
        lossArgs = (model, x_train_batch, y_train_batch, qPosterior, qPrior, taskId)
        minimizeLoss(1, optimizer, computeCost, lossArgs)
        i += 1
        if i > 99:
            break
    return qPosterior
def maximizeVariationalLowerBound(self, posterior, x_train, y_train, headId, t, isCoreset=False):
    # create dummy new posterior
    prior = ParametersDistribution(self.sharedWeightDim, self.headWeightDim, self.numHeads)
    prior.overwrite(posterior, True)
    if not isCoreset:
        posterior.initializeHeads(headId)
    # Overwrite done to detach from graph
    posterior.overwrite(posterior)
    parameters = posterior.getFlattenedParameters(headId)
    optimizer = torch.optim.Adam(parameters, lr=0.001)
    num_train_samples = 10
    for epoch in range(self.numEpochs):
        idx = torch.randperm(x_train.shape[0])
        x_train, y_train = x_train[idx], y_train[idx]
        for iter, train_batch in enumerate(self.getBatch(x_train, y_train)):
            x_train_batch, y_train_batch = train_batch
            lossArgs = (x_train_batch, y_train_batch, posterior, prior, headId, num_train_samples, self.alpha)
            loss = minimizeLoss(1, optimizer, computeCost, lossArgs)
            if iter % 100 == 0:
                print('Max Variational ELBO: #epoch: [{}/{}], #batch: [{}/{}], loss: {}'
                      .format(epoch + 1, self.numEpochs, iter + 1, self.getNumBatches(x_train), loss))
    return posterior
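
# The minimizeLoss helper used above comes from optimizer.py and is not shown in
# this listing. The sketch below is an assumption inferred only from its call sites,
# minimizeLoss(numIterations, optimizer, lossFunction, lossArgs), and from the fact
# that the caller logs its return value; the real implementation may differ.
def minimizeLoss(numIterations, optimizer, lossFunction, lossArgs):
    loss = None
    for _ in range(numIterations):
        optimizer.zero_grad()           # reset gradients on the flattened variational parameters
        loss = lossFunction(*lossArgs)  # e.g. computeCost(...) or kl.computeKL(...)
        loss.backward()                 # backpropagate through means and variances
        optimizer.step()                # one Adam update
    return loss                         # last loss value, used for logging by the caller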
import sys
sys.path.append('../')
sys.path.append('../src/')

import torch
import torch.autograd as autograd

from KL import KL
from optimizer import minimizeLoss
from parameters_distribution import ParametersDistribution
from constants import FloatTensor, MEAN, VARIANCE, WEIGHT, BIAS

sharedDim = (3, 3, 3)
headDim = (2, 3, 1)
headCount = 3

qPrior = ParametersDistribution(sharedDim, headDim, headCount)
qPosterior1 = ParametersDistribution(sharedDim, headDim, headCount)
qPosterior2 = ParametersDistribution(sharedDim, headDim, headCount)

kl = KL()

parameters = qPosterior1.getFlattenedParameters(2)
optimizer = torch.optim.Adam(parameters, lr=0.001)
lossArgs = (qPosterior1, qPrior, 2)
minimizeLoss(1000, optimizer, kl.computeKL, lossArgs)

print('\n--------- Change initialization ---------\n')

parameters = qPosterior2.getFlattenedParameters(2)
optimizer = torch.optim.Adam(parameters, lr=0.001)
lossArgs = (qPosterior2, qPrior, 2)
minimizeLoss(1000, optimizer, kl.computeKL, lossArgs)
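
# Sketch of the closed-form quantity that kl.computeKL presumably evaluates for the
# selected head: the KL divergence between two fully factorised Gaussians,
# KL(q || p) = 0.5 * sum( log(varP/varQ) + (varQ + (meanQ - meanP)^2) / varP - 1 ).
# Only the standard formula is shown; the tensor layout of the real KL class
# (MEAN/VARIANCE, WEIGHT/BIAS) is not reproduced here, and a log-variance
# parameterisation is assumed.
def diagonalGaussianKL(meanQ, logVarQ, meanP, logVarP):
    return 0.5 * torch.sum(
        logVarP - logVarQ
        + (logVarQ.exp() + (meanQ - meanP) ** 2) / logVarP.exp()
        - 1.0)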
def printParameterInfo(qPrior, qPosterior, title):
    topBanner = '--------------- {} ---------------'.format(title)
    print('\n' + topBanner)
    print('Cost: {}'.format(computeCost(qPrior, qPosterior)))
    print('Sum for qPosterior: {}'.format(
        sumAllParameters(qPosterior.getFlattenedParameters(1))))
    print('Sum for qPrior: {}'.format(
        sumAllParameters(qPrior.getFlattenedParameters(1))))
    print('-' * len(topBanner) + '\n')

sharedWeightDim = (LAYER_COUNT - 1, INPUT_SIZE, HIDDEN_SIZE)
headWeightDim = (1, HIDDEN_SIZE, CLASS_COUNT)

vanillaNN = VanillaNN(INPUT_SIZE, HIDDEN_SIZE, LAYER_COUNT, CLASS_COUNT)
qPrior = ParametersDistribution(sharedWeightDim, headWeightDim, HEAD_COUNT)
qPosterior = ParametersDistribution(sharedWeightDim, headWeightDim, HEAD_COUNT)
# qPosterior.overwrite(qPrior)

printParameterInfo(qPrior, qPosterior, 'Before')

parameters = qPosterior.getFlattenedParameters(1)
optimizer = torch.optim.Adam(parameters, lr=0.01)
lossArgs = (qPosterior, qPrior)
minimizeLoss(1000, optimizer, computeCost, lossArgs)

printParameterInfo(qPrior, qPosterior, 'After')
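
# sumAllParameters is not defined in this listing; a minimal sketch is given below,
# assuming getFlattenedParameters returns an iterable of tensors. It is only used to
# produce a single scalar fingerprint of a distribution before and after optimisation,
# so any reduction to one number would serve the same purpose.
def sumAllParameters(parameters):
    return sum(parameter.sum().item() for parameter in parameters)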