def __init__(self,
             numberOfInducingPoints,  # Number of inducing points in sparse GP
             batchSize,               # Size of mini batch
             dimX,                    # Dimensionality of the latent co-ordinates
             dimZ,                    # Dimensionality of the latent variables
             data,                    # [NxP] matrix of observations
             kernelType='ARD',
             encoderType_qX='FreeForm2',  # 'FreeForm1', 'MLP', 'Kernel'
             encoderType_rX='FreeForm2',  # 'MLP', 'Kernel'
             Xu_optimise=False,
             numHiddenUnits_encoder=10,
             numHiddenUnits_decoder=10,
             numHiddenLayers_decoder=2,
             continuous=True
             ):

    SGPDV.__init__(self,
                   numberOfInducingPoints,
                   batchSize,
                   dimX,
                   dimZ,
                   data,
                   kernelType=kernelType,
                   encoderType_qX=encoderType_qX,
                   encoderType_rX=encoderType_rX,
                   Xu_optimise=Xu_optimise,
                   numberOfEncoderHiddenUnits=numHiddenUnits_encoder
                   )

    self.HU_decoder = numHiddenUnits_decoder
    self.numHiddenLayers_decoder = numHiddenLayers_decoder
    self.continuous = continuous

    # Construct appropriately sized zero matrices to initialise the Theano shared variables
    self.W_zh = sharedZeroMatrix(self.HU_decoder, self.Q, 'W_zh')
    self.W_hy1 = sharedZeroMatrix(self.P, self.HU_decoder, 'W_hy1')
    self.b_zh = sharedZeroVector(self.HU_decoder, 'b_zh', broadcastable=(False, True))
    self.b_hy1 = sharedZeroVector(self.P, 'b_hy1', broadcastable=(False, True))

    self.likelihoodVariables = [self.W_zh, self.W_hy1, self.b_zh, self.b_hy1]

    if self.numHiddenLayers_decoder == 2:
        self.W_hh = sharedZeroMatrix(self.HU_decoder, self.HU_decoder, 'W_hh')
        self.b_hh = sharedZeroVector(self.HU_decoder, 'b_hh', broadcastable=(False, True))
        self.likelihoodVariables.extend([self.W_hh, self.b_hh])

    if self.continuous:
        # Additional output parameters used only for continuous data
        self.W_hy2 = sharedZeroMatrix(self.P, self.HU_decoder, 'W_hy2')
        self.b_hy2 = sharedZeroVector(self.P, 'b_hy2', broadcastable=(False, True))
        self.likelihoodVariables.extend([self.W_hy2, self.b_hy2])

    self.gradientVariables.extend(self.likelihoodVariables)

    # Keep track of bounds and gradients for post analysis
    self.all_bounds = []
    self.all_gradients = []
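# Illustrative sketch (not part of the original code): the decoder that the
# parameter shapes above imply. Written with plain Theano ops; the softplus
# activation, the helper name 'decoder_forward_sketch', and the use of the
# second output head as log-variances are assumptions for illustration only.
#
#     def decoder_forward_sketch(model, z):
#         # z: [Q x B] latent variables; hidden layer h: [HU_decoder x B]
#         h = T.nnet.softplus(T.dot(model.W_zh, z) + model.b_zh)
#         if model.numHiddenLayers_decoder == 2:
#             h = T.nnet.softplus(T.dot(model.W_hh, h) + model.b_hh)
#         # [P x B] mean of p(y|z)
#         y_mean = T.dot(model.W_hy1, h) + model.b_hy1
#         if model.continuous:
#             # [P x B] second output head, e.g. log-variances of a Gaussian likelihood
#             y_log_sigma = T.dot(model.W_hy2, h) + model.b_hy2
#             return y_mean, y_log_sigma
#         # for non-continuous data the mean would typically be squashed, e.g. with a sigmoid
#         return T.nnet.sigmoid(y_mean)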
def __init__(self,
             numberOfInducingPoints,  # Number of inducing points in sparse GP
             batchSize,               # Size of mini batch
             dimX,                    # Dimensionality of the latent co-ordinates
             dimZ,                    # Dimensionality of the latent variables
             data,                    # [NxP] matrix of observations
             kernelType='ARD',
             encoderType_qX='FreeForm2',  # 'FreeForm1', 'MLP', 'Kernel'
             encoderType_rX='FreeForm2',  # 'MLP', 'Kernel'
             Xu_optimise=False,
             numberOfEncoderHiddenUnits=10
             ):

    self.numTestSamples = 5000

    # Set the data
    data = np.asarray(data, dtype=precision)
    self.N = data.shape[0]  # Number of observations
    self.P = data.shape[1]  # Dimension of each observation
    self.M = numberOfInducingPoints
    self.B = batchSize
    self.R = dimX
    self.Q = dimZ
    self.H = numberOfEncoderHiddenUnits

    self.encoderType_qX = encoderType_qX
    self.encoderType_rX = encoderType_rX
    self.Xu_optimise = Xu_optimise

    self.y = th.shared(data)
    self.y.name = 'y'

    if kernelType == 'RBF':
        self.numberOfKernelParameters = 2
    elif kernelType == 'RBFnn':
        self.numberOfKernelParameters = 1
    elif kernelType == 'ARD':
        self.numberOfKernelParameters = self.R + 1
    else:
        raise RuntimeError('Unrecognised kernel type')

    self.lowerBound = -np.inf  # Lower bound

    # Pad the final mini-batch so that every batch has exactly B elements
    self.numberofBatchesPerEpoch = int(np.ceil(np.float32(self.N) / self.B))
    numPad = self.numberofBatchesPerEpoch * self.B - self.N

    self.batchStream = srng.permutation(n=self.N)
    self.padStream = srng.choice(size=(numPad,), a=self.N,
                                 replace=False, p=None, ndim=None, dtype='int32')

    self.batchStream.name = 'batchStream'
    self.padStream.name = 'padStream'

    self.iterator = th.shared(0)
    self.iterator.name = 'iterator'

    self.allBatches = T.reshape(T.concatenate((self.batchStream, self.padStream)),
                                [self.numberofBatchesPerEpoch, self.B])
    self.currentBatch = T.flatten(self.allBatches[self.iterator, :])

    self.allBatches.name = 'allBatches'
    self.currentBatch.name = 'currentBatch'

    self.y_miniBatch = self.y[self.currentBatch, :]
    self.y_miniBatch.name = 'y_miniBatch'

    self.jitterDefault = np.float64(0.0001)
    self.jitterGrowthFactor = np.float64(1.1)
    self.jitter = th.shared(np.asarray(self.jitterDefault, dtype='float64'), name='jitter')

    kfactory = kernelFactory(kernelType)

    # Kernel parameters
    self.log_theta = sharedZeroMatrix(1, self.numberOfKernelParameters, 'log_theta',
                                      broadcastable=(True, False))  # parameters of Kuu, Kzu, Kzz
    self.log_omega = sharedZeroMatrix(1, self.numberOfKernelParameters, 'log_omega',
                                      broadcastable=(True, False))  # parameters of the r(X|z) kernel
    self.log_gamma = sharedZeroMatrix(1, self.numberOfKernelParameters, 'log_gamma',
                                      broadcastable=(True, False))  # parameters of the q(X) kernel

    # Random variables
    self.xi = srng.normal(size=(self.B, self.R), avg=0.0, std=1.0, ndim=None)
    self.alpha = srng.normal(size=(self.M, self.Q), avg=0.0, std=1.0, ndim=None)
    self.beta = srng.normal(size=(self.B, self.Q), avg=0.0, std=1.0, ndim=None)
    self.xi.name = 'xi'
    self.alpha.name = 'alpha'
    self.beta.name = 'beta'

    self.sample_xi = th.function([], self.xi)
    self.sample_alpha = th.function([], self.alpha)
    self.sample_beta = th.function([], self.beta)

    self.sample_batchStream = th.function([], self.batchStream)
    self.sample_padStream = th.function([], self.padStream)

    self.getCurrentBatch = th.function([], self.currentBatch, no_default_updates=True)

    # Compute parameters of q(X)
    if self.encoderType_qX == 'FreeForm1' or self.encoderType_qX == 'FreeForm2':
        # Normal variational distribution over the location of the latent co-ordinates
        self.phi_full = sharedZeroMatrix(self.N, self.R, 'phi_full')
        self.phi = self.phi_full[self.currentBatch, :]
        self.phi.name = 'phi'

        if encoderType_qX == 'FreeForm1':
            # Full covariance over all N latent co-ordinates, parameterised by its square-root factor
            self.Phi_full_sqrt = sharedZeroMatrix(self.N, self.N, 'Phi_full_sqrt')

            Phi_batch_sqrt = self.Phi_full_sqrt[self.currentBatch][:, self.currentBatch]
            Phi_batch_sqrt.name = 'Phi_batch_sqrt'

            self.Phi = dot(Phi_batch_sqrt, Phi_batch_sqrt.T, 'Phi')
            self.cPhi, _, self.logDetPhi = cholInvLogDet(self.Phi, self.B, 0)

            self.qX_vars = [self.Phi_full_sqrt, self.phi_full]

        else:
            # 'FreeForm2': diagonal covariance parameterised by its log diagonal
            self.Phi_full_logdiag = sharedZeroArray(self.N, 'Phi_full_logdiag')

            Phi_batch_logdiag = self.Phi_full_logdiag[self.currentBatch]
            Phi_batch_logdiag.name = 'Phi_batch_logdiag'

            self.Phi, self.cPhi, _, self.logDetPhi \
                = diagCholInvLogDet_fromLogDiag(Phi_batch_logdiag, 'Phi')

            self.qX_vars = [self.Phi_full_logdiag, self.phi_full]

    elif self.encoderType_qX == 'MLP':

        # Auto encode
        self.W1_qX = sharedZeroMatrix(self.H, self.P, 'W1_qX')
        self.W2_qX = sharedZeroMatrix(self.R, self.H, 'W2_qX')
        self.W3_qX = sharedZeroMatrix(1, self.H, 'W3_qX')
        self.b1_qX = sharedZeroVector(self.H, 'b1_qX', broadcastable=(False, True))
        self.b2_qX = sharedZeroVector(self.R, 'b2_qX', broadcastable=(False, True))
        self.b3_qX = sharedZeroVector(1, 'b3_qX', broadcastable=(False, True))

        # [HxB] = softplus( [HxP] . [BxP]^T + repmat([Hx1], [1,B]) )
        h_qX = softplus(plus(dot(self.W1_qX, self.y_miniBatch.T), self.b1_qX), 'h_qX')
        # [RxB] = [RxH] . [HxB] + repmat([Rx1], [1,B])
        mu_qX = plus(dot(self.W2_qX, h_qX), self.b2_qX, 'mu_qX')
        # [1xB] = 0.5 * ( [1xH] . [HxB] + repmat([1x1], [1,B]) )
        log_sigma_qX = mul(0.5, plus(dot(self.W3_qX, h_qX), self.b3_qX), 'log_sigma_qX')

        self.phi = mu_qX.T  # [BxR]
        self.Phi, self.cPhi, self.iPhi, self.logDetPhi \
            = diagCholInvLogDet_fromLogDiag(log_sigma_qX, 'Phi')

        self.qX_vars = [self.W1_qX, self.W2_qX, self.W3_qX,
                        self.b1_qX, self.b2_qX, self.b3_qX]

    elif self.encoderType_qX == 'Kernel':

        # Draw the latent co-ordinates from a GP with the data as co-ordinates
        self.Phi = kfactory.kernel(self.y_miniBatch, None, self.log_gamma, 'Phi')
        self.phi = sharedZeroMatrix(self.B, self.R, 'phi')
        (self.cPhi, self.iPhi, self.logDetPhi) = cholInvLogDet(self.Phi, self.B, self.jitter)

        self.qX_vars = [self.log_gamma]

    else:
        raise RuntimeError('Unrecognised encoding for q(X): ' + self.encoderType_qX)

    # Variational distribution q(u)
    self.kappa = sharedZeroMatrix(self.M, self.Q, 'kappa')
    self.Kappa_sqrt = sharedZeroMatrix(self.M, self.M, 'Kappa_sqrt')
    self.Kappa = dot(self.Kappa_sqrt, self.Kappa_sqrt.T, 'Kappa')

    (self.cKappa, self.iKappa, self.logDetKappa) \
        = cholInvLogDet(self.Kappa, self.M, 0)
    self.qu_vars = [self.Kappa_sqrt, self.kappa]

    # Calculate latent co-ordinates Xz
    # [BxR] = [BxR] + [BxB] . [BxR]
    self.Xz = plus(self.phi, dot(self.cPhi, self.xi), 'Xz')
    # Inducing point co-ordinates
    self.Xu = sharedZeroMatrix(self.M, self.R, 'Xu')

    # Kernels
    self.Kzz = kfactory.kernel(self.Xz, None, self.log_theta, 'Kzz')
    self.Kuu = kfactory.kernel(self.Xu, None, self.log_theta, 'Kuu')
    self.Kzu = kfactory.kernel(self.Xz, self.Xu, self.log_theta, 'Kzu')
    self.cKuu, self.iKuu, self.logDetKuu = cholInvLogDet(self.Kuu, self.M, self.jitter)

    # Variational distribution
    # A has dims [BxM] = [BxM] . [MxM]
    self.A = dot(self.Kzu, self.iKuu, 'A')
    # C is the covariance of the conditional distribution q(z|u,Xz)
    self.C = minus(self.Kzz, dot(self.A, self.Kzu.T), 'C')
    self.cC, self.iC, self.logDetC = cholInvLogDet(self.C, self.B, self.jitter)

    # Sample u_q from q(u_q) = N(u_q; kappa_q, Kappa)  [MxQ]
    self.u = plus(self.kappa, dot(self.cKappa, self.alpha), 'u')
    # Compute the mean of z
    # [BxQ] = [BxM] . [MxQ]
    self.mu = dot(self.A, self.u, 'mu')
    # Sample z from q(z|u,X) = N(mu, C)
    # [BxQ] = [BxQ] + [BxB] . [BxQ]
    self.z = plus(self.mu, dot(self.cC, self.beta), 'z')

    self.qz_vars = [self.log_theta]

    # Upsilon is the inverse of iUpsilon = Kappa^-1 + A^T . C^-1 . A
    self.iUpsilon = plus(self.iKappa, dot(self.A.T, dot(self.iC, self.A)), 'iUpsilon')
    _, self.Upsilon, self.negLogDetUpsilon = cholInvLogDet(self.iUpsilon, self.M, self.jitter)

    if self.encoderType_rX == 'MLP':

        self.W1_rX = sharedZeroMatrix(self.H, self.Q + self.P, 'W1_rX')
        self.W2_rX = sharedZeroMatrix(self.R, self.H, 'W2_rX')
        self.W3_rX = sharedZeroMatrix(self.R, self.H, 'W3_rX')
        self.b1_rX = sharedZeroVector(self.H, 'b1_rX', broadcastable=(False, True))
        self.b2_rX = sharedZeroVector(self.R, 'b2_rX', broadcastable=(False, True))
        self.b3_rX = sharedZeroVector(self.R, 'b3_rX', broadcastable=(False, True))

        # [HxB] = softplus( [Hx(Q+P)] . [(Q+P)xB] + repmat([Hx1], [1,B]) )
        h_rX = softplus(plus(dot(self.W1_rX,
                                 T.concatenate((self.z.T, self.y_miniBatch.T))),
                             self.b1_rX), 'h_rX')
        # [RxB] = [RxH] . [HxB] + repmat([Rx1], [1,B])
        mu_rX = plus(dot(self.W2_rX, h_rX), self.b2_rX, 'mu_rX')
        # [RxB] = 0.5 * ( [RxH] . [HxB] + repmat([Rx1], [1,B]) )
        log_sigma_rX = mul(0.5, plus(dot(self.W3_rX, h_rX), self.b3_rX), 'log_sigma_rX')

        self.tau = mu_rX.T

        # Diagonal optimisation of Tau
        self.Tau_isDiagonal = True
        self.Tau = T.reshape(log_sigma_rX, [self.B * self.R, 1])
        self.logDetTau = T.sum(log_sigma_rX)
        self.Tau.name = 'Tau'
        self.logDetTau.name = 'logDetTau'

        self.rX_vars = [self.W1_rX, self.W2_rX, self.W3_rX,
                        self.b1_rX, self.b2_rX, self.b3_rX]

    elif self.encoderType_rX == 'Kernel':

        self.tau = sharedZeroMatrix(self.B, self.R, 'tau')

        # Tau_r [BxB] = kernel( [[BxQ]^T, [BxP]^T].T )
        Tau_r = kfactory.kernel(T.concatenate((self.z.T, self.y_miniBatch.T)).T,
                                None, self.log_omega, 'Tau_r')
        (cTau_r, iTau_r, logDetTau_r) = cholInvLogDet(Tau_r, self.B, self.jitter)

        # Tau has Kronecker structure, so only the [BxB] factor is computed explicitly
        # self.Tau = slinalg.kron(T.eye(self.R), Tau_r)
        self.cTau = slinalg.kron(cTau_r, T.eye(self.R))
        self.iTau = slinalg.kron(iTau_r, T.eye(self.R))
        self.logDetTau = logDetTau_r * self.R

        self.tau.name = 'tau'
        # self.Tau.name = 'Tau'
        self.cTau.name = 'cTau'
        self.iTau.name = 'iTau'
        self.logDetTau.name = 'logDetTau'

        self.Tau_isDiagonal = False
        self.rX_vars = [self.log_omega]

    else:
        raise RuntimeError('Unrecognised encoding for r(X|z)')

    # Gradient variables - should be all the th.shared variables
    # We always want to optimise these variables
    if self.Xu_optimise:
        self.gradientVariables = [self.Xu]
    else:
        self.gradientVariables = []

    self.gradientVariables.extend(self.qu_vars)
    self.gradientVariables.extend(self.qz_vars)
    self.gradientVariables.extend(self.qX_vars)
    self.gradientVariables.extend(self.rX_vars)

    self.lowerBounds = []

    # Compiled functions for monitoring the condition numbers of key matrices
    self.condKappa = myCond()(self.Kappa)
    self.condKappa.name = 'condKappa'
    self.Kappa_conditionNumber = th.function([], self.condKappa, no_default_updates=True)

    self.condKuu = myCond()(self.Kuu)
    self.condKuu.name = 'condKuu'
    self.Kuu_conditionNumber = th.function([], self.condKuu, no_default_updates=True)

    self.condC = myCond()(self.C)
    self.condC.name = 'condC'
    self.C_conditionNumber = th.function([], self.condC, no_default_updates=True)

    self.condUpsilon = myCond()(self.Upsilon)
    self.condUpsilon.name = 'condUpsilon'
    self.Upsilon_conditionNumber = th.function([], self.condUpsilon, no_default_updates=True)

    self.Xz_get_value = th.function([], self.Xz, no_default_updates=True)
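    # For reference, the reparameterised sampling chain constructed above is
    # (shapes in brackets; xi, alpha, beta are standard normal draws):
    #
    #     Xz = phi   + cPhi   . xi                        [BxR]
    #     u  = kappa + cKappa . alpha                     [MxQ]
    #     z  = A . u + cC . beta,   A = Kzu . Kuu^-1,     [BxQ]
    #           with C = Kzz - A . Kzu^T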
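# Illustrative usage sketch (not part of the original file): exercising the
# compiled Theano helpers defined in SGPDV.__init__ on a toy dataset. The
# class name 'Model' is a hypothetical stand-in for the decoder subclass whose
# constructor appears above, and the hyperparameter values are arbitrary; the
# shared parameters are created as zeros, so they would normally be initialised
# before the kernel matrices are well conditioned.
#
#     Y = np.random.randn(100, 5)          # toy [NxP] data, N=100, P=5
#     model = Model(numberOfInducingPoints=10, batchSize=20,
#                   dimX=2, dimZ=3, data=Y)
#     model.sample_batchStream()           # draw a fresh permutation of the data
#     model.sample_padStream()             # and the indices padding the last batch
#     model.iterator.set_value(0)          # point at the first mini-batch
#     batch = model.getCurrentBatch()      # [B] indices into the data
#     print(model.Kuu_conditionNumber())   # monitor conditioning of Kuu
#     print(model.Xz_get_value().shape)    # sampled latent co-ordinates, [BxR]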