def buildKernel(self, X, kernelNames, regions, yVar):
    """Build the combined kernel for all regions plus its initial hyperparameters.

    For every region ``r`` the columns ``X[:, r]`` are turned into one kernel
    (or, for the ``*_lin`` names, a kernel followed by a separate linear
    kernel). Every kernel is wrapped in ``kernels.ScaledKernel`` and the
    wrapped kernels are combined with ``kernels.SumKernel``.

    Args:
        X: 2-D array; ``X[:, r]`` selects the columns of one region.
        kernelNames: sequence of kernel names, indexed in parallel with
            ``regions``. Supported names: 'lin', 'poly2', 'poly3', 'rbf',
            'gabor', 'nn', 'rq', 'matern1', 'matern3', 'matern5', 'pp0',
            'pp1', 'pp2', 'pp3', 'rbf_lin', 'nn_lin', 'matern3_lin',
            'matern5_lin', 'poly2_lin', 'poly3_lin'.
        regions: sequence of column selectors, one per region.
        yVar: variance of the response, used to initialise the scaling
            hyperparameters.

    Returns:
        Tuple ``(combinedKernel, hyp0_kernels)``. ``hyp0_kernels`` lists, in
        kernel order, each kernel's own hyperparameters (initialised to
        ``log(1.0)``) followed by its scaling hyperparameter.

    Raises:
        ValueError: if ``regions`` is empty or a kernel name is not supported.
    """
    numVarComp = len(regions)
    if numVarComp == 0:
        # Previously this surfaced as an UnboundLocalError after the loop.
        raise ValueError('buildKernel requires at least one region')

    # Kernels computed directly from the region's columns:
    # name -> (class name in the `kernels` module, number of own hyperparameters).
    dataKernels = {
        'rbf': ('RBFKernel', 1),          # ell
        'gabor': ('GaborKernel', 2),      # ell and p
        'nn': ('NNKernel', 1),            # ell
        'rq': ('RQKernel', 2),            # ell and alpha
        'matern1': ('Matern1Kernel', 1),  # ell
        'matern3': ('Matern3Kernel', 1),  # ell
        'matern5': ('Matern5Kernel', 1),  # ell
        'pp0': ('PP0Kernel', 1),          # ell
        'pp1': ('PP1Kernel', 1),          # ell
        'pp2': ('PP2Kernel', 1),          # ell
        'pp3': ('PP3Kernel', 1),          # ell
    }
    # Polynomial kernels wrapped around a linear kernel; one bias hyperparameter.
    polyKernels = {'poly2': 'Poly2Kernel', 'poly3': 'Poly3Kernel'}
    # Homogeneous polynomial kernel followed by a separate linear kernel.
    homoPolyLinKernels = {
        'poly2_lin': 'Poly2KernelHomo',
        'poly3_lin': 'Poly3KernelHomo',
    }
    # "<name>_lin": the named data kernel followed by a separate linear kernel.
    pairedWithLin = {
        'rbf_lin': 'rbf',
        'nn_lin': 'nn',
        'matern5_lin': 'matern5',
        'matern3_lin': 'matern3',
    }

    hyp0_kernels = []
    kernelsList = []

    def addScaled(kernel, ownHyps, scaleHyp):
        # Own hyperparameters come first, then the scaling hyperparameter.
        hyp0_kernels.extend(ownHyps)
        hyp0_kernels.append(scaleHyp)
        kernelsList.append(kernels.ScaledKernel(kernel))

    for r_i, r in enumerate(regions):
        kernelName = kernelNames[r_i]

        if kernelName in homoPolyLinKernels:
            polyClass = getattr(kernels, homoPolyLinKernels[kernelName])
            # NOTE(review): unlike every other kernel, the scaling
            # initialisation here does not include yVar -- kept as in the
            # original; confirm whether that is intentional.
            scaleHyp = 0.5 * np.log(0.5 / numVarComp)
            addScaled(polyClass(kernels.linearKernel(X[:, r])), [], scaleHyp)
            addScaled(kernels.linearKernel(X[:, r]), [], scaleHyp)
            continue

        baseName = pairedWithLin.get(kernelName, kernelName)
        if baseName == 'lin':
            kernel = kernels.linearKernel(X[:, r])
            numOwnHyps = 0
        elif baseName in polyKernels:
            polyClass = getattr(kernels, polyKernels[baseName])
            kernel = polyClass(kernels.linearKernel(X[:, r]))
            numOwnHyps = 1  # bias hyp
        elif baseName in dataKernels:
            className, numOwnHyps = dataKernels[baseName]
            kernel = getattr(kernels, className)(X[:, r])
        else:
            raise ValueError('unknown kernel: ' + kernelName)

        scaleHyp = 0.5 * np.log(0.5 * yVar / numVarComp)
        addScaled(kernel, [np.log(1.0)] * numOwnHyps, scaleHyp)
        if kernelName in pairedWithLin:
            addScaled(kernels.linearKernel(X[:, r]), [], scaleHyp)

    # NOTE(review): this branch is unreachable as written, because 'add' is
    # rejected as an unknown kernel inside the loop. The original referenced
    # an undefined name `y` here; X.shape[0] is used instead on the assumption
    # that the argument is the number of samples -- confirm before enabling.
    if kernelName == 'add':
        combinedKernel = kernels.AdditiveKernel(kernelsList, X.shape[0])
        hyp0_kernels = np.concatenate(
            (np.zeros(len(kernelsList)), hyp0_kernels))
    else:
        combinedKernel = kernels.SumKernel(kernelsList)
    return combinedKernel, hyp0_kernels
def buildKernel(self, X, kernelNames, regions, yVar):
    """Build the combined kernel for all regions plus its initial hyperparameters.

    For every region ``r`` the columns ``X[:, r]`` are turned into one kernel
    (or, for the ``*_lin`` names, a kernel followed by a separate linear
    kernel). Every kernel is wrapped in ``kernels.ScaledKernel`` and the
    wrapped kernels are combined with ``kernels.SumKernel``.

    Args:
        X: 2-D array; ``X[:, r]`` selects the columns of one region.
        kernelNames: sequence of kernel names, indexed in parallel with
            ``regions``. Supported names: 'lin', 'poly2', 'poly3', 'rbf',
            'gabor', 'nn', 'rq', 'matern1', 'matern3', 'matern5', 'pp0',
            'pp1', 'pp2', 'pp3', 'rbf_lin', 'nn_lin', 'matern3_lin',
            'matern5_lin', 'poly2_lin', 'poly3_lin'.
        regions: sequence of column selectors, one per region.
        yVar: variance of the response, used to initialise the scaling
            hyperparameters.

    Returns:
        Tuple ``(combinedKernel, hyp0_kernels)``. ``hyp0_kernels`` lists, in
        kernel order, each kernel's own hyperparameters (initialised to
        ``log(1.0)``) followed by its scaling hyperparameter.

    Raises:
        ValueError: if ``regions`` is empty or a kernel name is not supported.
    """
    numVarComp = len(regions)
    if numVarComp == 0:
        # Previously this surfaced as an UnboundLocalError after the loop.
        raise ValueError('buildKernel requires at least one region')

    # Kernels computed directly from the region's columns:
    # name -> (class name in the `kernels` module, number of own hyperparameters).
    dataKernels = {
        'rbf': ('RBFKernel', 1),          # ell
        'gabor': ('GaborKernel', 2),      # ell and p
        'nn': ('NNKernel', 1),            # ell
        'rq': ('RQKernel', 2),            # ell and alpha
        'matern1': ('Matern1Kernel', 1),  # ell
        'matern3': ('Matern3Kernel', 1),  # ell
        'matern5': ('Matern5Kernel', 1),  # ell
        'pp0': ('PP0Kernel', 1),          # ell
        'pp1': ('PP1Kernel', 1),          # ell
        'pp2': ('PP2Kernel', 1),          # ell
        'pp3': ('PP3Kernel', 1),          # ell
    }
    # Polynomial kernels wrapped around a linear kernel; one bias hyperparameter.
    polyKernels = {'poly2': 'Poly2Kernel', 'poly3': 'Poly3Kernel'}
    # Homogeneous polynomial kernel followed by a separate linear kernel.
    homoPolyLinKernels = {
        'poly2_lin': 'Poly2KernelHomo',
        'poly3_lin': 'Poly3KernelHomo',
    }
    # "<name>_lin": the named data kernel followed by a separate linear kernel.
    pairedWithLin = {
        'rbf_lin': 'rbf',
        'nn_lin': 'nn',
        'matern5_lin': 'matern5',
        'matern3_lin': 'matern3',
    }

    hyp0_kernels = []
    kernelsList = []

    def addScaled(kernel, ownHyps, scaleHyp):
        # Own hyperparameters come first, then the scaling hyperparameter.
        hyp0_kernels.extend(ownHyps)
        hyp0_kernels.append(scaleHyp)
        kernelsList.append(kernels.ScaledKernel(kernel))

    for r_i, r in enumerate(regions):
        kernelName = kernelNames[r_i]

        if kernelName in homoPolyLinKernels:
            polyClass = getattr(kernels, homoPolyLinKernels[kernelName])
            # NOTE(review): unlike every other kernel, the scaling
            # initialisation here does not include yVar -- kept as in the
            # original; confirm whether that is intentional.
            scaleHyp = 0.5 * np.log(0.5 / numVarComp)
            addScaled(polyClass(kernels.linearKernel(X[:, r])), [], scaleHyp)
            addScaled(kernels.linearKernel(X[:, r]), [], scaleHyp)
            continue

        baseName = pairedWithLin.get(kernelName, kernelName)
        if baseName == 'lin':
            kernel = kernels.linearKernel(X[:, r])
            numOwnHyps = 0
        elif baseName in polyKernels:
            polyClass = getattr(kernels, polyKernels[baseName])
            kernel = polyClass(kernels.linearKernel(X[:, r]))
            numOwnHyps = 1  # bias hyp
        elif baseName in dataKernels:
            className, numOwnHyps = dataKernels[baseName]
            kernel = getattr(kernels, className)(X[:, r])
        else:
            raise ValueError('unknown kernel: ' + kernelName)

        scaleHyp = 0.5 * np.log(0.5 * yVar / numVarComp)
        addScaled(kernel, [np.log(1.0)] * numOwnHyps, scaleHyp)
        if kernelName in pairedWithLin:
            addScaled(kernels.linearKernel(X[:, r]), [], scaleHyp)

    # NOTE(review): this branch is unreachable as written, because 'add' is
    # rejected as an unknown kernel inside the loop. The original referenced
    # an undefined name `y` here; X.shape[0] is used instead on the assumption
    # that the argument is the number of samples -- confirm before enabling.
    if kernelName == 'add':
        combinedKernel = kernels.AdditiveKernel(kernelsList, X.shape[0])
        hyp0_kernels = np.concatenate(
            (np.zeros(len(kernelsList)), hyp0_kernels))
    else:
        combinedKernel = kernels.SumKernel(kernelsList)
    return combinedKernel, hyp0_kernels
def buildKernelAdapt(self, X, C, y, regions, reml=True, maxiter=100):
    """Greedily choose a kernel name for each region by comparing fitted models.

    Regions are processed in order. For each region a scaled linear kernel is
    always added; then each candidate in ``kernelsToTry`` is fitted on top of
    the kernels already selected for earlier regions, by minimising
    ``self.infExact_scipy`` with ``gpUtils.minimize``. A non-linear candidate
    replaces the current choice only if its p-value against the linear-only
    fit is the smallest seen so far and below ``0.05 / (number of non-linear
    candidates)``.

    Args:
        X: 2-D array; ``X[:, r]`` selects the columns of one region.
        C: passed through to ``self.getInitialHyps`` and to the objective.
        y: response vector; its variance initialises the scaling hyps.
        regions: sequence of column selectors, one per region.
        reml: passed through to the objective function.
        maxiter: passed negated to ``gpUtils.minimize``.

    Returns:
        List with the selected kernel name for each region.

    Raises:
        Exception: if an optimisation does not report success, or if an
            unrecognised candidate name is encountered.

    Side effects:
        Resets ``self.optimization_counter`` to 0 before every optimisation
        and prints progress (some messages regardless of ``self.verbose``).
    """
    # Prepare initial values for sig2e and for the fixed effects.
    hyp0_sig2e, hyp0_fixedEffects = self.getInitialHyps(X, C, y)
    funcToSolve = self.infExact_scipy
    yVar = y.var()

    # 'lin' must stay first: its fit provides the baseline log likelihood
    # (linLL) that every other candidate is compared against.
    kernelsToTry = ['lin', 'poly2_lin', 'rbf_lin', 'nn_lin']
    numAlternatives = len(kernelsToTry) - 1

    bestKernelNames = []
    kernelsListAll = []
    hyp_kernels = []

    for r_i, r in enumerate(regions):
        if self.verbose:
            print('')
            print('selecting a kernel for region %s with %s SNPs'
                  % (r_i, r.sum()))

        # Add the linear kernel, plus a placeholder slot for the candidate.
        X_lastRegion = X[:, r]
        linKernel = kernels.linearKernel(X_lastRegion)
        kernelsListAll.append(kernels.ScaledKernel(linKernel))
        kernelsListAll.append(None)

        bestKernelName = None
        best_hyp0 = None
        bestKernel = None
        bestPval = np.inf
        linLL = None

        # Iterate over every candidate kernel.
        for kernelToTry in kernelsToTry:
            hyp0 = [0.5 * np.log(0.5 * yVar)]  # scaling hyp of the linear kernel
            if self.verbose:
                print('Testing kernel: %s' % (kernelToTry,))

            # Create the candidate kernel; df is the chi2 degrees of freedom
            # used for its p-value.
            if kernelToTry == 'lin':
                kernel = None
                df = None
            elif kernelToTry == 'rbf_lin':
                kernel = kernels.RBFKernel(X_lastRegion)
                hyp0.append(np.log(1.0))  # ell
                df = 2
            elif kernelToTry == 'nn_lin':
                kernel = kernels.NNKernel(X_lastRegion)
                hyp0.append(np.log(1.0))  # ell
                df = 2
            elif kernelToTry == 'poly2_lin':
                kernel = kernels.Poly2KernelHomo(linKernel)
                df = 1
            else:
                raise Exception('unrecognized kernel name')

            if kernel is not None:
                # Scale the candidate and put it in the placeholder slot.
                kernel = kernels.ScaledKernel(kernel)
                hyp0.append(0.5 * np.log(0.5 * yVar))  # scaling hyp
                kernelsListAll[-1] = kernel
                sumKernel = kernels.SumKernel(kernelsListAll)
            else:
                sumKernel = kernels.SumKernel(kernelsListAll[:-1])

            # Fit the model with this candidate for the current region.
            args = (sumKernel, C, y, reml)
            self.optimization_counter = 0
            hyp0_all = np.concatenate(
                (hyp0_sig2e, hyp0_fixedEffects, hyp_kernels + hyp0))
            optObj = gpUtils.minimize(hyp0_all, funcToSolve, -maxiter, *args)
            if not optObj.success:
                print('Optimization status: %s' % (optObj.status,))
                print('optimization message: %s' % (optObj.message,))
                raise Exception('optimization failed')

            print('final LL: %0.5e' % (-optObj.fun))
            if kernelToTry == 'lin':
                linLL = -optObj.fun
                pVal = 1.0
            else:
                llDiff = -optObj.fun - linLL
                if llDiff < 0:
                    pVal = 1.0
                else:
                    # NOTE(review): a standard likelihood-ratio statistic
                    # would be 2*llDiff; the original uses llDiff directly.
                    # Kept unchanged -- confirm whether this is intentional.
                    pVal = 0.5 * stats.chi2(df).sf(llDiff)
                print('llDiff: %0.5e pVal:%0.5e' % (llDiff, pVal))

            # The linear kernel is the default; a non-linear candidate must
            # beat the best p-value so far and pass the Bonferroni threshold.
            isSignificant = (numAlternatives == 0
                             or pVal < 0.05 / numAlternatives)
            if kernelToTry == 'lin' or (pVal < bestPval and isSignificant):
                bestPval = pVal
                bestKernelName = kernelToTry
                best_hyp0 = hyp0
                bestKernel = kernel

        # Keep the winning candidate, or drop the placeholder if 'lin' won.
        if bestKernel is not None:
            kernelsListAll[-1] = bestKernel
        else:
            kernelsListAll = kernelsListAll[:-1]
        hyp_kernels += best_hyp0
        bestKernelNames.append(bestKernelName)
        if self.verbose:
            print('selected kernel: %s' % (bestKernelName,))

    if self.verbose:
        print('selected kernels: %s' % (bestKernelNames,))
        print('')

    return bestKernelNames
def buildKernelAdapt(self, X, C, y, regions, reml=True, maxiter=100):
    """Greedily choose a kernel name for each region by comparing fitted models.

    Regions are processed in order. For each region a scaled linear kernel is
    always added; then each candidate in ``kernelsToTry`` is fitted on top of
    the kernels already selected for earlier regions, by minimising
    ``self.infExact_scipy`` with ``gpUtils.minimize``. A non-linear candidate
    replaces the current choice only if its p-value against the linear-only
    fit is the smallest seen so far and below ``0.05 / (number of non-linear
    candidates)``.

    Args:
        X: 2-D array; ``X[:, r]`` selects the columns of one region.
        C: passed through to ``self.getInitialHyps`` and to the objective.
        y: response vector; its variance initialises the scaling hyps.
        regions: sequence of column selectors, one per region.
        reml: passed through to the objective function.
        maxiter: passed negated to ``gpUtils.minimize``.

    Returns:
        List with the selected kernel name for each region.

    Raises:
        Exception: if an optimisation does not report success, or if an
            unrecognised candidate name is encountered.

    Side effects:
        Resets ``self.optimization_counter`` to 0 before every optimisation
        and prints progress (some messages regardless of ``self.verbose``).
    """
    # Prepare initial values for sig2e and for the fixed effects.
    hyp0_sig2e, hyp0_fixedEffects = self.getInitialHyps(X, C, y)
    funcToSolve = self.infExact_scipy
    yVar = y.var()

    # 'lin' must stay first: its fit provides the baseline log likelihood
    # (linLL) that every other candidate is compared against.
    kernelsToTry = ['lin', 'poly2_lin', 'rbf_lin', 'nn_lin']
    numAlternatives = len(kernelsToTry) - 1

    bestKernelNames = []
    kernelsListAll = []
    hyp_kernels = []

    for r_i, r in enumerate(regions):
        if self.verbose:
            print('')
            print('selecting a kernel for region %s with %s SNPs'
                  % (r_i, r.sum()))

        # Add the linear kernel, plus a placeholder slot for the candidate.
        X_lastRegion = X[:, r]
        linKernel = kernels.linearKernel(X_lastRegion)
        kernelsListAll.append(kernels.ScaledKernel(linKernel))
        kernelsListAll.append(None)

        bestKernelName = None
        best_hyp0 = None
        bestKernel = None
        bestPval = np.inf
        linLL = None

        # Iterate over every candidate kernel.
        for kernelToTry in kernelsToTry:
            hyp0 = [0.5 * np.log(0.5 * yVar)]  # scaling hyp of the linear kernel
            if self.verbose:
                print('Testing kernel: %s' % (kernelToTry,))

            # Create the candidate kernel; df is the chi2 degrees of freedom
            # used for its p-value.
            if kernelToTry == 'lin':
                kernel = None
                df = None
            elif kernelToTry == 'rbf_lin':
                kernel = kernels.RBFKernel(X_lastRegion)
                hyp0.append(np.log(1.0))  # ell
                df = 2
            elif kernelToTry == 'nn_lin':
                kernel = kernels.NNKernel(X_lastRegion)
                hyp0.append(np.log(1.0))  # ell
                df = 2
            elif kernelToTry == 'poly2_lin':
                kernel = kernels.Poly2KernelHomo(linKernel)
                df = 1
            else:
                raise Exception('unrecognized kernel name')

            if kernel is not None:
                # Scale the candidate and put it in the placeholder slot.
                kernel = kernels.ScaledKernel(kernel)
                hyp0.append(0.5 * np.log(0.5 * yVar))  # scaling hyp
                kernelsListAll[-1] = kernel
                sumKernel = kernels.SumKernel(kernelsListAll)
            else:
                sumKernel = kernels.SumKernel(kernelsListAll[:-1])

            # Fit the model with this candidate for the current region.
            args = (sumKernel, C, y, reml)
            self.optimization_counter = 0
            hyp0_all = np.concatenate(
                (hyp0_sig2e, hyp0_fixedEffects, hyp_kernels + hyp0))
            optObj = gpUtils.minimize(hyp0_all, funcToSolve, -maxiter, *args)
            if not optObj.success:
                print('Optimization status: %s' % (optObj.status,))
                print('optimization message: %s' % (optObj.message,))
                raise Exception('optimization failed')

            print('final LL: %0.5e' % (-optObj.fun))
            if kernelToTry == 'lin':
                linLL = -optObj.fun
                pVal = 1.0
            else:
                llDiff = -optObj.fun - linLL
                if llDiff < 0:
                    pVal = 1.0
                else:
                    # NOTE(review): a standard likelihood-ratio statistic
                    # would be 2*llDiff; the original uses llDiff directly.
                    # Kept unchanged -- confirm whether this is intentional.
                    pVal = 0.5 * stats.chi2(df).sf(llDiff)
                print('llDiff: %0.5e pVal:%0.5e' % (llDiff, pVal))

            # The linear kernel is the default; a non-linear candidate must
            # beat the best p-value so far and pass the Bonferroni threshold.
            isSignificant = (numAlternatives == 0
                             or pVal < 0.05 / numAlternatives)
            if kernelToTry == 'lin' or (pVal < bestPval and isSignificant):
                bestPval = pVal
                bestKernelName = kernelToTry
                best_hyp0 = hyp0
                bestKernel = kernel

        # Keep the winning candidate, or drop the placeholder if 'lin' won.
        if bestKernel is not None:
            kernelsListAll[-1] = bestKernel
        else:
            kernelsListAll = kernelsListAll[:-1]
        hyp_kernels += best_hyp0
        bestKernelNames.append(bestKernelName)
        if self.verbose:
            print('selected kernel: %s' % (bestKernelName,))

    if self.verbose:
        print('selected kernels: %s' % (bestKernelNames,))
        print('')

    return bestKernelNames