def crossvalidate(self, y, alphas, n_splits=10):
    """Select an optimal lasso penalty (alpha) by k-fold cross-validation.

    Parameters
    ----------
    y : array-like
        Phenotype vector.
    alphas : sequence of float
        Candidate penalty values to evaluate.
    n_splits : int, optional
        Number of cross-validation folds (default 10).

    Returns
    -------
    float
        The selected alpha; also stored on ``self.alpha``.
    """
    # NOTE(review): tol=0.5 is a very loose convergence tolerance -- confirm intended.
    lasso = lmmlasso.LmmLasso(warm_start=True, fit_intercept=False, tol=0.5)
    X = self.E
    K = self.K
    assert K is not None, 'no kinship matrix defined'
    # Fix: the original assigned X = self.E but then passed self.E directly;
    # use the local consistently.
    MSE_train, MSE_test, W_nonzero, rsquared = lmmlasso.runCrossValidation(
        lasso, X, y, alphas, n_splits=n_splits, K=K, verbose=True)
    # Smooth each mean-error curve with a spline and take its second
    # derivative; the minimum curvature over a dense grid marks the elbow.
    train_inter = sp.interpolate.UnivariateSpline(
        x=alphas, y=MSE_train.mean(axis=0)).derivative(n=2)
    test_inter = sp.interpolate.UnivariateSpline(
        x=alphas, y=MSE_test.mean(axis=0)).derivative(n=2)
    # Dense grid spanning the tested alphas for finer localisation.
    alphas_inter = sp.linspace(min(alphas), max(alphas), 100)
    idx_train = sp.argmin(train_inter(alphas_inter))
    idx_test = sp.argmin(test_inter(alphas_inter))
    # Selected penalty: average of the train- and test-curve optima.
    alpha_cv = (float(alphas_inter[idx_train])
                + float(alphas_inter[idx_test])) / 2
    self.alpha = alpha_cv
    return self.alpha
# ----- Cross-validate the lasso penalty over alphas in [2**2, 2**12] -----
n_splits = 10

# The lmmlasso module lives in this project directory.
os.chdir("/srv/uom-data1-q.unimelb.edu.au/6300-afournier/home/student.unimelb.edu.au/andhikap/Clim_GWAS/Clim_GWAS_2")
import lmmlasso

# NOTE(review): tol=0.5 is a very loose convergence tolerance -- confirm intended.
lasso = lmmlasso.LmmLasso(warm_start=True, fit_intercept=False, tol=0.5)

# runCrossValidation builds its own folds and returns fully-populated arrays,
# so the original's pre-allocated zero matrices, local KFold and the unused
# N/n_alphas locals were dead code; also the call hard-coded n_splits=10
# instead of using the variable above. Both fixed here.
MSE_train, MSE_test, W_nonzero = lmmlasso.runCrossValidation(
    lasso, X, y, alphas, n_splits=n_splits, K=K, verbose=True)

# Elbow of each mean-error curve: second derivative of a smoothing spline,
# minimised over a dense alpha grid.
MSE_train_inter = sp.interpolate.UnivariateSpline(
    x=alphas, y=MSE_train.mean(axis=0)).derivative(n=2)
MSE_test_inter = sp.interpolate.UnivariateSpline(
    x=alphas, y=MSE_test.mean(axis=0)).derivative(n=2)
alphas_inter = 2. ** sp.linspace(2, 12, 100)
idx_train = sp.argmin(MSE_train_inter(alphas_inter))
idx_test = sp.argmin(MSE_test_inter(alphas_inter))
# Selected penalty: average of the train- and test-curve optima.
alpha_cv = (float(alphas_inter[idx_train]) + float(alphas_inter[idx_test])) / 2

# Diagnostic plot: training error vs log2(alpha) with the chosen alpha marked.
import pylab as pl
pl.figure(figsize=[20, 4])
pls = pl.subplot(1, 3, 1)
pls.plot(sp.log2(alphas), MSE_train.mean(axis=0), linewidth=2)
pl.axvline(sp.log2(alpha_cv), color='r')
pl.xlabel('log alpha')
pl.ylabel('training error')
# ----- Cross-validate the lasso penalty (alphas in [2**2, 2**10]) on X0 -----
n_splits = 10

# The lmmlasso module lives under this project directory.
os.chdir("/srv/uom-data1-q.unimelb.edu.au/6300-afournier/home/student.unimelb.edu.au/andhikap/Clim_GWAS/Clim_GWAS_2/Temp_Files")
import lmmlasso
# Pth = 0  (note carried over from the original script)

# NOTE(review): tol=0.5 is a very loose convergence tolerance -- confirm intended.
lasso = lmmlasso.LmmLasso(warm_start=True, fit_intercept=False, tol=0.5)

# runCrossValidation allocates its own result arrays, so the original's
# pre-allocated zero matrices and unused KFold/N/n_alphas locals were dead
# code; the call also hard-coded n_splits=10 instead of the variable. Fixed.
# NOTE(review): CV runs on X0 while the final fit below uses X -- confirm intended.
MSE_train, MSE_test, W_nonzero, rsquared = lmmlasso.runCrossValidation(
    lasso, X0, y, alphas, n_splits=n_splits, K=K, verbose=True)

# Elbow of each mean-error curve: second derivative of a smoothing spline,
# minimised over a dense alpha grid.
MSE_train_inter = sp.interpolate.UnivariateSpline(
    x=alphas, y=MSE_train.mean(axis=0)).derivative(n=2)
MSE_test_inter = sp.interpolate.UnivariateSpline(
    x=alphas, y=MSE_test.mean(axis=0)).derivative(n=2)
alphas_inter = 2. ** sp.linspace(2, 10, 100)
idx_train = sp.argmin(MSE_train_inter(alphas_inter))
idx_test = sp.argmin(MSE_test_inter(alphas_inter))
# Selected penalty: average of the train- and test-curve optima.
alpha_cv = (float(alphas_inter[idx_train]) + float(alphas_inter[idx_test])) / 2

os.chdir("/srv/uom-data1-q.unimelb.edu.au/6300-afournier/home/student.unimelb.edu.au/andhikap/Clim_GWAS/Clim_GWAS_2")

# ----- Model-fitting step: fixed folds and per-fold result buffers -----
kf12 = KFold(n_splits, shuffle=True, random_state=12)  # 12 because the random state is 12
N = X.shape[0]
MSE_train_final = sp.zeros((n_splits,))
MSE_test_final = sp.zeros((n_splits,))
W_nonzero_final = sp.zeros((n_splits,))
rsquared_final = sp.zeros((n_splits,))
# Finish labelling the GWAS Manhattan plot and display it.
plt.xlabel('SNPs')
plt.title('GWAS using LMM; F-test; Unique SNPs only')
plt.show()

# ----- Running LMM lasso (100k SNPs + covariance matrix) -----
import lmmlasso

# Cross-validation grid: 10 alphas spanning 2**2 .. 2**12, largest first.
alphas = 2. ** sp.linspace(2, 12, 10)
alphas = alphas[::-1]

# No parameters set here because they will be decided through
# cross-validation [may need to set tolerance higher; use tol=0.05 as a baseline].
lasso = lmmlasso.LmmLasso()
# (The original also did `from lmmlasso import runCrossValidation` but then
# called the qualified name; the redundant import has been removed.)
MSE_train, MSE_test, W_nonzero = lmmlasso.runCrossValidation(
    lasso, SNP_data, Pheno_data, alphas, n_splits=10, K=K_data, verbose=True)

# Then from Alex's code..
import pylab as pl
# UnivariateSpline requires strictly increasing x, but alphas is descending,
# so both x and y are flipped before fitting; the second derivative is used
# to locate the elbow of each mean-error curve.
MSE_train_inter = sp.interpolate.UnivariateSpline(
    x=np.flip(alphas, axis=0),
    y=np.flip(MSE_train.mean(axis=0), axis=0)).derivative(n=2)
MSE_test_inter = sp.interpolate.UnivariateSpline(
    x=np.flip(alphas, axis=0),
    y=np.flip(MSE_test.mean(axis=0), axis=0)).derivative(n=2)
# ----- Cross-validate the lasso penalty (alphas in [2**2, 2**10]) -----
# The lmmlasso module lives in this project directory.
os.chdir("/srv/uom-data1-q.unimelb.edu.au/6300-afournier/home/student.unimelb.edu.au/andhikap/Clim_GWAS/Clim_GWAS_2")
import lmmlasso

# NOTE(review): tol=0.5 is a very loose convergence tolerance -- confirm intended.
lasso = lmmlasso.LmmLasso(warm_start=True, fit_intercept=False, tol=0.5)

# runCrossValidation builds its own folds and returns fully-populated arrays,
# so the original's pre-allocated zero matrices, local KFold/get_n_splits and
# unused N/n_alphas locals were dead code; the call also hard-coded
# n_splits=10 instead of the in-scope n_splits variable. Both fixed here.
MSE_train, MSE_test, W_nonzero, rsquared = lmmlasso.runCrossValidation(
    lasso, X, y, alphas, n_splits=n_splits, K=K, verbose=True)

# Elbow of each mean-error curve: second derivative of a smoothing spline,
# minimised over a dense alpha grid.
MSE_train_inter = sp.interpolate.UnivariateSpline(
    x=alphas, y=MSE_train.mean(axis=0)).derivative(n=2)
MSE_test_inter = sp.interpolate.UnivariateSpline(
    x=alphas, y=MSE_test.mean(axis=0)).derivative(n=2)
alphas_inter = 2. ** sp.linspace(2, 10, 100)
idx_train = sp.argmin(MSE_train_inter(alphas_inter))
idx_test = sp.argmin(MSE_test_inter(alphas_inter))
# Selected penalty: average of the train- and test-curve optima.
alpha_cv = (float(alphas_inter[idx_train]) + float(alphas_inter[idx_test])) / 2

# Diagnostic plot: training error vs log2(alpha) with the chosen alpha marked.
import pylab as pl
pl.figure(figsize=[20, 4])
pls = pl.subplot(1, 3, 1)
pls.plot(sp.log2(alphas), MSE_train.mean(axis=0), linewidth=2)
pl.axvline(sp.log2(alpha_cv), color='r')
# ----- Per-SNP LMM lasso: CV alpha, refit, record weight and squared residuals -----
# NOTE(review): tol=0.5 is a very loose convergence tolerance -- confirm intended.
lasso = lmmlasso.LmmLasso(warm_start=True, fit_intercept=False, tol=0.5)

# Recalculate alpha for each SNP or not?
alphas = 2. ** sp.linspace(-10, 10, 10)  # list of alphas to test
snp_weights = np.empty((1, snps.shape[1]))
snp_errors = np.empty((1, snps.shape[1]))

# Loop-invariant work hoisted out of the per-SNP loop: the dense alpha grid,
# the working directory, and the fixed-seed fold generator (kf12 is never
# read inside the loop body and was rebuilt identically every iteration).
alphas_inter = 2. ** sp.linspace(-10, 10, 100)
os.chdir("/srv/uom-data1-q.unimelb.edu.au/6300-afournier/home/student.unimelb.edu.au/andhikap/Clim_GWAS/Clim_GWAS_2")
kf12 = KFold(n_splits, shuffle=True, random_state=12)  # 12 because the random state is 12

for i in range(snps.shape[1]):
    X = snps[:, i]
    X = np.reshape(X, (-1, 1))  # X needs to have dimension (n_samples, 1)
    MSE_train, MSE_test, W_nonzero, rsquared = lmmlasso.runCrossValidation(
        lasso, X, y, alphas, n_splits=10, K=K, verbose=True)
    # Elbow of each mean-error curve via the second derivative of a
    # smoothing spline, minimised over the dense alpha grid.
    MSE_train_inter = sp.interpolate.UnivariateSpline(
        x=alphas, y=MSE_train.mean(axis=0)).derivative(n=2)
    MSE_test_inter = sp.interpolate.UnivariateSpline(
        x=alphas, y=MSE_test.mean(axis=0)).derivative(n=2)
    idx_train = sp.argmin(MSE_train_inter(alphas_inter))
    idx_test = sp.argmin(MSE_test_inter(alphas_inter))
    alpha_cv = (float(alphas_inter[idx_train]) + float(alphas_inter[idx_test])) / 2
    # Refit on the full data at the selected penalty.
    lasso.set_params(alpha=alpha_cv)
    lasso = lasso.fit(X, y, K=K)
    weights = lasso.coef_
    # Bug fix: the original wrote snp_weights[1, i], which is out of bounds
    # for a (1, n) array and raises IndexError; the row index must be 0.
    snp_weights[0, i] = weights[0]
    predictions = lasso.predict(X, K)
    residuals = y - predictions
    residuals = residuals ** 2