コード例 #1
0
    def crossvalidate(self, y, alphas, n_splits=10):
        """Select the lasso penalty ``alpha`` by LMM-lasso cross-validation.

        Runs ``lmmlasso.runCrossValidation`` on ``self.E`` with kinship
        ``self.K`` over ``alphas``. A spline is fitted to the training and to
        the test MSE, each averaged over axis 0 (presumably the folds -- confirm
        against ``runCrossValidation``). For each curve, the point of a
        100-step linear grid spanning ``alphas`` where the spline's second
        derivative is smallest is picked; the two picks are averaged.

        Args:
            y: phenotype, forwarded to the cross-validation routine.
            alphas: list of alphas to perform cross-validation over. They are
                used as the spline's ``x`` values, which
                ``UnivariateSpline`` requires to be increasing.
            n_splits: number of splits forwarded to the cross-validation.

        Returns:
            The selected alpha as a float; it is also stored in ``self.alpha``.

        Raises:
            AssertionError: if ``self.K`` is None.
        """
        K = self.K
        # Validate before any work is done. An explicit raise (rather than an
        # ``assert`` statement) keeps the check alive under ``python -O`` while
        # preserving the exception type and message callers already see.
        if K is None:
            raise AssertionError('no kinship matrix defined')

        lasso = lmmlasso.LmmLasso(warm_start=True,
                                  fit_intercept=False,
                                  tol=0.5)
        # The last two return values (non-zero weights, r-squared) are not
        # needed for picking alpha.
        MSE_train, MSE_test, _, _ = lmmlasso.runCrossValidation(
            lasso, self.E, y, alphas, n_splits=n_splits, K=K, verbose=True)

        # Second derivative of the spline through each mean error curve.
        train_inter = sp.interpolate.UnivariateSpline(
            x=alphas, y=MSE_train.mean(axis=0)).derivative(n=2)
        test_inter = sp.interpolate.UnivariateSpline(
            x=alphas, y=MSE_test.mean(axis=0)).derivative(n=2)

        # NOTE(review): sp.linspace / sp.argmin rely on SciPy re-exporting
        # NumPy functions; confirm the pinned SciPy version still provides them.
        alphas_inter = sp.linspace(min(alphas), max(alphas), 100)
        idx_train = sp.argmin(train_inter(alphas_inter))
        idx_test = sp.argmin(test_inter(alphas_inter))

        # Final alpha: mean of the train-based and test-based picks.
        alpha_cv = (float(alphas_inter[idx_train]) +
                    float(alphas_inter[idx_test])) / 2
        self.alpha = alpha_cv
        return self.alpha
コード例 #2
0
	# Cross-validate the LMM-lasso over `alphas`, derive alpha_cv from the
	# curvature of the mean error curves, then start plotting the training error.
	# X, y, K and alphas come from outside the visible part of this scope.
	n_splits=10
	N = X.shape[0]  # first dimension of X; not used again in the visible lines
	kf = KFold(n_splits,shuffle=True,random_state=None)  # presumably scikit-learn's KFold -- confirm import
	n_alphas = len(alphas)
	# Zero-filled (n_splits, n_alphas) placeholders; all three names are rebound
	# by the runCrossValidation call below.
	MSE_train = sp.zeros((n_splits,n_alphas))
	MSE_test  = sp.zeros((n_splits,n_alphas))
	W_nonzero = sp.zeros((n_splits,n_alphas))
	kf.get_n_splits(X)  # return value is discarded

	# The working directory is switched before the import below -- presumably so
	# that the local lmmlasso module is found there; confirm.
	os.chdir("/srv/uom-data1-q.unimelb.edu.au/6300-afournier/home/student.unimelb.edu.au/andhikap/Clim_GWAS/Clim_GWAS_2")

	import lmmlasso

	lasso = lmmlasso.LmmLasso(warm_start=True,fit_intercept=False,tol=0.5) #note the tolerance value

	# This call unpacks three return values (other snippets using lmmlasso
	# unpack four); the literal 10 is passed rather than the n_splits variable.
	MSE_train,MSE_test,W_nonzero = lmmlasso.runCrossValidation(lasso,X,y,alphas,n_splits=10,K=K,verbose=True)
	# Second derivative of a spline fitted through the axis-0 mean of each error array.
	MSE_train_inter=sp.interpolate.UnivariateSpline(x=alphas, y=(MSE_train.mean(axis=0))).derivative(n=2) #something about the rotation here is different from the original script
	MSE_test_inter=sp.interpolate.UnivariateSpline(x=alphas, y=(MSE_test.mean(axis=0))).derivative(n=2)

	# Evaluation grid: 100 values 2**2 .. 2**12, evenly spaced in the exponent.
	alphas_inter = 2.**(sp.linspace(2,12,100))
	# Grid index where each second derivative is smallest.
	idx_train = sp.argmin(MSE_train_inter(alphas_inter)) 
	idx_test = sp.argmin(MSE_test_inter(alphas_inter))
	# Chosen alpha: mean of the train-based and test-based grid values.
	alpha_cv = (float(alphas_inter[idx_train])+float(alphas_inter[idx_test]))/2

	import pylab as pl
	pl.figure(figsize=[20,4])
	# First panel of a 1x3 grid: mean training MSE against log2(alpha), with a
	# red vertical line at log2(alpha_cv).
	pls = pl.subplot(1,3,1)
	pls.plot(sp.log2(alphas),MSE_train.mean(axis=0),linewidth=2)
	pl.axvline(sp.log2(alpha_cv),color='r')
	pl.xlabel('log alpha')
	pl.ylabel('training error')
コード例 #3
0
# Cross-validate the LMM-lasso on X0 over `alphas`, derive alpha_cv, then
# allocate per-fold result buffers for a later model-fitting step.
# X, X0, y, K and alphas are defined outside the visible lines.
n_splits=10
N = X.shape[0]
kf = KFold(n_splits,shuffle=True,random_state=None)  # presumably scikit-learn's KFold -- confirm import
n_alphas = len(alphas)
# Zero-filled (n_splits, n_alphas) placeholders; all four names are rebound by
# the runCrossValidation call below.
MSE_train = sp.zeros((n_splits,n_alphas))
MSE_test  = sp.zeros((n_splits,n_alphas))
W_nonzero = sp.zeros((n_splits,n_alphas))
rsquared  = sp.zeros((n_splits,n_alphas))
# The working directory is switched to Temp_Files before importing lmmlasso --
# presumably that is where this version of the module lives; confirm.
os.chdir("/srv/uom-data1-q.unimelb.edu.au/6300-afournier/home/student.unimelb.edu.au/andhikap/Clim_GWAS/Clim_GWAS_2/Temp_Files")
import lmmlasso


'''Pth = 0'''

lasso = lmmlasso.LmmLasso(warm_start=True,fit_intercept=False,tol=0.5) #note the tolerance value
# NOTE(review): cross-validation runs on X0 while N above is taken from X --
# confirm both refer to the same samples.
MSE_train,MSE_test,W_nonzero, rsquared = lmmlasso.runCrossValidation(lasso,X0,y,alphas,n_splits=10,K=K,verbose=True)
# Second derivative of a spline fitted through the axis-0 mean of each error array.
MSE_train_inter=sp.interpolate.UnivariateSpline(x=alphas, y=(MSE_train.mean(axis=0))).derivative(n=2)
MSE_test_inter=sp.interpolate.UnivariateSpline(x=alphas, y=(MSE_test.mean(axis=0))).derivative(n=2)
# Evaluation grid: 100 values 2**2 .. 2**10, evenly spaced in the exponent.
alphas_inter = 2.**(sp.linspace(2,10,100))
# Grid index where each second derivative is smallest.
idx_train = sp.argmin(MSE_train_inter(alphas_inter)) 
idx_test = sp.argmin(MSE_test_inter(alphas_inter))
# Chosen alpha: mean of the train-based and test-based grid values.
alpha_cv = (float(alphas_inter[idx_train])+float(alphas_inter[idx_test]))/2
# Switch back to the parent project directory.
os.chdir("/srv/uom-data1-q.unimelb.edu.au/6300-afournier/home/student.unimelb.edu.au/andhikap/Clim_GWAS/Clim_GWAS_2")
kf12 = KFold(n_splits,shuffle=True,random_state=12) #12 because the random state is 12
#Model fitting step
N = X.shape[0]
#kf = KFold(n_splits,shuffle=True,random_state=12)
# One slot per fold for the final-model metrics (filled beyond the visible lines).
MSE_train_final = sp.zeros((n_splits,))
MSE_test_final  = sp.zeros((n_splits,))
W_nonzero_final = sp.zeros((n_splits,))
rsquared_final  = sp.zeros((n_splits,)) 
コード例 #4
0
# Tail of a plot whose construction is outside the visible lines.
plt.xlabel('SNPs')
plt.title('GWAS using LMM; F-test; Unique SNPs only')
plt.show()

#Running LMM lasso (100k SNPs + covariance matrix)
import lmmlasso
#Cross validation to get the optimal parameters
# Ten candidate alphas, 2**2 .. 2**12 evenly spaced in the exponent, then
# reversed so they run from largest to smallest.
alphas = 2.**(sp.linspace(2, 12, 10))
alphas = alphas[::-1]
from lmmlasso import runCrossValidation
lasso = lmmlasso.LmmLasso(
)  #No need to set parameters because these will be decided through cross-validation [may need to set tolerance higher, use tol=0.05 as a baseline]
# This call unpacks three return values. SNP_data, Pheno_data and K_data are
# defined outside the visible lines.
MSE_train, MSE_test, W_nonzero = lmmlasso.runCrossValidation(lasso,
                                                             SNP_data,
                                                             Pheno_data,
                                                             alphas,
                                                             n_splits=10,
                                                             K=K_data,
                                                             verbose=True)

#Then from Alex's code..
import pylab as pl

# alphas is descending here, so both alphas and the axis-0 mean errors are
# flipped to give the spline ascending x values; the fitted spline is then
# replaced by its second derivative.
MSE_train_inter = sp.interpolate.UnivariateSpline(x=np.flip(alphas, axis=0),
                                                  y=np.flip(
                                                      MSE_train.mean(axis=0),
                                                      axis=0)).derivative(n=2)
MSE_test_inter = sp.interpolate.UnivariateSpline(x=np.flip(alphas, axis=0),
                                                 y=np.flip(
                                                     MSE_test.mean(axis=0),
                                                     axis=0)).derivative(n=2)
コード例 #5
0
ファイル: Oulu_or_noOulu.py プロジェクト: andhikap2/lmmlasso
# Cross-validate the LMM-lasso over `alphas`, derive alpha_cv from the
# curvature of the mean error curves, then start plotting the training error.
# X, y, K, alphas and n_splits are defined outside the visible lines.
N = X.shape[0]
kf = KFold(n_splits,shuffle=True,random_state=None)  # presumably scikit-learn's KFold -- confirm import
n_alphas = len(alphas)
# Zero-filled (n_splits, n_alphas) placeholders; all four names are rebound by
# the runCrossValidation call below.
MSE_train = sp.zeros((n_splits,n_alphas))
MSE_test  = sp.zeros((n_splits,n_alphas))
W_nonzero = sp.zeros((n_splits,n_alphas))
rsquared  = sp.zeros((n_splits,n_alphas))
kf.get_n_splits(X)  # return value is discarded

# The working directory is switched before the import below -- presumably so
# that the local lmmlasso module is found there; confirm.
os.chdir("/srv/uom-data1-q.unimelb.edu.au/6300-afournier/home/student.unimelb.edu.au/andhikap/Clim_GWAS/Clim_GWAS_2")

import lmmlasso

lasso = lmmlasso.LmmLasso(warm_start=True,fit_intercept=False,tol=0.5) #note the tolerance value

# The literal 10 is passed as n_splits here, not the n_splits variable used above.
MSE_train,MSE_test,W_nonzero, rsquared = lmmlasso.runCrossValidation(lasso,X,y,alphas,n_splits=10,K=K,verbose=True)
# Second derivative of a spline fitted through the axis-0 mean of each error array.
MSE_train_inter=sp.interpolate.UnivariateSpline(x=alphas, y=(MSE_train.mean(axis=0))).derivative(n=2)
MSE_test_inter=sp.interpolate.UnivariateSpline(x=alphas, y=(MSE_test.mean(axis=0))).derivative(n=2)

# Evaluation grid: 100 values 2**2 .. 2**10, evenly spaced in the exponent.
alphas_inter = 2.**(sp.linspace(2,10,100))
# Grid index where each second derivative is smallest.
idx_train = sp.argmin(MSE_train_inter(alphas_inter)) 
idx_test = sp.argmin(MSE_test_inter(alphas_inter))

##idx_train=sp.argmin
# Chosen alpha: mean of the train-based and test-based grid values.
alpha_cv = (float(alphas_inter[idx_train])+float(alphas_inter[idx_test]))/2

import pylab as pl
pl.figure(figsize=[20,4])
# First panel of a 1x3 grid: mean training MSE against log2(alpha), with a red
# vertical line at log2(alpha_cv).
pls = pl.subplot(1,3,1)
pls.plot(sp.log2(alphas),MSE_train.mean(axis=0),linewidth=2)
pl.axvline(sp.log2(alpha_cv),color='r')
コード例 #6
0
# Per-SNP LMM-lasso: for every column of `snps`, cross-validate to choose an
# alpha, refit on that single SNP, and record its weight and squared residuals.
# snps, y, K and n_splits are defined outside the visible lines.
lasso = lmmlasso.LmmLasso(warm_start=True,fit_intercept=False,tol=0.5) #note the tolerance value

#Recalculate alpha for each SNP or not?


alphas = 2.**(sp.linspace(-10,10,10)) #list of alphas to test


# Single-row buffers with one column per SNP (valid row index is 0 only).
snp_weights=np.empty((1,snps.shape[1]))
snp_errors=np.empty((1,snps.shape[1]))


for i in range(snps.shape[1]):
	# Column i as an (n_samples, 1) matrix.
	X=snps[:,i]
	X=np.reshape(X,(-1,1))
	MSE_train,MSE_test,W_nonzero, rsquared = lmmlasso.runCrossValidation(lasso,X,y,alphas,n_splits=10,K=K,verbose=True) #X needs to have dimension (5623, 1)
	# Second derivative of a spline fitted through the axis-0 mean of each error array.
	MSE_train_inter=sp.interpolate.UnivariateSpline(x=alphas, y=(MSE_train.mean(axis=0))).derivative(n=2)
	MSE_test_inter=sp.interpolate.UnivariateSpline(x=alphas, y=(MSE_test.mean(axis=0))).derivative(n=2)
	# Evaluation grid: 100 values 2**-10 .. 2**10, evenly spaced in the exponent.
	alphas_inter = 2.**(sp.linspace(-10,10,100))
	idx_train = sp.argmin(MSE_train_inter(alphas_inter))
	idx_test = sp.argmin(MSE_test_inter(alphas_inter))
	# Chosen alpha: mean of the train-based and test-based grid values.
	alpha_cv = (float(alphas_inter[idx_train])+float(alphas_inter[idx_test]))/2
	os.chdir("/srv/uom-data1-q.unimelb.edu.au/6300-afournier/home/student.unimelb.edu.au/andhikap/Clim_GWAS/Clim_GWAS_2")
	kf12 = KFold(n_splits,shuffle=True,random_state=12) #12 because the random state is 12
	lasso.set_params(alpha=alpha_cv)
	lasso = lasso.fit(X,y,K=K)
	weights = lasso.coef_
	# snp_weights has shape (1, n_snps): the only valid row is 0. The previous
	# index [1, i] raised IndexError on the first iteration.
	snp_weights[0,i]=weights[0]
	predictions=lasso.predict(X,K)
	residuals=y-predictions
	residuals=residuals**2