Example #1
File: qtl.py Project: PMBio/limix
	def __init__(self, snps, pheno, K=None, covs=None, test='lrt', NumIntervalsDelta0=100, NumIntervalsDeltaAlt=100, searchDelta=False, verbose=None):
		"""
		Univariate fixed effects linear mixed model test for all SNPs

		If phenotypes have missing values, the analysis for each phenotype column is
		restricted to the individuals with observed values for that column

		Args:
			snps:   [N x S] np.array of S SNPs for N individuals
			pheno:  [N x P] np.array of P phenotypes for N individuals
			K:      [N x N] np.array of LMM-covariance/kinship coefficients (optional)
							If not provided, then linear regression analysis is performed
			covs:   [N x D] np.array of D covariates for N individuals
			test:   'lrt' for likelihood ratio test (default) or 'f' for F-test
			NumIntervalsDelta0:     number of steps for delta optimization on the null model (100)
			NumIntervalsDeltaAlt:   number of steps for delta optimization on the alt. model (100), requires searchDelta=True to have an effect.
			searchDelta:     Carry out delta optimization on the alternative model? If yes, NumIntervalsDeltaAlt steps are used.
			verbose: print verbose output? (False)
		"""
		# make sure pheno is a 2D [N x P] array
		if len(pheno.shape)==1:
			pheno = pheno[:,sp.newaxis]

		self.verbose = dlimix.getVerbose(verbose)
		self.snps = snps
		self.pheno = pheno
		self.K = K
		self.covs = covs
		self.test = test
		self.NumIntervalsDelta0 = NumIntervalsDelta0
		self.NumIntervalsDeltaAlt = NumIntervalsDeltaAlt
		self.searchDelta = searchDelta
		self.N       = self.pheno.shape[0]
		self.P       = self.pheno.shape[1]
		self.Iok     = ~(np.isnan(self.pheno).any(axis=1))
		if self.K is None:
			self.searchDelta=False
			self.K = np.eye(self.snps.shape[0])
		if self.covs is None:
			self.covs = np.ones((self.snps.shape[0],1))

		self._lmm = None
		# run the association scan
		self.process()
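Two defaulting rules in this constructor are worth noting: a missing K falls back to the identity matrix, which reduces the LMM to ordinary linear regression, and missing covariates fall back to a single intercept column; Iok marks the individuals whose phenotypes are fully observed. A self-contained NumPy sketch of just that logic (synthetic data, no limix required):

    import numpy as np

    N, P = 5, 2
    pheno = np.random.randn(N, P)
    pheno[3, 1] = np.nan                  # one missing phenotype value

    K, covs = None, None
    if K is None:
        K = np.eye(N)                     # identity kinship -> plain linear regression
    if covs is None:
        covs = np.ones((N, 1))            # intercept-only covariate

    Iok = ~(np.isnan(pheno).any(axis=1))  # True where every phenotype is observed
    print(Iok)                            # e.g. [ True  True  True False  True]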
Example #2
    def _getH2singleTrait(self, K, verbose=None):
        """
        Internal function for parameter initialization.
        Estimates variance components and fixed effects using a linear mixed model with an intercept and two random effects (one of which is noise).
        Args:
            K:        covariance matrix of the non-noise random effect term
        """
        verbose = dlimix.getVerbose(verbose)
        # Fit single trait model
        varg = sp.zeros(self.P)
        varn = sp.zeros(self.P)
        fixed = sp.zeros((1, self.P))

        for p in range(self.P):
            y = self.Y[:, p:p + 1]
            # check for missing (NaN) phenotype values
            I = sp.isnan(y[:, 0])
            if I.sum() > 0:
                y = y[~I, :]
                _K = K[~I, :][:, ~I]
            else:
                _K = copy.copy(K)
            lmm = dlimix.CLMM()
            lmm.setK(_K)
            lmm.setSNPs(sp.ones((y.shape[0], 1)))
            lmm.setPheno(y)
            lmm.setCovs(sp.zeros((y.shape[0], 1)))
            lmm.setVarcompApprox0(-20, 20, 1000)
            lmm.process()
            delta = sp.exp(lmm.getLdelta0()[0, 0])
            Vtot = sp.exp(lmm.getLSigma()[0, 0])

            varg[p] = Vtot
            varn[p] = delta * Vtot
            fixed[:, p] = lmm.getBetaSNP()

            if verbose: print(p)

        sth = {}
        sth['varg'] = varg
        sth['varn'] = varn
        sth['fixed'] = fixed

        return sth
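The back-transformation at the end of the loop deserves a note: the optimizer works with the log of delta (the noise-to-genetic variance ratio) and a log variance scale, and the two variance components are recovered by exponentiation, mirroring the assignments above. A small arithmetic sketch with made-up optimizer output:

    import numpy as np

    # made-up optimizer output: log-delta and log-sigma at the optimum
    ldelta0, lsigma = -1.2, 0.4

    delta = np.exp(ldelta0)   # noise-to-genetic variance ratio
    Vtot  = np.exp(lsigma)    # variance scale
    varg  = Vtot              # genetic variance component (as assigned above)
    varn  = delta * Vtot      # noise variance component
    print(varg, varn)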
Example #3
File: qtl.py Project: PMBio/limix
def test_interaction_GxE_1dof(snps,pheno,env,K=None,covs=None, test='lrt',verbose=None):
    """
    Univariate GxE fixed effects interaction linear mixed model test for all
    pairs of SNPs and environmental variables.

    Args:
        snps:   [N x S] np.array of S SNPs for N individuals
        pheno:  [N x 1] np.array of 1 phenotype for N individuals
        env:    [N x E] np.array of E environmental variables for N individuals
        K:      [N x N] np.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, then linear regression analysis is performed
        covs:   [N x D] np.array of D covariates for N individuals
        test:    'lrt' for likelihood ratio test (default) or 'f' for F-test
        verbose: print verbose output? (False)

    Returns:
        pv:     [E x S] np.array of P values for interaction tests between all
                E environmental variables and all S SNPs
    """
    verbose = dlimix.getVerbose(verbose)
    N=snps.shape[0]
    if K is None:
        K=np.eye(N)
    if covs is None:
        covs = np.ones((N,1))
    assert (env.shape[0]==N and pheno.shape[0]==N and K.shape[0]==N and K.shape[1]==N and covs.shape[0]==N), "shape mismatch"
    Inter0 = np.ones((N,1))
    pv = np.zeros((env.shape[1],snps.shape[1]))
    if verbose:
        print(("starting %i interaction scans for %i SNPs each." % (env.shape[1], snps.shape[1])))
    t0=time.time()
    for i in range(env.shape[1]):
        t0_i = time.time()
        cov_i = np.concatenate((covs,env[:,i:(i+1)]),1)
        lm_i = test_interaction_lmm(snps=snps,pheno=pheno,covs=cov_i,Inter=env[:,i:(i+1)],Inter0=Inter0,test=test)
        pv[i,:]=lm_i.getPv()[0,:]
        t1_i = time.time()
        if verbose:
            print(("Finished %i out of %i interaction scans in %.2f seconds."%((i+1),env.shape[1],(t1_i-t0_i))))
    t1 = time.time()
    print(("-----------------------------------------------------------\nFinished all %i interaction scans in %.2f seconds."%(env.shape[1],(t1-t0))))
    return pv
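For orientation, here is a hypothetical call with synthetic data. It assumes an old-style limix installation where this function is reachable as limix.qtl.test_interaction_GxE_1dof; the import path and the data are illustrative only:

    import numpy as np
    from limix import qtl                    # assumption: old limix API layout

    N, S, E = 100, 50, 3
    snps  = np.random.binomial(2, 0.3, (N, S)).astype(float)
    pheno = np.random.randn(N, 1)
    env   = np.random.randn(N, E)

    pv = qtl.test_interaction_GxE_1dof(snps=snps, pheno=pheno, env=env)
    print(pv.shape)                          # (E, S): one interaction P value per env/SNP pair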
Example #4
File: qtl.py Project: PMBio/limix
def forward_lmm_kronecker(snps,phenos,Asnps=None,Acond=None,K1r=None,K1c=None,K2r=None,K2c=None,covs=None,Acovs=None,threshold=5e-8,maxiter=2,qvalues=False, update_covariances = False,verbose=None,**kw_args):
    """
    Kronecker fixed effects test with forward selection

    Args:
        snps:   [N x S] np.array of S SNPs for N individuals (test SNPs)
        phenos: [N x P] np.array of P phenotypes for N individuals
        K1r:    [N x N] np.array of row covariance of the first random effect (default: np.dot(snps,snps.T))
        K2r:    [N x N] np.array of row covariance of the second random effect (default: identity)
        K1c:    [P x P] np.array of column (trait) covariance of the first random effect (estimated from data if not provided)
        K2c:    [P x P] np.array of column (trait) covariance of the second random effect (estimated from data if not provided)
        covs:   [N x D] np.array of D covariates for N individuals
        threshold:      (float) P-value threshold for inclusion in forward selection (default 5e-8)
        maxiter:        (int) maximum number of forward-selection scans. The first scan is
                        without inclusion, so maxiter-1 inclusions can be performed. (default 2)
        qvalues:        Use q-value threshold and return q-values in addition (default False)
        update_covariances:   Boolean indicator if covariances should be re-estimated after each forward step (default False)

    Returns:
        lm:             limix LMM object
        resultStruct with elements:
            iadded:         array of indices of SNPs included in order of inclusion
            pvadded:        array of Pvalues obtained by the included SNPs in iteration
                            before inclusion
            pvall:     [Nadded x S] np.array of Pvalues for all iterations.
        Optional:      corresponding q-values
            qvadded
            qvall
    """
    verbose = dlimix.getVerbose(verbose)
    #0. checks
    N  = phenos.shape[0]
    P  = phenos.shape[1]

    if K1r is None:
        K1r = np.dot(snps,snps.T)
    else:
        assert K1r.shape[0]==N, 'K1r: dimension mismatch'
        assert K1r.shape[1]==N, 'K1r: dimension mismatch'

    if K2r is None:
        K2r = np.eye(N)
    else:
        assert K2r.shape[0]==N, 'K2r: dimension mismatch'
        assert K2r.shape[1]==N, 'K2r: dimension mismatch'

    covs,Acovs = _updateKronCovs(covs,Acovs,N,P)

    if Asnps is None:
        Asnps = [np.ones([1,P])]
    if (type(Asnps)!=list):
        Asnps = [Asnps]
    assert len(Asnps)>0, "need at least one Snp design matrix"

    if Acond is None:
        Acond = Asnps
    if (type(Acond)!=list):
        Acond = [Acond]
    assert len(Acond)>0, "need at least one Snp design matrix"

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = _estimateKronCovariances(phenos=phenos, K1r=K1r, K2r=K2r, K1c=K1c, K2c=K2c, covs=covs, Acovs=Acovs, **kw_args)
        K1c = vc.getTraitCovar(0)
        K2c = vc.getTraitCovar(1)
    else:
        vc = None
        assert K1c.shape[0]==P, 'K1c: dimension mismatch'
        assert K1c.shape[1]==P, 'K1c: dimension mismatch'
        assert K2c.shape[0]==P, 'K2c: dimension mismatch'
        assert K2c.shape[1]==P, 'K2c: dimension mismatch'
    t0 = time.time()
    lm,pv = test_lmm_kronecker(snps=snps,phenos=phenos,Asnps=Asnps,K1r=K1r,K2r=K2r,K1c=K1c,K2c=K2c,covs=covs,Acovs=Acovs)

    #get pv
    #start stuff
    iadded = []
    pvadded = []
    qvadded = []
    time_el = []
    pvall = []
    qvall = None
    t1=time.time()
    if verbose:
        print(("finished GWAS testing in %.2f seconds" %(t1-t0)))
    time_el.append(t1-t0)
    pvall.append(pv)
    imin= np.unravel_index(pv.argmin(),pv.shape)
    score=pv[imin].min()
    niter = 1
    if qvalues:
        assert pv.shape[0]==1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = []
        qv  = FDR.qvalues(pv)
        qvall.append(qv)
        score=qv[imin]
    #loop:
    while (score<threshold) and niter<maxiter:
        t0=time.time()
        pvadded.append(pv[imin])
        iadded.append(imin)
        if qvalues:
            qvadded.append(qv[imin])
        if update_covariances and vc is not None:
            vc.addFixedTerm(snps[:,imin[1]:(imin[1]+1)],Acond[imin[0]])
            vc.setScales()#CL: don't know what this does, but findLocalOptima crashes because vc.noisPos=None
            vc.findLocalOptima(fast=True)
            K1c = vc.getTraitCovar(0)
            K2c = vc.getTraitCovar(1)
            lm.setK1c(K1c)
            lm.setK2c(K2c)
        lm.addCovariates(snps[:,imin[1]:(imin[1]+1)],Acond[imin[0]])
        for i in range(len(Asnps)):
            #add SNP design
            lm.setSNPcoldesign(Asnps[i])
            lm.process()
            pv[i,:] = lm.getPv()[0]
        pvall.append(pv.ravel())
        imin= np.unravel_index(pv.argmin(),pv.shape)
        if qvalues:
            qv = FDR.qvalues(pv)
            qvall.append(qv)
            score = qv[imin].min()
        else:
            score = pv[imin].min()
        t1=time.time()
        if verbose:
            print(("finished GWAS testing in %.2f seconds" %(t1-t0)))
        time_el.append(t1-t0)
        niter=niter+1
    RV = {}
    RV['iadded']  = iadded
    RV['pvadded'] = pvadded
    RV['pvall']   = np.array(pvall)
    RV['time_el'] = time_el
    if qvalues:
        RV['qvall'] = qvall
        RV['qvadded'] = qvadded
    return lm,RV
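The selection step inside the loop picks the global minimum of a 2D P-value matrix (SNP designs by SNPs), which is what the repeated np.unravel_index calls do. A standalone illustration:

    import numpy as np

    pv = np.array([[0.3, 0.02, 0.7],
                   [0.5, 0.60, 0.01]])       # rows: SNP designs, cols: SNPs

    imin = np.unravel_index(pv.argmin(), pv.shape)
    print(imin)                              # (1, 2): design index and SNP index
    print(pv[imin])                          # 0.01, compared against the threshold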
Example #5
File: qtl.py Project: PMBio/limix
def forward_lmm(snps,pheno,K=None,covs=None,qvalues=False,threshold=5e-8,maxiter=2,test='lrt',verbose=None,**kw_args):
    """
    univariate fixed effects test with forward selection

    Args:
        snps:   [N x S] np.array of S SNPs for N individuals (test SNPs)
        pheno:  [N x 1] np.array of 1 phenotype for N individuals
        K:      [N x N] np.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, then linear regression analysis is performed
        covs:   [N x D] np.array of D covariates for N individuals
        threshold:      (float) P-value threshold for inclusion in forward selection (default 5e-8)
        maxiter:        (int) maximum number of forward-selection scans. The first scan is
                        without inclusion, so maxiter-1 inclusions can be performed. (default 2)
        test:           'lrt' for likelihood ratio test (default) or 'f' for F-test
        verbose: print verbose output? (False)

    Returns:
        lm:     limix LMM object
        RV:     dictionary
                RV['iadded']:   array of indices of SNPs included in order of inclusion
                RV['pvadded']:  array of P-values of the included SNPs in the iteration
                                before their inclusion
                RV['pvall']:    [Nadded x S] np.array of Pvalues for all iterations
    """
    verbose = dlimix.getVerbose(verbose)

    if K is None:
        K=np.eye(snps.shape[0])
    if covs is None:
        covs = np.ones((snps.shape[0],1))
    #assert single trait
    assert pheno.shape[1]==1, 'forward_lmm only supports single phenotypes'

    lm = test_lmm(snps,pheno,K=K,covs=covs,test=test,**kw_args)
    pvall = []
    pv = lm.getPv().ravel()
    #hack to avoid issues with degenerate pv
    pv[sp.isnan(pv)] = 1
    pvall.append(pv)
    imin= pv.argmin()
    niter = 1
    #start stuff
    iadded = []
    pvadded = []
    qvadded = []
    if qvalues:
        assert pv.shape[0]==1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = []
        qv  = FDR.qvalues(pv)
        qvall.append(qv)
        score=qv.min()
    else:
        score=pv.min()
    while (score<threshold) and niter<maxiter:
        t0=time.time()
        iadded.append(imin)
        pvadded.append(pv[imin])
        if qvalues:
            qvadded.append(qv[0,imin])
        covs=np.concatenate((covs,snps[:,imin:(imin+1)]),1)
        lm.setCovs(covs)
        lm.process()
        pv = lm.getPv().ravel()
        pv[sp.isnan(pv)] = 1
        pvall.append(pv)
        imin= pv.argmin()
        if qvalues:
            qv = FDR.qvalues(pv)
            qvall.append(qv)
            score = qv.min()
        else:
            score = pv.min()
        t1=time.time()
        if verbose:
            print(("finished GWAS testing in %.2f seconds" %(t1-t0)))
        niter=niter+1
    RV = {}
    RV['iadded']  = iadded
    RV['pvadded'] = pvadded
    RV['pvall']   = np.array(pvall)
    if qvalues:
        RV['qvall'] = np.array(qvall)
        RV['qvadded'] = qvadded
    return lm,RV
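A hypothetical forward-selection run on synthetic data, again assuming the old limix.qtl module layout (illustrative only; forward_lmm requires a 2D [N x 1] phenotype array):

    import numpy as np
    from limix import qtl                    # assumption: old limix API layout

    N, S = 200, 1000
    snps  = np.random.binomial(2, 0.3, (N, S)).astype(float)
    pheno = np.random.randn(N, 1)
    K     = np.dot(snps, snps.T) / S         # simple realized-relationship kinship

    lm, RV = qtl.forward_lmm(snps, pheno, K=K, threshold=5e-8, maxiter=3)
    print(RV['iadded'])                      # indices of SNPs added, in order of inclusion
    print(RV['pvall'].shape)                 # one row of P values per forward iteration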
Example #6
    def crossValidation(self,
                        seed=0,
                        n_folds=10,
                        fullVector=True,
                        verbose=None,
                        D=None,
                        **keywords):
        """
        Split the dataset in n folds, predict each fold after training the model on all the others

        Args:
            seed:        seed
            n_folds:     number of folds to train the model on
            fullVector:  Boolean indicator; if True, stop as soon as one fold fails to converge, otherwise continue and return a phenotype matrix with missing values for the non-converged folds
            verbose:     if True, prints the fold that is being used for predictions
            **keywords:  params to pass to the function optimize
        Returns:
            Matrix of phenotype predictions [N,P]
        """
        verbose = dlimix.getVerbose(verbose)

        # split samples into training and test
        sp.random.seed(seed)
        r = sp.random.permutation(self.Y.shape[0])
        Icv = sp.floor(((sp.ones(
            (self.Y.shape[0])) * n_folds) * r) / self.Y.shape[0])

        RV = {}
        if self.P == 1: RV['var'] = sp.zeros((n_folds, self.n_randEffs))
        else: RV['var'] = sp.zeros((n_folds, self.P, self.n_randEffs))

        Ystar = sp.zeros_like(self.Y)

        for fold_j in range(n_folds):

            if verbose:
                print ".. predict fold %d" % fold_j

            Itrain = Icv != fold_j
            Itest = Icv == fold_j
            Ytrain = self.Y[Itrain, :]
            Ytest = self.Y[Itest, :]
            vc = VarianceDecomposition(Ytrain)
            vc.setTestSampleSize(Itest.sum())
            for term_i in range(self.n_fixedEffs):
                F = self.vd.getFixed(term_i)
                Ftest = F[Itest, :]
                Ftrain = F[Itrain, :]
                if self.P > 1: A = self.vd.getDesign(term_i)
                else: A = None
                vc.addFixedEffect(F=Ftrain, Ftest=Ftest, A=A)
            for term_i in range(self.n_randEffs):
                if self.P > 1:
                    tct = self.trait_covar_type[term_i]
                    rank = self.rank[term_i]
                    ftc = self.fixed_tc[term_i]
                    jitt = self.jitter[term_i]
                    if tct == 'lowrank_diag1' or tct == 'freeform1':
                        d = D[fold_j, :, term_i]
                    else:
                        d = None
                else:
                    tct = None
                    rank = None
                    ftc = None
                    jitt = None
                    d = None
                if term_i == self.noisPos:
                    vc.addRandomEffect(is_noise=True,
                                       trait_covar_type=tct,
                                       rank=rank,
                                       jitter=jitt,
                                       fixed_trait_covar=ftc,
                                       d=d)
                else:
                    R = self.vd.getTerm(term_i).getK()
                    Rtrain = R[Itrain, :][:, Itrain]
                    Rcross = R[Itrain, :][:, Itest]
                    vc.addRandomEffect(K=Rtrain,
                                       Kcross=Rcross,
                                       trait_covar_type=tct,
                                       rank=rank,
                                       jitter=jitt,
                                       fixed_trait_covar=ftc,
                                       d=d)
            conv = vc.optimize(verbose=False, **keywords)
            if self.P == 1:
                RV['var'][fold_j, :] = vc.getVarianceComps()[0, :]
            else:
                RV['var'][fold_j, :, :] = vc.getVarianceComps()

            if fullVector:
                assert conv, 'VarianceDecomposition:: not converged for fold %d. Stopped here' % fold_j
            if conv:
                Ystar[Itest, :] = vc.predictPhenos()
            else:
                warnings.warn('not converged for fold %d' % fold_j)
                Ystar[Itest, :] = sp.nan

        return Ystar, RV
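The one-liner that builds Icv assigns each sample a fold label by scaling a random permutation of the sample indices: floor(n_folds * r / N) lands in 0..n_folds-1 with near-equal counts. A standalone check of that property:

    import numpy as np

    np.random.seed(0)
    N, n_folds = 103, 10
    r = np.random.permutation(N)                     # shuffled sample indices 0..N-1
    Icv = np.floor((np.ones(N) * n_folds * r) / N)   # fold label per sample

    print(np.unique(Icv))                            # [0. 1. ... 9.]
    print(np.bincount(Icv.astype(int)))              # roughly N/n_folds samples per fold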
Example #7
    def optimize_with_repeates(self,
                               fast=None,
                               verbose=None,
                               n_times=10,
                               lambd=None,
                               lambd_g=None,
                               lambd_n=None):
        """
        Train the model repeatedly, up to the number of times specified by the user, with random
        restarts, and return a list of all relative minima that have been found, sorted by
        decreasing LML. Each list entry is a dictionary with keys "counter", "LML", and "scales".

        After running this function, the vc object will be set at the last iteration. Thus, if you
        wish to get the vc object of one of the repeats, then set the scales. For example:

        vc.setScales(scales=optimize_with_repeates_output[0]["scales"])

        Args:
            fast:       Boolean. If set to True, initialize kronSumGP.
            verbose:    Boolean. If set to True, verbose output is produced. (default True)
            n_times:    number of re-starts of the optimization. (default 10)
        """
        verbose = dlimix.getVerbose(verbose)

        if not self.init: self._initGP(fast)

        opt_list = []

        fixed0 = sp.zeros_like(self.gp.getParams()['dataTerm'])

        # minimize n_times
        for i in range(n_times):

            scales1 = self._getScalesRand()
            fixed1 = 1e-1 * sp.randn(fixed0.shape[0], fixed0.shape[1])
            conv = self.trainGP(fast=fast,
                                scales0=scales1,
                                fixed0=fixed1,
                                lambd=lambd,
                                lambd_g=lambd_g,
                                lambd_n=lambd_n)

            if conv:
                # compare with previous minima
                temp = 1
                for j in range(len(opt_list)):
                    if sp.allclose(abs(self.getScales()),
                                   abs(opt_list[j]['scales'])):
                        temp = 0
                        opt_list[j]['counter'] += 1
                        break
                if temp == 1:
                    opt = {}
                    opt['counter'] = 1
                    opt['LML'] = self.getLML()
                    opt['scales'] = self.getScales()
                    opt_list.append(opt)

        # sort by LML
        LML = sp.array([opt_list[i]['LML'] for i in range(len(opt_list))])
        index = LML.argsort()[::-1]
        out = []
        if verbose:
            print "\nLocal mimima\n"
            print "n_times\t\tLML"
            print "------------------------------------"

        for i in range(len(opt_list)):
            out.append(opt_list[index[i]])
            if verbose:
                print "%d\t\t%f" % (opt_list[index[i]]['counter'],
                                    opt_list[index[i]]['LML'])
                print ""

        return out
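The bookkeeping after the restarts collapses runs that reached the same optimum (allclose on the absolute scales) and sorts the survivors by decreasing LML. A self-contained sketch with fabricated restart results:

    import numpy as np

    # fabricated restart results: (LML, scales)
    runs = [(-120.0, np.array([1.0, 0.5])),
            (-118.5, np.array([0.9, 0.7])),
            (-120.0, np.array([-1.0, 0.5]))]      # same optimum as run 0 up to sign

    opt_list = []
    for lml, scales in runs:
        for opt in opt_list:
            if np.allclose(abs(scales), abs(opt['scales'])):
                opt['counter'] += 1               # seen this minimum before
                break
        else:
            opt_list.append({'counter': 1, 'LML': lml, 'scales': scales})

    opt_list.sort(key=lambda o: o['LML'], reverse=True)  # best (largest) LML first
    print([(o['counter'], o['LML']) for o in opt_list])  # [(1, -118.5), (2, -120.0)]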
Example #8
    def optimize(self,
                 fast=None,
                 scales0=None,
                 fixed0=None,
                 init_method=None,
                 termx=0,
                 n_times=10,
                 perturb=True,
                 pertSize=1e-3,
                 verbose=None,
                 lambd=None,
                 lambd_g=None,
                 lambd_n=None):
        """
        Train the model using the specified initialization strategy

        Args:
            fast:            if true, fast gp is considered; if None (default), fast inference is considered if possible
            scales0:        initial variance component scales; if not None, init_method is set to 'manual'
            fixed0:         initial weights for fixed effects
            init_method:    initialization strategy:
                                'random': variance component parameters (scales) are sampled from a normal distribution with mean 0 and std 1,
                                'diagonal': uses a two-random-effect single trait model to initialize the parameters,
                                'manual': the starting point is set manually,
            termx:            term used for initialization in the diagonal strategy
            n_times:        maximum number of restarts to reach convergence
            perturb:        if true, the initial point (set manually or through the single-trait model) is perturbed with Gaussian noise
            pertSize:       std of the Gaussian noise used to perturb the initial point
            verbose:        print whether convergence was achieved and how many restarts were needed
        """
        verbose = dlimix.getVerbose(verbose)

        if init_method is None:
            if self.P == 1: init_method = 'random'
            else: init_method = 'diagonal'

        if not self.init: self._initGP(fast=fast)

        if scales0 is not None: init_method = 'manual'

        if init_method == 'diagonal':
            scales0 = self._getScalesDiag(termx=termx)

        if init_method == 'pairwise':
            assert self.n_randEffs == 2, 'VarianceDecomposition:: pairwise initialization possible only with 2 terms'
            assert self.P > 1, 'VarianceDecomposition:: pairwise initialization possible only with P>1'
            i = (self.trait_covar_type[0]
                 == 'freeform') * (self.trait_covar_type[1] == 'freeform')
            assert i, 'VarianceDecomposition:: pairwise initialization possible only with freeform matrices'
            scales0 = self._getScalesPairwise(verbose=verbose)

        if init_method in ['diagonal', 'manual', 'pairwise']:
            if not perturb: n_times = 1

        if fixed0 is None:
            fixed0 = sp.zeros_like(self.gp.getParams()['dataTerm'])

        for i in range(n_times):
            if init_method == 'random':
                scales1 = self._getScalesRand()
                fixed1 = pertSize * sp.randn(fixed0.shape[0], fixed0.shape[1])
            elif perturb:
                scales1 = scales0 + pertSize * self._perturbation()
                fixed1 = fixed0 + pertSize * sp.randn(fixed0.shape[0],
                                                      fixed0.shape[1])
            else:
                scales1 = scales0
                fixed1 = fixed0

            conv = self.trainGP(scales0=scales1,
                                fixed0=fixed1,
                                lambd=lambd,
                                lambd_g=lambd_g,
                                lambd_n=lambd_n)
            if conv: break

        if verbose:
            if not conv:
                print('No local minimum found for the tested initialization points')
            else:
                print('Local minimum found at iteration %d' % i)

        return conv
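The restart loop reduces to: perturb the initial point with Gaussian noise of standard deviation pertSize and retry until the trainer reports convergence or n_times attempts are used up. A toy standalone version of that loop (the convergence criterion below is invented purely for illustration):

    import numpy as np

    def toy_train(x0):
        # stand-in for trainGP: "converges" only from a good start
        return bool(np.linalg.norm(x0) < 1.0)

    np.random.seed(1)
    scales0, pertSize, n_times = np.array([2.0, -1.5]), 1.0, 10

    conv = False
    for i in range(n_times):
        scales1 = scales0 + pertSize * np.random.randn(*scales0.shape)  # perturbed start
        conv = toy_train(scales1)
        if conv:
            break

    print('converged at restart %d' % i if conv else 'no convergence')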