예제 #1
0
파일: testCV.py 프로젝트: bdepardo/FaST-LMM
    def __init__(self,Y,X=None, G0=None, appendbias=False, model = None,
                 n_folds_params=10, n_folds=10, scoring = None, verbose = False,
                 n_jobs_grid=1, data_permutation=None, nested=True, greater_is_better=None,
                 nullModel=None, altModel=None):#Note that this code will break as we don't know what it does
        association.varcomp_test.__init__(self,Y=Y,X=X,appendbias=appendbias)

        assert model is None, "Shouldn't we remove this parameter?"
        self.n_jobs_grid = n_jobs_grid
        self.verbose = verbose
        self.n_folds = n_folds
        self.n_folds_params = n_folds_params
        self.G0=G0
        self.nullModel = nullModel
        self.altModel = altModel

        if data_permutation is None:
            data_permutation = util.generatePermutation(self.Y.shape[0],93828231) #permute with an arbitrary seed
        self.data_permutation = data_permutation

        if 'param_grid' in nullModel:
            param_grid = nullModel['param_grid']
        else:
            param_grid = self._getParamGrid(G0, None, nullModel)

        if scoring is None:
            (self.scoring, self.greater_is_better) = self._getScoring()
        else:
            self.scoring = scoring
            self.greater_is_better = greater_is_better
  
        model = self._getModel(nullModel, param_grid)
        nested = self._isNested(nullModel)

        self.scores_null, self.params_null = self.score_nestedCV(None, model, param_grid,
                                     self.nullModel['effect'], nested)
예제 #2
0
    def __init__(self,Y,X=None, G0=None, appendbias=False, model = None,
                 n_folds_params=10, n_folds=10, scoring = None, verbose = False,
                 n_jobs_grid=1, data_permutation=None, nested=True, greater_is_better=None,
                 nullModel=None, altModel=None):#Note that this code will break as we don't know what it does
        association.varcomp_test.__init__(self,Y=Y,X=X,appendbias=appendbias)

        assert model is None, "Shouldn't we remove this parameter?"
        self.n_jobs_grid = n_jobs_grid
        self.verbose = verbose
        self.n_folds = n_folds
        self.n_folds_params = n_folds_params
        self.G0=G0
        self.nullModel = nullModel
        self.altModel = altModel

        if data_permutation is None:
            data_permutation = util.generatePermutation(self.Y.shape[0],93828231) #permute with an arbitrary seed
        self.data_permutation = data_permutation

        if 'param_grid' in nullModel:
            param_grid = nullModel['param_grid']
        else:
            param_grid = self._getParamGrid(G0, None, nullModel)

        if scoring is None:
            (self.scoring, self.greater_is_better) = self._getScoring()
        else:
            self.scoring = scoring
            self.greater_is_better = greater_is_better
  
        model = self._getModel(nullModel, param_grid)
        nested = self._isNested(nullModel)

        self.scores_null, self.params_null = self.score_nestedCV(None, model, param_grid,
                                     self.nullModel['effect'], nested)
예제 #3
0
 def __init__(self, numsnps, randomseed, bed):
     self.numsnps=numsnps
     self.randomseed=randomseed
     self.bed = bed
     #very inefficient extraction of a few random SNP indexes:        
     self.snpindlist = utilx.generatePermutation(sp.arange(0,bed.snp_count),randomseed)[0:numsnps]
     self.snpindlist.sort()        
예제 #4
0
 def __init__(self, numsnps, randomseed, bed):
     self.numsnps = numsnps
     self.randomseed = randomseed
     self.bed = bed
     #very inefficient extraction of a few random SNP indexes:
     self.snpindlist = utilx.generatePermutation(
         sp.arange(0, bed.snp_count), randomseed)[0:numsnps]
     self.snpindlist.sort()
예제 #5
0
def genphen(y_G0,G1,covDat,options,nInd,K1=None,fracCausal=None,randseed=None):
    '''
    Generate synthetic phenotype with a LMM and linear kernels, using SNPs in G1 for signal,
    snps in GO for background, and one of two link functions.
    If genlink=='linear', uses linear LMM. If genlink='logistic', then thresholds to get binary.

    fracCausal is the fraction of SNPs that are causal (rounding up) when G1 is provided
    
    Only one of G1 and K1 can be not None (G1 is good for low rank, K1 for full rank)
    
    Returns:    
        y  (binary, or real-valued, as dictated by genlink)      
        If y is binary, casefrac are 1s, and the rest 0s (default casefrac=0.5)
    Notes: uses sp.random.X so that the seed that was set can be used
    '''    
    
    sp.random.seed(int(randseed % sys.maxint)) 

    if options.has_key("numBackSnps") and options["numBackSnps"]>0:
        raise Exception("I accidentally deleted this move from FastLMmSet to here, see code for FastLmmSet.py from 11/24/2013")
    
    ## generate from the causal (not background) SNPs---------------
    assert not (G1 is not None and K1 is not None), "need to provide only either G1 or K1"
    fracCausal=options['fracCausal']    

    if G1 is not None and options["varG"]>0:    
        if fracCausal>1.0 or fracCausal<0.01: raise Exception("fraCausal should be between 0.01 and 1")    
        nSnp=G1.shape[1]        
        if fracCausal !=1.0:
            nSnpNew=sp.ceil(fracCausal*nSnp)            
            permutationIndex = utilx.generatePermutation(sp.arange(0,nSnp),randseed)[0:nSnpNew]            
            G1new=G1[:,permutationIndex]
        else:     
            nSnpNew=nSnp       
            G1new=G1
    elif K1 is not None:
        assert(fracCausal==1.0 or fracCausal is None)
        pass
    else:
        assert options['varG']==0, "varG is not zero, but neither G1 nor K1 were provided"
    
    stdG=sp.sqrt(options['varG'])

    if stdG>0:
        if G1 is not None:
            y_G1=stdG*G1new.dot(sp.random.randn(nSnpNew,1))    #good for low rank
        else:
            K1chol = la.cholesky(K1)
            y_G1=stdG*K1chol.dot(sp.random.randn(nInd,1))       #good for full rank
    else:
        y_G1=0.0
   ##----------------------------------------------------------------

    if covDat is not None: 
        nCov=covDat.shape[1]    
        covWeights=sp.random.randn(nCov, 1)*sp.sqrt(options['varCov'])
        y_beta=covDat.dot(covWeights)
    else:
        y_beta=0.0
           
    y_noise_t=0    
    #heavy-tailed noise 
    if options['varET']>0:        
        y_noise_t=sp.random.standard_t(df=options['varETd'],size=(nInd,1))*sp.sqrt(options['varET'])          
    else:
        y_noise_t=0
    
    #gaussian noise
    y_noise=sp.random.randn(nInd,1)*sp.sqrt(options['varE'])  
                       
    y=y_noise + y_noise_t + y_G0 + y_beta + y_G1   
    y=y[:,0]#y.flatten()          

   
    if options['link']=='linear':
        return y
    elif options['link']=='logistic':
        if options['casefrac'] is None: options['casefrac']=0.5        
        ysort=sp.sort(y,axis=None)
        thresh=ysort[sp.floor(nInd*options['casefrac'])]
        ybin=sp.array(y>thresh,dtype="float")
        return ybin
    else:
        raise Exception("Invald link function for data generation")
예제 #6
0
def genphen(y_G0,
            G1,
            covDat,
            options,
            nInd,
            K1=None,
            fracCausal=None,
            randseed=None):
    '''
    Generate synthetic phenotype with a LMM and linear kernels, using SNPs in G1 for signal,
    snps in GO for background, and one of two link functions.
    If genlink=='linear', uses linear LMM. If genlink='logistic', then thresholds to get binary.

    fracCausal is the fraction of SNPs that are causal (rounding up) when G1 is provided
    
    Only one of G1 and K1 can be not None (G1 is good for low rank, K1 for full rank)
    
    Returns:    
        y  (binary, or real-valued, as dictated by genlink)      
        If y is binary, casefrac are 1s, and the rest 0s (default casefrac=0.5)
    Notes: uses sp.random.X so that the seed that was set can be used
    '''

    sp.random.seed(int(randseed % sys.maxint))

    if options.has_key("numBackSnps") and options["numBackSnps"] > 0:
        raise Exception(
            "I accidentally deleted this move from FastLMmSet to here, see code for FastLmmSet.py from 11/24/2013"
        )

    ## generate from the causal (not background) SNPs---------------
    assert not (G1 is not None
                and K1 is not None), "need to provide only either G1 or K1"
    fracCausal = options['fracCausal']

    if G1 is not None and options["varG"] > 0:
        if fracCausal > 1.0 or fracCausal < 0.01:
            raise Exception("fraCausal should be between 0.01 and 1")
        nSnp = G1.shape[1]
        if fracCausal != 1.0:
            nSnpNew = sp.ceil(fracCausal * nSnp)
            permutationIndex = utilx.generatePermutation(
                sp.arange(0, nSnp), randseed)[0:nSnpNew]
            G1new = G1[:, permutationIndex]
        else:
            nSnpNew = nSnp
            G1new = G1
    elif K1 is not None:
        assert (fracCausal == 1.0 or fracCausal is None)
        pass
    else:
        assert options[
            'varG'] == 0, "varG is not zero, but neither G1 nor K1 were provided"

    stdG = sp.sqrt(options['varG'])

    if stdG > 0:
        if G1 is not None:
            y_G1 = stdG * G1new.dot(sp.random.randn(nSnpNew,
                                                    1))  #good for low rank
        else:
            K1chol = la.cholesky(K1)
            y_G1 = stdG * K1chol.dot(sp.random.randn(nInd,
                                                     1))  #good for full rank
    else:
        y_G1 = 0.0
##----------------------------------------------------------------

    if covDat is not None:
        nCov = covDat.shape[1]
        covWeights = sp.random.randn(nCov, 1) * sp.sqrt(options['varCov'])
        y_beta = covDat.dot(covWeights)
    else:
        y_beta = 0.0

    y_noise_t = 0
    #heavy-tailed noise
    if options['varET'] > 0:
        y_noise_t = sp.random.standard_t(
            df=options['varETd'], size=(nInd, 1)) * sp.sqrt(options['varET'])
    else:
        y_noise_t = 0

    #gaussian noise
    y_noise = sp.random.randn(nInd, 1) * sp.sqrt(options['varE'])

    y = y_noise + y_noise_t + y_G0 + y_beta + y_G1
    y = y[:, 0]  #y.flatten()

    if options['link'] == 'linear':
        return y
    elif options['link'] == 'logistic':
        if options['casefrac'] is None: options['casefrac'] = 0.5
        ysort = sp.sort(y, axis=None)
        thresh = ysort[sp.floor(nInd * options['casefrac'])]
        ybin = sp.array(y > thresh, dtype="float")
        return ybin
    else:
        raise Exception("Invald link function for data generation")