コード例 #1
0
ファイル: Run.py プロジェクト: airanmehr/bio
def _iterWindowMasks(param, meta):
    """Yield ``(chrom, start, end, mask)`` for each sliding window of each chromosome.

    ``mask`` selects the rows of ``meta`` that lie on ``chrom`` and satisfy
    ``start < POS <= end``.  Window boundaries come from ``getSlidingWindows``,
    called once per chromosome with that chromosome's first unique ``CHROMLen``,
    a window of ``param['windowSize']`` and a step of ``param['windowSize']/5``.
    """
    windowSize = param['windowSize']
    step = windowSize/5
    for chrom in meta['#CHROM'].unique():
        # The chromosome part of the mask does not change between windows.
        onChrom = meta['#CHROM'] == chrom
        chromLen = meta[onChrom].CHROMLen.unique()[0]
        start, end = getSlidingWindows(meta, chromLen, windowSize, step)
        for s, e in zip(start, end):
            yield chrom, s, e, onChrom & (meta.POS > s) & (meta.POS <= e)


def runTheta(param,SNP,meta):
    """Compute per-window estimates and pickle them to ``param['outpath'] + 'theta.df'``.

    Two passes are made over every sliding window of every chromosome:

    1. ``Estimate.getAllEstimates`` on the SNP columns inside the window.
    2. ``Estimate.pi`` on the window's ``SYNONYMOUS_CODING`` columns ('PiS') and
       on its ``NON_SYNONYMOUS_CODING`` columns ('PiN').

    Each window contributes rows with the columns ``parameter``, ``estimate``,
    ``#CHROM``, ``start`` and ``end``.  The combined table is sorted by
    ``['#CHROM', 'start']`` before being written.  Nothing is returned.

    Parameters
    ----------
    param : mapping
        Reads the keys ``'windowSize'`` and ``'outpath'``.
    SNP : 2-D array-like
        Indexed as ``SNP[:, columns]``; columns are selected by row position
        in ``meta``.
    meta : pandas.DataFrame
        Reads the columns ``#CHROM``, ``CHROMLen``, ``POS`` and ``TYPE``.
    """
    print('Computing theta...')
    # Collect the per-window frames and concatenate once at the end; growing a
    # DataFrame with pd.concat inside the loop copies all previous rows on
    # every window.  The list starts with an empty frame so that pd.concat
    # always receives at least one object.
    frames = [pd.DataFrame([])]

    for chrom, s, e, inWindow in _iterWindowMasks(param, meta):
        idx = np.where(inWindow)[0]
        dfw = pd.DataFrame(Estimate.getAllEstimates(SNP[:,idx]), columns=['parameter','estimate'])
        dfw['#CHROM'], dfw['start'], dfw['end'] = chrom, s, e
        frames.append(dfw)

    for chrom, s, e, inWindow in _iterWindowMasks(param, meta):
        idxs = np.where(inWindow & (meta.TYPE=='SYNONYMOUS_CODING'))[0]
        idxn = np.where(inWindow & (meta.TYPE=='NON_SYNONYMOUS_CODING'))[0]
        dfw = pd.DataFrame(
            [('PiS', Estimate.pi(SNP[:,idxs])), ('PiN', Estimate.pi(SNP[:,idxn]))],
            columns=['parameter','estimate'])
        dfw['#CHROM'], dfw['start'], dfw['end'] = chrom, s, e
        frames.append(dfw)

    results = pd.concat(frames)

    # Important: the table is written sorted by chromosome and window start
    # (presumably relied on by whatever reads theta.df -- confirm).
    # DataFrame.sort was removed in pandas 0.20; prefer sort_values when the
    # installed pandas provides it and fall back for older releases.
    sortKeys = ['#CHROM','start']
    if hasattr(results, 'sort_values'):
        results.sort_values(sortKeys, inplace=True)
    else:
        results.sort(sortKeys, inplace=True)
    results.to_pickle(param['outpath'] + 'theta.df')
コード例 #2
0
ファイル: Selection.py プロジェクト: airanmehr/bio
def logit(p):
    """Return the log-odds ``log(p / (1 - p))`` of ``p``.

    ``p == 1`` is mapped directly to ``+inf`` so the division by zero is
    never attempted.
    """
    if p == 1:
        return np.inf
    return np.log(p / (1. - p))
# Scratch / notebook-style statements.  They rely on names defined outside
# this snippet (param, X, X0, s, sig, Z, Estimate, T, theano).  Bare
# expressions such as `logit(1)`, `x0` or `times` compute a value and discard
# it when run as a script.
logit(1)
# Starting value x0 at the site under selection: read from X0 when
# param['startGeneration'] is falsy, otherwise the mean over the last axis of
# X at index startGeneration/generationStep - 1.
# NOTE(review): that index relies on `/` yielding an int (Python 2 integer
# division) -- confirm before running under Python 3.
if not param['startGeneration']: 
    x0=X0[param['siteUnderSelection']]
else:
    x0=X[param['startGeneration']/param['generationStep'] -1,param['siteUnderSelection'],:].mean()
x0
# c = log((1 - x0) / x0), i.e. the negative of logit(x0).
c=np.log(1-x0)-np.log(x0)
# z = sig(s*t/2 - c) for every t on the generation grid
# startGeneration..maxGeneration (step generationStep) with the first grid
# point dropped.  presumably `sig` is the logistic sigmoid -- TODO confirm.
z=sig(s*np.array(range(param['startGeneration'],param['maxGeneration']+1,param['generationStep'])[1:])/2. -c)
# Plot X[:,3,:] with z appended as an extra last column.
# NOTE(review): the site index 3 is hard-coded here and below rather than
# taken from param['siteUnderSelection'].
pd.DataFrame(np.append(X[:,3,:],z[:,None],axis=1)).plot()
# y[t, r] = squared norm of X[t,:,r]; the nested list is built replicate-major
# and then transposed, so rows index t and columns index r.
y= np.array([[np.linalg.norm(X[t,:,r])**2 for t in range(X.shape[0])] for r in range(param['numReplicates'])]).T

# Same quantity for the initial state X0.
y0=np.linalg.norm(X0)**2


# NOTE(review): the first theta/n pair (initHaps tiled 10x, n=2000) is
# overwritten by the very next line, so only the second pair (n=200) is used
# below; the first call still runs Estimate.watterson.
theta=Estimate.watterson(np.tile(param['initHaps'],(10,1)));n=2000
theta=Estimate.watterson(param['initHaps']);n=200
# Pair each observed y with Z(..., n, theta) (Z is defined elsewhere): first
# for the initial state, then for the first five time indices of X.
a=[(y0 ,Z(X0[3], n, theta))]
for t in range(5):
    a.append((y[t] ,Z(X[t,3,:], n, theta)))

pd.DataFrame(a)

# Generation grid with its first point dropped (same grid as used for z).
times=range(param['startGeneration'],param['maxGeneration']+1,param['generationStep'])[1:]
times
# Symbolic inputs (presumably T is theano.tensor -- TODO confirm): three
# scalars and an integer vector of times.
x0_ = T.scalar("x0")
n_ = T.scalar("n")
theta_ = T.scalar("theta")
times_ = T.ivector("times")
# Shared variable named 'S', initialised from s converted to float32.
S__=theano.shared(np.asarray(s, dtype = 'float32'), 'S')
# Iterate x_next = (s*x^2 + s*x + 2*x) / (2*s*x + 2) starting from x0_ for
# times_[-1] steps.  The lambda's `s` parameter shadows the module-level `s`;
# S__ is the only non-sequence passed to scan.
predall_, updatesRecurrence_ = theano.scan(lambda x_prev, s: (s*x_prev*x_prev+s*x_prev +2*x_prev)/(2*s*x_prev+2), outputs_info=x0_,non_sequences=S__,n_steps=times_[-1])
コード例 #3
0
    def train(self,i=0):
        """Fit window ``i`` in two passes and return the resulting solution.

        Short-circuit: when the arg-max along axis 0 of the averaged HAF for
        the window is 0 in more than half of the columns (per the original
        author's note: the initial HAF is the maximum in more than half of
        the replicates), no fitting is attempted.  An all-zero solution
        (``s = 0``) is stored in ``self.sol`` and returned.

        Otherwise:

        1. A first ``fit`` with ``YslackLineSearch=True`` is run and its
           ``theta`` / ``slack`` are stored as ``self.initTheta`` /
           ``self.initYslack``.
        2. The learning rates are changed and ``fit`` is run again.
           ``self.setSIM(self.sim)`` is then called and the second fit's
           result is returned.

        Side effects: overwrites ``self.lr_s``, ``self.lr_nu``,
        ``self.lr_theta``, ``self.lr_Yslack`` and ``self.maxIter``.
        """
        windowSites = self.sim.winIdx[i]
        peaksAtStart = self.sim.getAverageHAF(windowSites).values.argmax(0) == 0
        if peaksAtStart.mean() > 0.5:
            self.sol = pd.Series({
                's': 0,
                'LR': 0,
                'Time': 0,
                'pos': self.sim.winMidPos[i],
                'nu0': self.sim.X0.min(),
                'slack': 0,
                'obj': 0,
                'obj0': 0,
                'lastTimes': 0,
                'y': 0,
                'theta': self.sim.theta,
                'n': self.n,
                'winidx': i,
                'SLR': 0,
                'watterson': Estimate.watterson(self.sim.H0.iloc[:, windowSites]),
                'method': 'HAF',
            })
            return self.sol

        # Pass 1.
        # NOTE(review): the literal 1e-400 is below the smallest representable
        # double and evaluates to exactly 0.0, so lr_s and lr_nu are zero here
        # (presumably to hold s and nu fixed during this pass -- confirm).
        self.lr_s = 1e-400
        self.lr_nu = 1e-400
        self.maxIter = 100
        self.lr_theta = 1e-3
        self.lr_Yslack = 1e-3
        self.fit(i, YslackLineSearch=True)
        self.initTheta = self.sol.theta
        self.initYslack = self.sol.slack

        # Pass 2: small rates for s, nu and theta; 1e-300 for Yslack.
        self.lr_s = 1e-6
        self.lr_nu = 1e-6
        self.maxIter = 100
        self.lr_theta = 1e-6
        self.lr_Yslack = 1e-300
        secondFit = self.fit(i)
        self.setSIM(self.sim)
        return secondFit
コード例 #4
0
    def fit(self,winidx,windowIndex=None,filterAfterDrop=True,linesearchTheta=False,YslackLineSearch=False):
        """Run the iterative fit for one window and return the solution Series.

        Parameters
        ----------
        winidx : index into ``self.sim.winIdx`` / ``self.sim.winMidPos``
            Selects the window.  Always used for the reported ``pos``,
            ``winidx`` and ``watterson`` entries.
        windowIndex : optional
            When not ``None``, passed to ``self.sim.getAverageHAF`` instead of
            ``self.sim.winIdx[winidx]``.
        filterAfterDrop : bool
            When truthy, ``self.lastGenerationIndex`` comes from
            ``self.sim.filterTimeSamplesWithHighNegDer``.  Otherwise every
            replicate gets the index of the last entry of ``self.times``.
        linesearchTheta : bool
            Not read by this method.
        YslackLineSearch : bool
            When truthy, ``self.setInitYslackViaSettingInitObservation()`` is
            called before the line search for s.

        Returns
        -------
        pandas.Series
            Also stored as ``self.sol``.

        The loop runs at most ``self.maxIter`` times.  Each iteration saves the
        state, calls ``self.Objective_`` and stops early, undoing the step,
        as soon as the objective exceeds ``self.obj__prev``.
        """
        sites = self.sim.winIdx[winidx] if windowIndex is None else windowIndex
        y = self.sim.getAverageHAF(sites)
        self.times = self.Times
        if filterAfterDrop:
            self.lastGenerationIndex = self.sim.filterTimeSamplesWithHighNegDer(y)
        else:
            lastIndex = (np.ones(self.numReplicates)*self.times.shape[0]).astype(int)-1
            self.lastGenerationIndex = lastIndex
        self.y = y.values
        self.reset()
        if YslackLineSearch:
            self.setInitYslackViaSettingInitObservation()
        self.setInitSviaLineSearch()

        def progressRow(step, objective):
            # One tab-separated progress line: step, objective, s, theta, Yslack, nu.
            return '{}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{}'.format(
                step,
                objective,
                float(self.S__.get_value()),
                float(self.Theta__.get_value()),
                float(self.Yslack__.get_value()),
                sig(self.c__.get_value()))

        start_time = time.time()
        if self.verbose>2:
            print('y:\n{},times:\n{}\nn:{},\ttheta:{}\tlastGenIDX:{}\tRepIDX:{}'.format(self.y,self.times,self.n,self.Theta__.get_value(),self.lastGenerationIndex,self.replicateIndex))
        self.obj = float(self.Loss_(self.y,self.times,self.n,self.lastGenerationIndex,self.replicateIndex))
        if self.verbose>1:
            # Header and first row are joined without a separator: the header
            # ends with a newline, after which the Python 2 print statement
            # emits no space before the next item.
            print('Before\nIter,\tobj,\ts,\ttheta,\tYslack,\tnu\n' + progressRow(0, self.obj))

        for iteration in range(self.maxIter):
            self.saveState()
            # The first five iterations use the initial momentum, later ones the final momentum.
            if iteration < 5:
                momentum = self.initial_momentum
            else:
                momentum = self.final_momentum
            self.obj = self.Objective_(self.y, self.lr_s, self.lr_nu, self.lr_Yslack, self.lr_theta,
                                       self.times, momentum, self.n,
                                       self.lastGenerationIndex, self.replicateIndex)
            if self.verbose>1:
                print(progressRow(iteration+1, float(self.obj)))
            if self.obj>self.obj__prev:
                # The objective got worse: roll back this step and stop.
                self.undoStep()
                break

        s = np.asscalar(self.S__.get_value())
        nu0 = sig(self.c__.get_value())
        slack = np.asscalar(self.Yslack__.get_value())
        theta = np.asscalar(self.Theta__.get_value())
        obj = self.Loss_(self.y,self.times,self.n,self.lastGenerationIndex,self.replicateIndex)

        obj0 = self.getZeroObj()
        # log(obj0) - log(obj); reported under the 'LR' key.
        logRatio = np.log(obj0)-np.log(obj)
        if s<0:
            # A negative s is reported as s = 0 with a zero ratio.
            logRatio = 0
            s = 0

        self.sol = pd.Series({
            's': s,
            'LR': logRatio,
            'Time': time.time()-start_time,
            'pos': self.sim.winMidPos[winidx],
            'nu0': nu0,
            'slack': slack,
            'obj': float(obj),
            'obj0': float(obj0),
            'lastTimes': self.lastGenerationIndex,
            'y': self.y,
            'theta': theta,
            'n': self.n,
            'winidx': winidx,
            'SLR': np.exp(logRatio)*s,
            'watterson': Estimate.watterson(self.sim.H0.iloc[:,self.sim.winIdx[winidx]]),
            'method': 'HAF',
        })
        return self.sol