Ejemplo n.º 1
0
def getErr(i=96, s=0.01):
    """Compare observed and modeled per-step changes of un-normalized Tajima's D.

    Loads the "TimeSeries" simulation for experiment ``i`` with selection
    strength ``s``, subtracts the column ``i`` of the bottlenecked-neutral
    table from every replicate's Tajima's D trajectory, differences the
    result over time, masks the entries selected by ``leaveOneOut`` and
    passes each column through ``regularize``.  A closed-form curve built
    from the pi and Watterson estimates of ``sim.X0`` is differenced the same
    way.

    Args:
        i: Experiment id; also the column picked from the neutral table.
        s: Selection strength used for loading and for the modeled curve.

    Returns:
        ``(sum of replicate-mean observed changes - mean modeled change)``
        divided by ``neurtrality(i, s)``.
    """
    sim = Simulation.Simulation.load(
        s=s, nu0=0.005, experimentID=i, ModelName="TimeSeries",
        numReplicates=10, step=10, startGeneration=0, maxGeneration=50,
    )

    # Baseline taken from the pickled table, restricted to the sampled rows.
    sampled_rows = [0, 10, 20, 30, 40, 50]
    neutral_table = pd.read_pickle(home + "out/BottleneckedNeutrals.NotNormalized.df")
    baseline = neutral_table[i].loc[sampled_rows]

    tajima = Estimate.Estimate.getEstimate(
        sim.X, n=200, method="tajimaD", removeFixedSites=True, normalizeTajimaD=False
    )
    tajima.index = sim.getTrueGenerationTimes()

    def baseline_corrected_steps(column):
        # First difference of the baseline-corrected series; the leading NaN
        # produced by diff() is dropped.
        return (column - baseline).diff().iloc[1:]

    observed = tajima.apply(baseline_corrected_steps)
    observed[leaveOneOut(observed)] = None
    observed = observed.apply(regularize)

    # Modeled trajectory: logistic frequency curve plugged into
    # b * log(1 - nu) - a * nu ** 2.
    # NOTE(review): the simulation is loaded with nu0=0.005 but the curve
    # below starts from logit(0.1) -- confirm this mismatch is intended.
    generations = sim.getTrueGenerationTimes()
    nu = sig(s * generations / 2 + logit(0.1))
    pi0 = Estimate.Estimate.getEstimate(sim.X0, n=200, method="pi")
    neg_watterson = -Estimate.Estimate.getEstimate(sim.X0, n=200, method="watterson")
    harmonic_sum = (1.0 / np.arange(1, 201)).sum()
    scale = neg_watterson / harmonic_sum
    modeled_curve = pd.Series(scale * np.log(1 - nu) - pi0 * nu ** 2, index=generations)
    modeled = modeled_curve.diff().iloc[1:]

    gap = observed.mean(1).sum() - modeled.mean()
    return gap / neurtrality(i, s)
 def fit(self, y=None, pos=1, verbose=0):
     """Run the iterative fit on ``y`` and return the solution record.

     Rows of ``y`` whose absolute values sum to zero are dropped, together
     with the matching entries of ``self.Times``.  The objective step
     ``self.Objective_`` is then called ``self.maxIter`` times, using
     ``self.initial_momentum`` for the first five iterations and
     ``self.final_momentum`` afterwards.

     Args:
         y: Observations.  Required despite the ``None`` default: the first
             statement calls ``y.abs()``.  Presumably a pandas DataFrame
             (uses ``.abs().sum(1)`` and boolean row selection) -- confirm
             against callers.
         pos: Value stored unchanged under ``'pos'`` in the result.
         verbose: ``> 1`` prints the state after every iteration; any truthy
             value prints the final state once.

     Returns:
         ``pd.Series`` (also stored as ``self.sol``) with keys ``s``, ``LR``,
         ``Time``, ``pos``, ``nu0``, ``slack``, ``obj``, ``obj0``, ``times``,
         ``y``, ``theta``, ``n`` and ``smoothTimes``.  When the fitted ``s``
         is below ``1e-6`` both ``s`` and ``LR`` are reported as 0.

     NOTE: ``obj`` is only bound inside the loop, so ``self.maxIter`` must be
     at least 1.
     """
     nonzero_rows = y.abs().sum(1) != 0
     self.y = y[nonzero_rows]
     self.times = self.Times[np.where(nonzero_rows.values)[0]]
     obj0 = self.getZeroObj()
     if self.initSviaLineSearch:
         self.setInitYslackViaLineSearch()
         self.setInitSviaLineSearch()
     self.reset()
     start_time = time.time()
     for i in range(self.maxIter):
         momentum = self.initial_momentum if i < 5 else self.final_momentum
         obj = self.Objective_(self.y, self.lr_s, self.lr_nu, self.lr_theta,
                               self.times, momentum, self.n, self.theta)
         if verbose > 1:
             # Single-argument print(): same space-separated output as the
             # old print statement, and valid on both Python 2 and 3.
             print(' '.join(str(v) for v in (
                 obj, self.S__.get_value(), self.Yslack__.get_value(),
                 sig(self.c__.get_value()))))
     if verbose:
         print(' '.join(str(v) for v in (
             obj, self.S__.get_value(), self.Yslack__.get_value(),
             sig(self.c__.get_value()))))

     fitted_loss = self.Loss_(self.y, self.Feedforward_(self.times, self.n, self.theta))
     negLogLikelihoodRatio = np.log(obj0) - np.log(fitted_loss)
     # ndarray.item() is what np.asscalar wrapped; np.asscalar itself is
     # deprecated and removed in recent NumPy releases.
     s = self.S__.get_value().item()
     if s < 1e-6:
         negLogLikelihoodRatio = 0
         s = 0

     elapsed = time.time() - start_time
     first_time = self.times[0][0]
     last_time = self.times[-1][0]
     # One column per replicate, each holding every integer time from the
     # first to the last retained sample (inclusive).
     smooth_times = np.tile(np.arange(first_time, last_time + 1),
                            (self.sim.numReplicates, 1)).T
     self.sol = pd.Series({
         's': s,
         'LR': negLogLikelihoodRatio,
         'Time': elapsed,
         'pos': pos,
         'nu0': sig(self.c__.get_value()),
         'slack': self.Yslack__.get_value().item(),
         'obj': float(obj),
         'obj0': float(obj0),
         'times': self.times,
         'y': self.y,
         'theta': self.theta,
         'n': self.n,
         'smoothTimes': smooth_times,
     })
     return self.sol
Ejemplo n.º 3
0
def load2(iii=96, s=0.01):
    """Build baseline-corrected Tajima's D curves for a selected and a neutral run.

    Two "TimeSeries" simulations are loaded for experiment ``iii`` with
    identical settings except for the selection strength (``s`` versus 0).
    For each one the replicate-mean un-normalized Tajima's D is indexed by the
    true generation times and the column ``iii`` of the bottlenecked-neutral
    table is subtracted from it.

    Args:
        iii: Experiment id; also the column picked from the neutral table.
        s: Selection strength of the selected simulation and of the modeled
            curve.

    Returns:
        A 3-tuple ``(ctd, ctdn, Dt)``:

        * ``ctd``  -- list ``[raw, regularize2(raw), regularize3(raw)]`` for
          the selected simulation, where ``raw`` is the corrected series.
        * ``ctdn`` -- the same three series for the ``s=0`` simulation.
        * ``Dt``   -- ``pd.Series`` of ``b * log(1 - nu) - a * nu ** 2`` over
          the generation times, with ``nu`` a logistic curve starting at the
          simulation's ``nu0``.
    """
    nu0 = 0.005
    shared_settings = dict(
        nu0=nu0,
        experimentID=iii,
        ModelName="TimeSeries",
        numReplicates=10,
        step=10,
        startGeneration=0,
        maxGeneration=50,
    )
    sim = Simulation.Simulation.load(s=s, **shared_settings)
    simn = Simulation.Simulation.load(s=0, **shared_settings)
    bot = pd.read_pickle(home + "out/BottleneckedNeutrals.NotNormalized.df")[iii].loc[[0, 10, 20, 30, 40, 50]]

    def mean_tajima_d(simulation):
        # Replicate-mean un-normalized Tajima's D, indexed by generation.
        series = Estimate.Estimate.getEstimate(
            simulation.X, n=200, method="tajimaD", removeFixedSites=True, normalizeTajimaD=False
        ).mean(1)
        series.index = simulation.getTrueGenerationTimes()
        return series

    def corrected_variants(series):
        # The difference is recomputed for every entry so that each
        # regularizer receives its own fresh object.
        return [series - bot, regularize2(series - bot), regularize3(series - bot)]

    td = mean_tajima_d(sim)
    tdn = mean_tajima_d(simn)
    ctd = corrected_variants(td)
    ctdn = corrected_variants(tdn)

    t = sim.getTrueGenerationTimes()
    nu = sig(s * t / 2 + logit(nu0))
    a = Estimate.Estimate.getEstimate(sim.X0, n=200, method="pi")
    b = -Estimate.Estimate.getEstimate(sim.X0, n=200, method="watterson") / (1.0 / np.arange(1, 201)).sum()
    Dt = pd.Series(b * np.log(1 - nu) - a * nu ** 2, index=t)
    # Plain lists are returned on every Python version (an identity map()
    # would be a lazy iterator on Python 3).
    return ctd, ctdn, Dt
    def fit(self, winidx, windowIndex=None, filterAfterDrop=True, linesearchTheta=False, YslackLineSearch=False):
        """Fit the model to the average-HAF series of one window.

        The observations come from ``self.sim.getAverageHAF`` for either
        ``self.sim.winIdx[winidx]`` or, when given, ``windowIndex``.  After
        resetting the parameters and initialising ``s`` by line search, up to
        ``self.maxIter`` objective steps are taken; the loop stops, and the
        last step is undone, as soon as the objective exceeds
        ``self.obj__prev``.

        Args:
            winidx: Window number.  Always used for the reported ``'pos'``
                and ``'watterson'`` entries, even when ``windowIndex`` is
                supplied.
            windowIndex: Optional explicit index passed to ``getAverageHAF``
                instead of ``self.sim.winIdx[winidx]``.
            filterAfterDrop: If true, the per-replicate last generation index
                comes from ``self.sim.filterTimeSamplesWithHighNegDer``;
                otherwise every replicate uses the last time index.
            linesearchTheta: Accepted but not used by this method.
            YslackLineSearch: If true, ``Yslack`` is initialised via
                ``setInitYslackViaSettingInitObservation`` before the fit.

        Returns:
            ``pd.Series`` (also stored as ``self.sol``) with keys ``s``,
            ``LR``, ``Time``, ``pos``, ``nu0``, ``slack``, ``obj``, ``obj0``,
            ``lastTimes``, ``y``, ``theta``, ``n``, ``winidx``, ``SLR``,
            ``watterson`` and ``method``.  A negative fitted ``s`` is
            reported as ``s = 0`` with ``LR = 0``; negative ``LR`` values are
            otherwise left unclipped.
        """
        if windowIndex is None:
            y = self.sim.getAverageHAF(self.sim.winIdx[winidx])
        else:
            y = self.sim.getAverageHAF(windowIndex)
        self.times = self.Times
        if filterAfterDrop:
            self.lastGenerationIndex = self.sim.filterTimeSamplesWithHighNegDer(y)
        else:
            self.lastGenerationIndex = (np.ones(self.numReplicates) * self.times.shape[0]).astype(int) - 1
        self.y = y.values
        self.reset()
        if YslackLineSearch:
            self.setInitYslackViaSettingInitObservation()
        self.setInitSviaLineSearch()

        start_time = time.time()
        row_format = '{}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{}'
        if self.verbose > 2:
            print('y:\n{},times:\n{}\nn:{},\ttheta:{}\tlastGenIDX:{}\tRepIDX:{}'.format(
                self.y, self.times, self.n, self.Theta__.get_value(),
                self.lastGenerationIndex, self.replicateIndex))
        self.obj = float(self.Loss_(self.y, self.times, self.n, self.lastGenerationIndex, self.replicateIndex))
        if self.verbose > 1:
            # The header ends in a newline, so the Python 2 print statement
            # emitted no separating space before the first row; concatenating
            # keeps that exact output with the print() function.
            print('Before\nIter,\tobj,\ts,\ttheta,\tYslack,\tnu\n' + row_format.format(
                0, self.obj, float(self.S__.get_value()), float(self.Theta__.get_value()),
                float(self.Yslack__.get_value()), sig(self.c__.get_value())))
        for i in range(self.maxIter):
            self.saveState()
            momentum = self.initial_momentum if i < 5 else self.final_momentum
            self.obj = self.Objective_(self.y, self.lr_s, self.lr_nu, self.lr_Yslack, self.lr_theta,
                                       self.times, momentum, self.n,
                                       self.lastGenerationIndex, self.replicateIndex)
            if self.verbose > 1:
                print(row_format.format(
                    i + 1, float(self.obj), float(self.S__.get_value()), float(self.Theta__.get_value()),
                    float(self.Yslack__.get_value()), sig(self.c__.get_value())))
            if self.obj > self.obj__prev:
                # The step made the objective worse: roll it back and stop.
                self.undoStep()
                break

        # ndarray.item() is what np.asscalar wrapped; np.asscalar itself is
        # deprecated and removed in recent NumPy releases.
        s = self.S__.get_value().item()
        nu0 = sig(self.c__.get_value())
        slack = self.Yslack__.get_value().item()
        theta = self.Theta__.get_value().item()
        obj = self.Loss_(self.y, self.times, self.n, self.lastGenerationIndex, self.replicateIndex)

        obj0 = self.getZeroObj()
        negLogLikelihoodRatio = np.log(obj0) - np.log(obj)
        if s < 0:
            negLogLikelihoodRatio = 0
            s = 0

        elapsed = time.time() - start_time
        self.sol = pd.Series({
            's': s,
            'LR': negLogLikelihoodRatio,
            'Time': elapsed,
            'pos': self.sim.winMidPos[winidx],
            'nu0': nu0,
            'slack': slack,
            'obj': float(obj),
            'obj0': float(obj0),
            'lastTimes': self.lastGenerationIndex,
            'y': self.y,
            'theta': theta,
            'n': self.n,
            'winidx': winidx,
            'SLR': np.exp(negLogLikelihoodRatio) * s,
            'watterson': Estimate.watterson(self.sim.H0.iloc[:, self.sim.winIdx[winidx]]),
            'method': 'HAF',
        })
        return self.sol