コード例 #1
0
ファイル: Markov.py プロジェクト: airanmehr/bio
def computePowerForSandSaveRealData(sh, NumericallyStable=False, TakeLog=False, N = 1000,save=True):
    """Compute selected powers of the (s, h) transition matrix, then pickle or return them.

    The one-step matrix comes from
    ``Markov.computeTransition(s, N, h=h, takeLog=False)`` and is raised to
    the powers 10, 12, 14, 15, 22 and 23. A one-line report with the number
    of "zero probability" entries of the 23rd power is printed.

    Args:
        sh: Pair ``(s, h)``; both values are forwarded to
            ``Markov.computeTransition`` and used in the output file name.
        NumericallyStable: Accepted but not read anywhere in this function.
        TakeLog: When true, ``utl.numbaLog`` is applied to every power and
            the marker counted in the report is ``-inf`` instead of ``0``.
        N: Forwarded to ``Markov.computeTransition`` (a leftover comment in
            the original code calls it the number of diploids).
        save: When true the result is pickled and ``None`` is returned;
            otherwise the result is returned and no file is written.

    Returns:
        ``None`` when ``save`` is true, otherwise a ``pd.Series`` indexed by
        exponent whose values are the corresponding matrix powers.
    """
    exponents = [10, 12, 14, 15, 22, 23]

    def matrixPowers(T):
        # Each power is assembled from earlier ones; operand order is kept
        # fixed so the floating-point results do not change.
        P = {1: T}
        P[2] = P[1].dot(P[1]).astype(float)
        P[3] = P[2].dot(P[1])
        P[4] = P[2].dot(P[2])
        P[5] = P[3].dot(P[2])
        P[10] = P[5].dot(P[5])
        P[12] = P[10].dot(P[2])
        P[14] = P[4].dot(P[10])
        P[15] = P[5].dot(P[10])
        P[22] = P[12].dot(P[10])
        P[23] = P[22].dot(P[1])
        selected = [P[k] for k in exponents]
        if TakeLog:
            selected = [utl.numbaLog(M) for M in selected]
        return pd.Series(selected, index=exponents)

    s, h = sh
    # The output directory is created even when nothing ends up being saved.
    outdir = '{}transition/real/'.format(utl.outpath)
    utl.mkdir(outdir)
    fname = '{}S{:E}.H{:E}.df'.format(outdir, np.round(s, 2), h)

    # NOTE: an earlier variant computed the transition in log space
    # (takeLog=True) and re-normalised the rows after exponentiating in
    # float128; the direct computation below replaced it.
    Tn = matrixPowers(Markov.computeTransition(s, N, h=h, takeLog=False))

    zeroMarker = (0, -np.inf)[TakeLog]
    header = 'Computed power for s={}, h={}'.format(s, h) + '  Number of zero prob transitions:'
    # Count marker entries in the highest power: per-row totals, with the
    # first and last rows left out of the final sum.
    numZero = (Tn.iloc[-1] == zeroMarker).sum(1).iloc[1:-1].sum()
    print(header + ' ' + str(numZero))

    if not save:
        return Tn
    Tn.to_pickle(fname)
    gc.collect()
コード例 #2
0
ファイル: Utils.py プロジェクト: airanmehr/bio
def runHMM(h, stepS=0.05, eps=1e-1,CD=None,E=None,save=True,verbose=1):
    """Split rows into null / positive / negative groups by likelihood and fit each group.

    The null likelihood of every row is compared with the likelihood at
    ``-stepS``; rows where the null is strictly larger are re-evaluated at
    ``+stepS``. Rows are then labelled:

    * ``neg``  - likelihood at ``-stepS`` is at least the null likelihood,
    * ``pos``  - likelihood at ``+stepS`` is strictly above the null,
    * ``zero`` - the remaining re-evaluated rows (kept with ``s = 0``).

    ``findML`` is called for the ``neg`` and ``pos`` groups.

    Args:
        h: Passed to ``mkv.computeLikelihoodReal`` and ``findML``; also used
            as the top level of the result's column MultiIndex and in the
            output file name.
        stepS: Step used for the first positive / negative evaluation.
        eps: Forwarded to ``findML``.
        CD: Data indexed per row; read from ``real/CDEidx.df`` under
            ``utl.outpath`` when ``None``.
        E: Emissions; read from ``real/Emissions.df`` under ``utl.outpath``
            when ``None``.
        save: When true, the result is pickled under ``real/HMM/``.
        verbose: A value above 0 prints the group sizes.

    Returns:
        DataFrame with the concatenated group results plus the null
        likelihoods, with columns labelled ``(h, stat)``.
    """
    if CD is None:
        CD = pd.read_pickle(utl.outpath + 'real/CDEidx.df').iloc[:]
    if E is None:
        E = pd.read_pickle(utl.outpath + 'real/Emissions.df')

    def trueLabels(mask):
        # Index labels at which the boolean mask holds.
        return mask.replace({False: None}).dropna().index

    likes_null = getNullLikelihoods(CD, E)
    likes_thn = mkv.computeLikelihoodReal((CD, E, -stepS, h))

    # Only rows where the null beats the negative step get a positive evaluation.
    candidates = CD[likes_null > likes_thn]
    likes_thp = mkv.computeLikelihoodReal((candidates, E, stepS, h))

    neg = likes_thn[likes_null <= likes_thn]
    nullAtCandidates = likes_null.loc[likes_thp.index]
    zero = likes_null.loc[trueLabels(nullAtCandidates >= likes_thp)]
    pos = likes_thp.loc[trueLabels(nullAtCandidates < likes_thp)]

    if verbose > 0:
        total = CD.shape[0]
        report = 'N={}\t Null={} ({:.0f}\%)\t Pos={}\t Neg={}'.format(
            total, zero.size, zero.size / float(total) * 100, pos.size, neg.size)
        print(report)
    sys.stdout.flush()

    dfz = pd.DataFrame(zero.values, index=zero.index, columns=['alt'])
    dfz['s'] = 0
    dfn = findML(neg, -stepS, CD.loc[neg.index], E, h, eps, stepS)
    dfp = findML(pos, stepS, CD.loc[pos.index], E, h, eps, stepS)

    stacked = pd.concat([dfp, dfz, dfn])
    df = pd.concat([stacked, likes_null], axis=1)
    df.columns = pd.MultiIndex.from_product([[h], df.columns], names=['h', 'stat'])

    if save:
        outdir = utl.outpath + 'real/HMM/'
        utl.mkdir(outdir)
        df.to_pickle(outdir + 'h{:E}.df'.format(h))
    return df
コード例 #3
0
ファイル: Run.py プロジェクト: airanmehr/bio
def Power(method, depthRate, nu0, s, numReplicates=3, samplingWindow=50, L=50000, numExperiments=500, numProcess=4):
    """Run every experiment for one (method, depthRate, nu0, s) setting and pickle the results.

    A header line describing the setting is always printed first. Nothing
    else happens when ``method`` is ``'CMH'`` or ``'HMM'`` with an infinite
    ``depthRate``, or when ``s`` is falsy and ``nu0`` equals 0.1.

    Args:
        method: Method name; stored in the experiment parameters and used in
            the output file name.
        depthRate: Stored in the parameters and used in the file name.
        nu0: Stored in the parameters and used in the file name.
        s: Stored in the parameters and used in the file name.
        numReplicates: Stored in the experiment parameters.
        samplingWindow: Stored in the experiment parameters.
        L: Stored in the experiment parameters.
        numExperiments: Stored in the experiment parameters.
        numProcess: Stored as ``'numThreads'``; ``1`` runs the experiments
            in-process, any other value uses a ``Pool`` of that size.

    Returns:
        ``None``. The combined result frame is printed and written to
        ``ROC/runs/`` under ``utl.outpath``.
    """
    print('\nMethod={}\tR={}\twin={}\tnu0={}\ts={}, depthRate={}'.format(
        method, numReplicates, samplingWindow, nu0, s, depthRate))
    sys.stdout.flush()

    # Settings that are skipped outright.
    if method in ['CMH', 'HMM'] and depthRate == np.inf:
        return
    if not s and nu0 == 0.1:
        return

    param = {'numExperiments': numExperiments, 'method': method, 'numThreads': numProcess, 'ModelName': 'TimeSeries',
             'samplingWindow': samplingWindow, 'L': L, 'numReplicates': numReplicates, 'depthRate': depthRate}
    param['nu0'] = nu0
    param['s'] = s
    experiments = getParamsForExperiments(param)

    if numProcess == 1:
        results = [runOne(experiment) for experiment in experiments]
    else:
        workers = Pool(numProcess)
        results = workers.map(runOne, experiments)
        workers.terminate()
    gc.collect()

    df = pd.concat(results)
    sys.stdout.flush()
    df.sortlevel(inplace=True)
    # Columns that are entirely missing carry no information.
    df.dropna(axis=1, how='all', inplace=True)
    print(df)

    outpath = utl.outpath + 'ROC/runs/'
    utl.mkdir(outpath)
    target = '{}{}.{:.0f}.{:E}.{:E}.df'.format(outpath, method, depthRate, nu0, s)
    df.to_pickle(target)
コード例 #4
0
ファイル: MarkovBrownian.py プロジェクト: airanmehr/bio
def runOne(args):
    """Simulate 1e5 single-locus runs for one (nu0, s) pair and pickle them in batches.

    The experiment indices are split with ``utl.batch(..., 10000)``. For each
    batch, one ``Simulation.simulateSingleLoci`` call is made per index and
    only the entries selected by ``[[1, 10, 100]]`` are kept. The batch is
    stored as one frame (one row per simulation) under
    ``markov/simulations/`` in ``utl.outpath``.

    Args:
        args: Pair ``(nu0, s)`` forwarded to the simulator and used in the
            output file names.
    """
    outdir = utl.outpath + 'markov/simulations/'
    utl.mkdir(outdir)
    numExp = int(1e5)
    nu0, s = args
    print(str(nu0) + ' ' + str(s))

    for i, batch in enumerate(utl.batch(range(numExp), 10000)):
        # Progress: blank line, then batch number with its first and last index.
        print('')
        print(' '.join([str(i), str(batch[0]), str(batch[-1])]))
        runs = [Simulation.simulateSingleLoci(nu0=nu0, s=s)[[1, 10, 100]] for _ in batch]
        a = pd.concat(runs, axis=1).T
        a.to_pickle(outdir + 'nu{:E}.s{:E}.{}.df'.format(nu0, s, i))
コード例 #5
0
ファイル: createPool.py プロジェクト: airanmehr/bio
def createOneMSMS(param, forceToHaveSoftFreq):
    """Run msms once for ``param`` and write its output to an ``.msms`` file.

    The file is written to ``<utl.simoutpath><ModelName>/msms/`` and is named
    after ``param["L"]`` and ``param["i"]``.

    When ``forceToHaveSoftFreq`` is true the simulation is repeated until the
    loaded result contains at least one column whose mean equals 0.1. The
    original code retried through a recursive call that omitted the
    ``forceToHaveSoftFreq`` argument and therefore raised ``TypeError`` on
    the first retry; the retry is now a plain loop.

    Args:
        param: Mapping with the keys ``"Ne"``, ``"mu"``, ``"r"``, ``"L"``,
            ``"n"``, ``"i"`` and ``"ModelName"``.
        forceToHaveSoftFreq: Whether to regenerate until a frequency of 0.1
            is present in the output.
    """
    theta = 2 * param["Ne"] * param["mu"] * param["L"]
    rho = 2 * param["Ne"] * param["r"] * param["L"]
    path = "{}{}/msms/".format(utl.simoutpath, param["ModelName"])
    utl.mkdir(path)
    # Numeric identifiers are rendered in exponent notation, anything else verbatim.
    if isinstance(param["i"], (int, float, long)):
        filename = "{}L{:E}.{:E}.msms".format(path, param["L"], param["i"])
    else:
        filename = "{}L{:E}.{}.msms".format(path, param["L"], param["i"])
    # NOTE(review): the command relies on the shell for "~" expansion and the
    # ">" redirect, and the file name is interpolated unquoted; only call
    # this with trusted parameter values.
    cmd = "java -jar -Xmx2g ~/bin/msms/lib/msms.jar -ms {} 1 -t {:.0f} -r {:.0f} {:.0f} -oFP 0.000000000000E00 > {}".format(
        param["n"], theta, rho, param["L"], filename
    )
    while True:
        subprocess.call(cmd, shell=True)
        if not forceToHaveSoftFreq:
            break
        # Make sure the initial frequency 0.1 exists; otherwise simulate again.
        if (Simulation.MSMS.load(filename)[0].mean(0) == 0.1).sum():
            break
コード例 #6
0
ファイル: Run.py プロジェクト: airanmehr/bio
def PowerForDepth(method, depthRate, numReplicates=3, samplingWindow=50, L=50000, numExperiments=500, numProcess=4):
    """Run the experiments over a fixed grid of (nu0, s) plus a null run, then pickle everything.

    The grid is ``nu0 in [0.005, 0.1]`` by ``s in [.025, 0.05, 0.075, 0.1]``.
    After the grid, the parameter sets of the last grid cell are reused with
    ``s = 0`` and ``nu0 = 0.005`` for one additional run. Nothing is run when
    ``method`` is ``'HMM'`` and ``depthRate`` is infinite (the header line is
    still printed).

    Changes compared with the original implementation:
      * the worker pool of the final run is now terminated (it used to be
        left running), and pools are torn down even when a worker raises;
      * ``numProcess == 1`` is honoured for the final run as well instead of
        always creating a pool;
      * the loop over the final parameter sets no longer rebinds ``param``.

    Args:
        method: Method name; stored in the experiment parameters and used in
            the output file name.
        depthRate: Stored in the parameters and used in the file name.
        numReplicates: Stored in the experiment parameters.
        samplingWindow: Stored in the experiment parameters.
        L: Stored in the experiment parameters.
        numExperiments: Stored in the experiment parameters.
        numProcess: Stored as ``'numThreads'``; ``1`` runs in-process, any
            other value uses a ``Pool`` of that size.

    Returns:
        ``None``. The combined frame is printed and written to ``ROC/`` under
        ``utl.outpath``.
    """
    def runAll(experiments):
        # Run every experiment, serially or in a pool that is always cleaned up.
        if numProcess == 1:
            return [runOne(experiment) for experiment in experiments]
        pool = Pool(numProcess)
        try:
            return pool.map(runOne, experiments)
        finally:
            pool.terminate()

    df = []
    Nu = [0.005, 0.1]
    S = [.025, 0.05, 0.075, 0.1]
    param = {'numExperiments': numExperiments, 'method': method, 'numThreads': numProcess, 'ModelName': 'TimeSeries',
             'samplingWindow': samplingWindow, 'L': L, 'numReplicates': numReplicates, 'depthRate': depthRate}
    print('Nu={}\tS={}\tnumThreads={}\tmethod={}\tnumExperiments={}'.format(Nu, S, numProcess, method, numExperiments))
    sys.stdout.flush()
    if method == 'HMM' and depthRate == np.inf:
        return

    for nu0 in Nu:
        param['nu0'] = nu0
        for s in S:
            param['s'] = s
            params = getParamsForExperiments(param)
            results = runAll(params)
            gc.collect()
            df.append(pd.concat(results))
            print('\nMethod={}\tR={}\twin={}\tnu0={}\ts={}, depthRate={}'.format(method, numReplicates,
                                                                                  samplingWindow, nu0, s, depthRate))
            sys.stdout.flush()

    # Null run: reuse the parameter sets of the last grid cell with s = 0.
    for nullParam in params:
        nullParam['s'] = 0
        nullParam['nu0'] = 0.005
    df.append(pd.concat(runAll(params)))

    df = pd.concat(df)
    df.sortlevel(inplace=True)
    df.dropna(axis=1, how='all', inplace=True)
    print(df)
    outpath = utl.outpath + 'ROC/'
    utl.mkdir(outpath)
    df.to_pickle('{}{}.{:.0f}.df'.format(outpath, method, depthRate))