Esempio n. 1
0
def computeLikelihoodRealCDold(args):
    """
    Compute a penalized log-likelihood for every row of CD at one (s, h) pair.

    Args: (packed in one sequence because it's more convenient for multiprocessing)
        args: a sequence (CD, E, s, h, regLambda).
        CD: a dataframe for which each row is a position. Its values are used as
            row labels into E. ColumnsLevels= [REP, TIME] , IndexLevels=[CHROM,POS]
        E: a dataframe looked up with ``E.loc[...]`` by the values of CD
            (presumably the emission probabilities -- TODO confirm).
        s: is selection strength
        h: is overdominance
        regLambda: weight of the penalty ``regLambda * abs(s)`` that is
            subtracted from every returned value.
    Returns:
        a series indexed like CD and named (s, h), containing the summed
        per-replicate log-likelihood of the timeseries minus the penalty.
    """
    CD, E, s, h, regLambda = args
    # Progress trace. Joining str() of each item prints exactly what the old
    # Python 2 ``print a, b, c`` statement printed, and also runs on Python 3.
    print(' '.join(map(str, (CD.shape, s, h))))
    if CD.shape[0] > 4 * 1e5:
        # Too many rows for one pass: split the row positions into batches,
        # recurse on each batch and stitch the per-batch series back together.
        numBatches = 5
        idx = np.arange(CD.shape[0])
        return pd.concat(
            [computeLikelihoodRealCDold((CD.iloc[x], E, s, h, regLambda))
             for x in np.array_split(idx, numBatches)])
    # For each of the (hard-coded) replicates 0..2: the gaps between
    # consecutive column labels (times) of that replicate.
    powers = pd.Series(pd.Series(CD[r].columns).diff().values[1:] for r in range(3))
    # Table for this (s, h), indexed by the gap sizes found in `powers`.
    T = pd.read_pickle(utl.outpath + 'transition/real/S{:02.0f}.H{:02.0f}.df'.format(s * 100, h * 100))
    likes = pd.Series(0, index=CD.index, name=(s, h))
    for rep, df in CD.T.groupby(level=0):
        # Start from the rows of E selected by the first time point (label 0).
        alpha = E.loc[df.loc[(rep, 0)]]
        for step, power in zip(range(1, df.shape[0]), powers[rep]):
            # Propagate through T for this gap, then weight by the rows of E
            # selected by the observation at this step.
            alpha = alpha.values.dot(T.loc[power].values) * E.loc[df.loc[rep].iloc[step]]
        # One log term per replicate, taken after its last step.
        likes += utl.vectorizedLog(alpha.mean(1).values)
    return likes - regLambda * abs(s)
Esempio n. 2
0
def computeLikelihoodRealBatch(args):
    """
    Accumulate a per-row log-likelihood over all replicates of one batch.

    Args: (packed in one sequence)
        args: a sequence (CD, E, T, powers).
        CD: a dataframe whose transposed rows are grouped by their first index
            level (the replicate). Its values are used as integer row positions
            into E.
        E: a dataframe whose rows are picked positionally by the values of CD
            (presumably emission probabilities -- TODO confirm).
        T: an object looked up with ``T.loc[power]`` for every step
            (presumably transition matrices per time gap -- TODO confirm).
        powers: indexable by replicate; ``powers[rep]`` yields one lookup key
            into T for every step after the first.
    Returns:
        a series indexed like CD holding the sum over replicates of
        ``utl.vectorizedLog`` applied to the row means of the final alpha.
    """
    CD, E, T, powers = args
    total = pd.Series(0, index=CD.index)
    for rep, repFrame in CD.T.groupby(level=0):
        # Initial state: rows of E at the positions given by time point 0.
        firstObs = repFrame.loc[(rep, 0)]
        forward = E.iloc[firstObs].values
        laterSteps = range(1, repFrame.shape[0])
        for step, power in zip(laterSteps, powers[rep]):
            transition = T.loc[power].values
            obs = repFrame.loc[rep].iloc[step].values
            forward = forward.dot(transition) * E.values[obs]
        # The log term is added once per replicate, after the last step,
        # not once per step.
        total += utl.vectorizedLog(forward.mean(1))
    return total