def computeLikelihoodRealCDold(args):
    """
    Compute the penalized log-likelihood of every row of CD for one (s, h) pair.

    Args: (packed in one sequence, which is more convenient for multiprocessing)
        args: a sequence (CD, E, s, h, regLambda).
            CD: dataframe for which each row is a position.
                ColumnsLevels=[REP, TIME], IndexLevels=[CHROM,POS].
                Every replicate needs a time point labelled 0 (it seeds the
                recursion). The values are used as row labels of E
                (presumably discretized observations -- confirm with the caller).
            E: dataframe looked up with ``E.loc[...]`` (presumably emission
                probabilities, one column per hidden state -- confirm).
            s: is selection strength
            h: is overdominance
            regLambda: weight of the penalty ``regLambda * abs(s)`` that is
                subtracted from the result.
    Returns:
        a series indexed like CD and named (s, h), containing the penalized
        log-likelihood of the timeseries for the specific values of s and h.
    """
    CD, E, s, h, regLambda = args
    # One pre-joined string prints the same text under Python 2 and Python 3.
    print(' '.join(str(v) for v in (CD.shape, s, h)))
    if CD.shape[0] > 4 * 1e5:
        # Very large inputs are split into row batches and handled recursively
        # (presumably to limit the size of the intermediate matrices); the
        # per-batch results are concatenated in the original row order.
        numBatches = 5
        idx = np.arange(CD.shape[0])
        return pd.concat(
            [computeLikelihoodRealCDold((CD.iloc[rows], E, s, h, regLambda))
             for rows in np.array_split(idx, numBatches)])
    # The transition table is stored on disk, one file per (s, h) pair.
    T = pd.read_pickle(utl.outpath + 'transition/real/S{:02.0f}.H{:02.0f}.df'.format(s * 100, h * 100))
    likes = pd.Series(0, index=CD.index, name=(s, h))
    for rep, df in CD.T.groupby(level=0):
        # Gaps between consecutive time points of this replicate; each gap picks
        # the matching entry of T. Computed for whichever replicates CD holds
        # instead of assuming exactly three replicates labelled 0, 1 and 2.
        powers = pd.Series(CD[rep].columns).diff().values[1:]
        # Forward recursion, started from the rows of E selected by time 0.
        alpha = E.loc[df.loc[(rep, 0)]]
        for step, power in zip(range(1, df.shape[0]), powers):
            alpha = alpha.values.dot(T.loc[power].values) * E.loc[df.loc[rep].iloc[step]]
        # Accumulate once per replicate, after the last time step (the same
        # placement computeLikelihoodRealBatch uses).
        likes += utl.vectorizedLog(alpha.mean(1).values)
    return likes - regLambda * abs(s)
def computeLikelihoodRealBatch(args):
    """
    Sum over replicates the log of the row-wise mean of the final forward matrix.

    Args: (packed in one sequence)
        args: a sequence (CD, E, T, powers).
            CD: dataframe with one row per entry of the returned series. The first
                column level is the replicate and the second the time point; every
                replicate needs a time point labelled 0. The values are used as
                integer row positions into E.
            E: dataframe (presumably emission probabilities -- confirm with caller).
            T: looked up with ``T.loc[power]`` to get the matrix applied at each
                step (presumably the transition matrix for that gap -- confirm).
            powers: indexed by replicate label; gives the sequence of ``power``
                values used for the successive steps of that replicate.
    Returns:
        a series indexed like CD holding the accumulated log values.
    """
    CD, E, T, powers = args
    total = pd.Series(0, index=CD.index)
    for rep, rep_rows in CD.T.groupby(level=0):
        n_times = rep_rows.shape[0]
        by_time = rep_rows.loc[rep]
        # Seed the recursion with the rows of E selected by the time-0 values.
        forward = E.iloc[rep_rows.loc[(rep, 0)]].values
        for step, power in zip(range(1, n_times), powers[rep]):
            propagated = forward.dot(T.loc[power].values)
            observed = E.values[by_time.iloc[step].values]
            forward = propagated * observed
        # Only the matrix left after the last step contributes for this replicate.
        total += utl.vectorizedLog(forward.mean(1))
    return total