Example #1
import itertools

import numpy as np
import pandas as pd
import minepy

import mictools.utils

# NULL_HIST_RES (the resolution of the null-distribution histogram) is a
# module-level constant defined alongside this function in mictools.


def compute_pval_oneclass(X, null_dist, Y=None, single=False, B=9, c=5):

    mictools.utils.check_data(X, Y=Y)

    bins = np.linspace(0, 1, NULL_HIST_RES + 1)

    # observed values/distribution
    names = ['Var1', 'Var2']
    Xa = X.values
    if Y is None:
        _, tic = minepy.pstats(Xa, alpha=B, c=c, est="mic_e")
        index = pd.MultiIndex.from_tuples(
            list(itertools.combinations(X.index, 2)), names=names)
    else:
        Ya = Y.values
        if single:
            _, tic = mictools.utils.sstats(Xa, Ya, alpha=B, c=c, est="mic_e")
            index = pd.MultiIndex.from_arrays([X.index, Y.index], names=names)
        else:
            _, tic = minepy.cstats(Xa, Ya, alpha=B, c=c, est="mic_e")
            index = pd.MultiIndex.from_product([X.index, Y.index], names=names)
            tic = tic.flatten()

    observed_hist = np.histogram(tic, bins)[0].astype(np.int64)

    # right-tailed area
    observed_hist_cum = np.cumsum(observed_hist[::-1])[::-1]

    # p-values
    null_hist_cum = null_dist["NullCountCum"].values
    pval = (np.interp(tic, bins[:-1], null_hist_cum) + 1) / \
        (null_hist_cum[0] + 1)
    pval = pd.Series(pval, index=index)

    # observed
    obs = pd.Series(tic, index=index)

    # distribution
    index = pd.MultiIndex.from_arrays([bins[:-1], bins[1:]],
                                      names=('BinStart', 'BinEnd'))
    obs_dist = pd.DataFrame(
        {
            "ObsCount": observed_hist,
            "ObsCountCum": observed_hist_cum
        },
        index=index,
        columns=["ObsCount", "ObsCountCum"])

    return obs_dist, obs, pval
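
A minimal driver for the function above, assuming it and this snippet share one
script and that mictools.utils.check_data accepts a plain DataFrame with unique
row labels. The NULL_HIST_RES value and the crude random-data null distribution
are illustrative assumptions, not part of the original code:

NULL_HIST_RES = 10000  # assumed resolution, for illustration only
rng = np.random.default_rng(0)

X = pd.DataFrame(rng.random((8, 320)), index=["v%d" % i for i in range(8)])

# crude null: TIC_e values computed on independent random data
_, tic_null = minepy.pstats(rng.random((8, 320)), alpha=9, c=5, est="mic_e")
bins = np.linspace(0, 1, NULL_HIST_RES + 1)
null_count = np.histogram(tic_null, bins)[0]
null_dist = pd.DataFrame({"NullCountCum": np.cumsum(null_count[::-1])[::-1]})

obs_dist, obs, pval = compute_pval_oneclass(X, null_dist)
print(pval.head())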
Example #2
import pickle

from minepy import pstats, cstats


def main(params, inputs, outputs):
    ### input data ###
    X = inputs.x
    Y = inputs.y

    ### input parameters ###
    stat_type = params.type

    ### MIC (maximal information coefficient) and TIC (total information coefficient) between the X variables ###
    if stat_type == 'X之间':  # "between X variables"
        mic, tic = pstats(X, alpha=9, c=5, est="mic_e")

    ### MIC and TIC between X and Y ###
    elif stat_type == 'X与Y之间':  # "between X and Y"
        mic, tic = cstats(X, Y, alpha=9, c=5, est="mic_e")

    else:
        raise ValueError("unknown type: %r" % stat_type)

    ### output results ###
    with open(outputs.mic, "wb") as f:
        pickle.dump(mic, f)
    with open(outputs.tic, "wb") as f:
        pickle.dump(tic, f)
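
A hypothetical local test of main(), using stand-in objects for the
platform-provided params/inputs/outputs; the attribute names mirror the snippet
above, but the file paths and data are placeholders:

from types import SimpleNamespace

import numpy as np

params = SimpleNamespace(type='X之间')
inputs = SimpleNamespace(x=np.random.rand(8, 320), y=np.random.rand(4, 320))
outputs = SimpleNamespace(mic="mic.pkl", tic="tic.pkl")

main(params, inputs, outputs)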
Example #3
import numpy as np
from minepy import pstats, cstats
import time

np.random.seed(0)

# build the X matrix, 8 variables, 320 samples
X = np.random.rand(8, 320)

# build the Y matrix, 4 variables, 320 samples
Y = np.random.rand(4, 320)

# compute pairwise statistics MIC_e and normalized TIC_e between samples in X,
# B=9, c=5
mic_p, tic_p = pstats(X, alpha=9, c=5, est="mic_e")

# compute statistics between each pair of samples in X and Y
mic_c, tic_c = cstats(X, Y, alpha=9, c=5, est="mic_e")

print("normalized TIC_e (X):")
print(tic_p)
print("MIC_e (X vs. Y):")
print(mic_c)
Example #4
# encoding=utf-8

import numpy as np
from minepy import pstats, cstats
import time

np.random.seed(0)

# build the X matrix, 8 variables, 320 samples
X = np.random.rand(8, 320)

# build the Y matrix, 4 variables, 320 samples
Y = np.random.rand(4, 320)

# compute pairwise statistics MIC_e and normalized TIC_e between samples in X,
# B=9, c=5
mic_p, tic_p = pstats(X, alpha=9, c=5, est="mic_e")

# compute statistics between each pair of samples in X and Y
mic_c, tic_c = cstats(X, Y, alpha=9, c=5, est="mic_e")

print("normalized TIC_e (X):")
print(tic_p)
print("MIC_e (X vs. Y):")
print(mic_c)
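
For reference, pstats returns condensed 1-D arrays with one entry per variable
pair (in the same order as scipy.spatial.distance.pdist), while cstats returns
2-D arrays of shape (number of X variables, number of Y variables). A quick
check against the matrices built above:

print(tic_p.shape)   # (28,): 8 * 7 / 2 condensed pairs
print(mic_c.shape)   # (8, 4): one row per X variable, one column per Y variable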

Example #5
import numpy as np
from minepy import cstats


def score_interaction(bait, prey, bait_monomer_sec_id, prey_monomer_sec_id):
    def longest_intersection(arr):
        # Compute longest continuous stretch
        n = len(arr)
        s = set()
        ans = 0
        for ele in arr:
            s.add(ele)
        for i in range(n):
            if (arr[i] - 1) not in s:
                j = arr[i]
                while (j in s):
                    j += 1
                ans = max(ans, j - arr[i])
        return ans

    def normalized_xcorr(a, b):
        # Normalize matrices
        a = (a - np.mean(a, axis=1, keepdims=True)) / (np.std(
            a, axis=1, keepdims=True))
        b = (b - np.mean(b, axis=1, keepdims=True)) / (np.std(
            b, axis=1, keepdims=True))

        nxcorr = []  # normalized cross-correlation
        lxcorr = []  # cross-correlation lag

        if np.array_equal(a, b):
            # Compare all rows of a against all rows of a, including itself (auto-correlation)
            for i in range(0, len(a)):
                for j in range(i, len(a)):
                    nxcorr.append(
                        np.correlate(a[i], a[j], 'valid')[0] /
                        len(a[i]))  # Normalize by length
                    lxcorr.append(np.argmax(np.correlate(a[i], a[j],
                                                         'same')))  # Peak
        else:
            # Compare all rows of a against all rows of b
            for i in range(0, len(a)):
                for j in range(0, len(b)):
                    nxcorr.append(
                        np.correlate(a[i], b[j], 'valid')[0] /
                        len(a[i]))  # Normalize by length
                    lxcorr.append(np.argmax(np.correlate(a[i], b[j],
                                                         'same')))  # Peak

        return np.array(nxcorr), np.array(lxcorr)

    def sec_xcorr(bm, pm):
        # Compute SEC xcorr scores
        bnx, blx = normalized_xcorr(bm, bm)
        pnx, plx = normalized_xcorr(pm, pm)
        bpnx, bplx = normalized_xcorr(bm, pm)

        xcorr_shape = np.mean(bpnx)
        xcorr_apex = np.mean(bplx)
        xcorr_shift = max(
            [abs(xcorr_apex - np.mean(blx)),
             abs(xcorr_apex - np.mean(plx))])

        return xcorr_shape, xcorr_shift, xcorr_apex

    def mass_similarity(bm, pm):
        # Sum bait and prey peptides
        bpabundance = np.sum(bm, axis=1, keepdims=True).mean()
        ppabundance = np.sum(pm, axis=1, keepdims=True).mean()

        # Compute abundance ratio of bait and prey protein
        abundance_ratio = bpabundance / ppabundance
        if abundance_ratio > 1:
            abundance_ratio = 1 / abundance_ratio

        return abundance_ratio

    # Compute bait and prey overlap
    overlap = (np.nansum(bait, axis=0) > 0) | (np.nansum(prey, axis=0) > 0)
    total_overlap = np.count_nonzero(overlap)

    # Compute bait and prey intersection
    intersection = (np.nansum(bait, axis=0) > 0) & (np.nansum(prey, axis=0) > 0)
    total_intersection = np.count_nonzero(intersection)
    if total_intersection > 0:
        longest_stretch = longest_intersection(intersection.nonzero()[0])

        # Require at least three consecutive overlapping data points
        if longest_stretch > 2:
            # Prepare total bait and prey profiles & Replace nan with 0
            total_bait = np.nan_to_num(bait)
            total_prey = np.nan_to_num(prey)

            # Remove non-overlapping segments
            bait[:, ~intersection] = np.nan
            prey[:, ~intersection] = np.nan

            # Remove completely empty peptides
            bait = bait[(np.nansum(bait, axis=1) > 0), :]
            prey = prey[(np.nansum(prey, axis=1) > 0), :]

            # Replace nan with 0
            bait = np.nan_to_num(bait)
            prey = np.nan_to_num(prey)

            # Require at least one remaining peptide for bait and prey
            if (bait.shape[0] > 0) and (prey.shape[0] > 0):
                # Compute cross-correlation scores
                xcorr_shape, xcorr_shift, xcorr_apex = sec_xcorr(bait, prey)

                # Compute MIC/TIC scores
                mic_stat, tic_stat = cstats(bait[:, intersection],
                                            prey[:, intersection],
                                            est="mic_e")
                # Axis 0: summary for prey peptides / Axis 1: summary for bait peptides
                mic = mic_stat.mean(axis=0).mean()
                tic = tic_stat.mean(axis=0).mean()

                # Compute mass similarity score
                abundance_ratio = mass_similarity(bait, prey)

                # Compute total mass similarity score
                total_abundance_ratio = mass_similarity(total_bait, total_prey)

                # Compute relative intersection score
                relative_overlap = total_intersection / total_overlap

                # Compute delta monomer score
                delta_monomer = np.abs(bait_monomer_sec_id -
                                       prey_monomer_sec_id)

                # Compute apex monomer score
                apex_monomer = np.min(
                    np.array([bait_monomer_sec_id - xcorr_apex,
                              prey_monomer_sec_id - xcorr_apex]))

                return {
                    'var_xcorr_shape': xcorr_shape,
                    'var_xcorr_shift': xcorr_shift,
                    'var_abundance_ratio': abundance_ratio,
                    'var_total_abundance_ratio': total_abundance_ratio,
                    'var_mic': mic,
                    'var_tic': tic,
                    'var_sec_overlap': relative_overlap,
                    'var_sec_intersection': longest_stretch,
                    'var_delta_monomer': delta_monomer,
                    'var_apex_monomer': apex_monomer
                }
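
A minimal driver for score_interaction with synthetic peptide-by-fraction
matrices, assuming it is appended to the script above (which already imports
numpy); the shapes, monomer SEC ids, and random data are illustrative
assumptions only:

np.random.seed(0)

# 3 bait peptides and 2 prey peptides across 50 SEC fractions (synthetic data)
bait = np.random.rand(3, 50)
prey = np.random.rand(2, 50)

scores = score_interaction(bait, prey,
                           bait_monomer_sec_id=40,
                           prey_monomer_sec_id=45)
print(scores)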