Exemple #1
0
def order_assumptions(weights, alpha, return_p):
    """Check two Mann-Whitney U comparisons against a significance level.

    ``weights[0][0]`` is compared with ``weights[0][1]`` and ``weights[0][1]``
    with ``weights[1][1]``. The check passes only when both p-values are
    strictly below ``alpha``.

    Args:
        weights: Nested indexable of samples; only the three entries named
            above are read (the author notes this only works for K = 2).
        alpha: Threshold that each p-value must be below.
        return_p: When truthy, the two p-values are returned as well.

    Returns:
        ``True`` or ``False`` on its own, or a ``(bool, np.ndarray)`` tuple
        holding ``[p1, p2]`` when ``return_p`` is truthy.
    """
    from scipy.stats import mannwhitneyu as mwu

    _, p_first = mwu(weights[0][0], weights[0][1])
    _, p_second = mwu(weights[0][1], weights[1][1])

    # bool() keeps the result a plain Python True/False
    both_significant = bool(p_first < alpha and p_second < alpha)
    if not return_p:
        return both_significant
    return both_significant, np.array([p_first, p_second])
    def MWU_vs_average_helper(data, groups, gene):
        """Test each group against all remaining samples with a one-sided MWU.

        For every group label, the values of ``data`` belonging to that group
        are compared (``alternative='greater'``) with the values of all
        samples that are not in the group.

        Args:
            data: Label-indexed values (presumably a pandas Series holding
                one gene's measurements per sample -- confirm with caller).
            groups: Label-indexed group assignment sharing ``data``'s index
                (presumably a pandas Series -- confirm with caller).
            gene: Label used as the single row index of the result.

        Returns:
            A one-row DataFrame (row ``gene``, one column per group label)
            of p-values. A group whose test raises is assigned 1.0.
        """
        # Materialize once so the labels can be iterated more than one time.
        unique_groups = list(return_unique(groups))
        output = pd.DataFrame(index=[gene], columns=unique_groups)

        for gr in unique_groups:
            # .loc is the label-based replacement for the removed .ix indexer
            d1 = data.loc[groups[groups == gr].index]
            d2 = data.loc[groups[groups != gr].index]

            try:
                pval = mwu(d1, d2, alternative='greater')[1]
            except Exception:
                # Best effort: a test that cannot be computed counts as
                # "not significant" instead of aborting the whole run.
                pval = 1.0

            output.loc[gene, gr] = pval

        return output
    def MWU_vs_groups_helper(data, groups, gene):
        """Test each group against every other group with a one-sided MWU.

        For every group label, the values of ``data`` in that group are
        compared (``alternative='greater'``) with each other group in turn.
        The largest of those p-values is kept, so a small result means the
        group tested greater than every other group.

        Args:
            data: Label-indexed values (presumably a pandas Series holding
                one gene's measurements per sample -- confirm with caller).
            groups: Label-indexed group assignment sharing ``data``'s index
                (presumably a pandas Series -- confirm with caller).
            gene: Label used as the single row index of the result.

        Returns:
            A one-row float DataFrame (row ``gene``, one column per group
            label) holding the maximum pairwise p-value for each group.
            A pairwise test that raises contributes 1.0.

        Raises:
            ValueError: From ``np.max`` when there is only one group, since
                there is then no pairwise p-value to take the maximum of.
        """
        # Materialize once so the labels can be iterated more than one time.
        unique_groups = list(return_unique(groups))
        output = pd.DataFrame(index=[gene], columns=unique_groups)

        for gr1 in unique_groups:
            # .loc is the label-based replacement for the removed .ix indexer
            d1 = data.loc[groups[groups == gr1].index]
            pvals = []

            for gr2 in unique_groups:
                if gr2 != gr1:
                    d2 = data.loc[groups[groups == gr2].index]

                    try:
                        pval_tmp = mwu(d1, d2, alternative='greater')[1]
                    except Exception:
                        # Best effort: an uncomputable comparison counts as
                        # "not significant" instead of aborting the run.
                        pval_tmp = 1.0

                    pvals.append(pval_tmp)

            output.loc[gene, gr1] = np.max(pvals)

        return output.astype(float)
Exemple #4
0
 def test_apply_test_with_test(self):
     """apply_test with the library "Mann-Whitney" test matches mwu's two-sided p-value."""
     mann_whitney = StatTest.from_library("Mann-Whitney")
     # Reference p-value computed directly with mwu
     # (presumably scipy.stats.mannwhitneyu -- import not visible here).
     expected_pvalue = mwu(self.x, self.y, alternative="two-sided")[1]
     observed_pvalue = apply_test(self.x, self.y, mann_whitney).pvalue
     self.assertAlmostEqual(expected_pvalue, observed_pvalue)
Exemple #5
0
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind, mannwhitneyu as mwu
import matplotlib.pyplot as plt

# Count table; only its column labels are used below, to select and order the
# rows of the CIBERSORTx results.
v = pd.read_table('/Users/nate/Projects/EBV_interactome/stad/stadmirs_counts211neg24pos.fordeseq.tsv', index_col=0)
s = pd.read_table('/Users/nate/Projects/EBV_interactome/stad/CIBERSORTx_Job190_Results.csv', index_col=0, sep=',')
s = s.loc[v.columns]

# Rows 0-210 versus rows 211 onward. The file name ("211neg24pos") suggests
# these are the negative and positive samples respectively -- confirm.
first_211 = s.iloc[:211]
remainder = s.iloc[211:]

# One result row per column of s, leaving out the last four columns
# (presumably CIBERSORTx summary columns -- confirm).
new = pd.DataFrame(index=s.columns[:-4])
fcs, ps, means = [], [], []
for i in new.index:
    late, early = remainder[i], first_211[i]
    # The 0.01 offset keeps the ratio finite when a group mean is zero.
    fcs.append((np.mean(late) + .01) / (np.mean(early) + .01))
    ps.append(mwu(late, early, alternative='two-sided')[1])
    means.append(np.mean(s[i] + .01))

new = new.assign(fc=fcs, ps=ps, mean=means).sort_values('fc')

# One dot per row of `new`: x = log2 ratio, size = overall mean,
# red when the two-sided p-value is below 0.05, grey otherwise.
fig, ax = plt.subplots(figsize=(6, 8))
y_positions = range(len(new.index))
dot_colors = ['r' if x < .05 else "0.75" for x in new['ps']]
plt.scatter(
    np.log2(new['fc']),
    y_positions,
    s=100 * new['mean'],
    c=dot_colors,
    alpha=0.7,
    lw=0,
)
ax.set_yticks(y_positions)
ax.set_yticklabels(new.index)
plt.xlim([-3.5, 3.5])
plt.tight_layout()
plt.savefig('/Users/nate/Projects/EBV_interactome/stad/stad_cib_ebv_posvneg.svg')

            euc_dist = this_mean_dist,
            cos_dist = this_mean_cos_dist,
            index = [0]))
    
    dist_frame = pd.concat([dist_frame, this_dist_frame])
    
    print(this_index)

# Copy the 'file' and 'neuron' index levels into ordinary columns, then merge
# with dist_frame (pd.merge without `on` joins on the shared column names).
for level_name in ('file', 'neuron'):
    low_cov_nrns[level_name] = low_cov_nrns.index.get_level_values(level_name)
low_cov_nrns = pd.merge(low_cov_nrns, dist_frame)

# One swarm plot per distance measure, split by the 'stable' flag
for dist_column in ('euc_dist', 'cos_dist'):
    plt.figure()
    sns.swarmplot(x = 'stable', y=dist_column, data=low_cov_nrns)

# Mann-Whitney U comparison of stable vs. non-stable rows for each distance.
# The results are not stored; they are only visible in an interactive session.
stable_nrns = low_cov_nrns.query('stable == True')
unstable_nrns = low_cov_nrns.query('stable == False')
mwu(stable_nrns.euc_dist, unstable_nrns.euc_dist)
mwu(stable_nrns.cos_dist, unstable_nrns.cos_dist)
# =============================================================================
# =============================================================================
        # Pull out and cluster distance matrices:
        # reorder the rows, then the columns, of nrn_dist by trial_order so
        # both axes of the matrix follow the same ordering.
        # (presumably trial_order sorts trials by cluster label -- TODO confirm)
        clust_post_dist = nrn_dist[trial_order,:]
        clust_post_dist = clust_post_dist[:,trial_order]
        
        ## Distance matrix cluster plots
        # Top row of a 2x2 grid: original matrix (left) and reordered matrix
        # (right), each passed through exposure.equalize_hist before display
        # (presumably skimage.exposure -- import not visible here).
        plt.figure()
        plt.subplot(221);plt.imshow(exposure.equalize_hist(nrn_dist));plt.title('Un Stim')
        plt.subplot(222);plt.imshow(exposure.equalize_hist(clust_post_dist));plt.title('Clust Stim')
        # Positions in the sorted group labels where the label changes,
        # i.e. the last index of each group except the final one.
        line_num = np.where(np.diff(np.sort(this_groups)))[0]
        for point in line_num:
            # +0.5 puts the line between two adjacent image cells; the lines
            # are drawn on the current axes, which is subplot 222.
            plt.axhline(point+0.5,color = 'red')
            plt.axvline(point+0.5,color = 'red')        
        zip(['step_size', 'window_size', 'total_time'], [25, 250, 7000]))
    # Load the data and compute firing rates on the `data` object
    # (its class is defined outside this snippet).
    data.get_data()
    data.get_firing_rates()

    # Iterate over neurons; the neuron count is read from axis 0 of the first
    # entry of data.off_spikes.
    for nrn in range(data.off_spikes[0].shape[0]):
        # NOTE(review): nothing in the visible body reads this outer `taste`,
        # and the inner `for taste in range(4)` below rebinds it, so the same
        # work appears to be repeated four times per neuron -- confirm intent.
        for taste in range(4):

            # Only take neurons which fire in every trial
            # Off and on spike arrays are joined along axis 2, then restricted
            # to this neuron and to samples 2000:4000 of the last axis.
            all_spikes = np.concatenate(
                (np.asarray(data.off_spikes), np.asarray(data.on_spikes)),
                axis=2)
            all_spikes = all_spikes[:, nrn, :, 2000:4000]
            # Proceed only if no entry has a zero spike count in that window.
            if not (np.sum((np.sum(all_spikes, axis=2) == 0).flatten()) > 0):

                # Firing rates for this neuron, bins 80:160 of the last axis
                # (presumably the same 2000:4000 window at step_size 25 --
                # TODO confirm). Resulting axes: (taste, trial, time bin),
                # judging by the indexing in the loops below.
                this_off = np.asarray(data.normal_off_firing)
                this_off = this_off[:, nrn, :, 80:160]
                this_off_mean = np.mean(this_off, axis=1)

                this_on = np.asarray(data.normal_on_firing)
                this_on = this_on[:, nrn, :, 80:160]
                this_on_mean = np.mean(this_on, axis=1)

                # Perform Mann-Whitney U-test on every timepoint
                # The 0.05 threshold is divided by the number of time bins.
                alpha = 0.05 / this_off.shape[2]
                p_vals = np.empty((this_off.shape[0], this_off.shape[2]))
                # NOTE(review): range(4) is hard-coded although p_vals has
                # this_off.shape[0] rows; rows beyond 4 would stay
                # uninitialized (np.empty) -- confirm there are always 4.
                for taste in range(4):
                    for time in range(this_off.shape[2]):
                        p_vals[taste, time] = mwu(this_off[taste, :, time],
                                                  this_on[taste, :, time])[1]
                # A taste counts as significant when more than 100 / 25 = 4
                # time bins fall below alpha (presumably 100 ms at a 25 ms
                # step -- TODO confirm).
                significant = np.sum(p_vals < alpha, axis=1) > 100 / 25
                min_p_vals = np.min(p_vals, axis=1)
Exemple #8
0
# Run the Mann-Whitney U test once per tail ("less", then "greater")
for alternative in ["less", "greater"]:
    for crisis in crises_df:
        # "cc3" is the country-code column, not a crisis measure
        if crisis == "cc3":
            continue

        # Sum the crisis column per country and file the total under the
        # country's former colonial power; other countries are ignored.
        fra_sample, gbr_sample = [], []
        bucket_for = {"FRA": fra_sample, "GBR": gbr_sample}
        for cc in ccs:
            bucket = bucket_for.get(get_colonist(cc))
            if bucket is None:
                continue
            country_rows = crises_df["cc3"] == cc
            bucket.append(crises_df[crisis].loc[country_rows].sum())

        # Report the hypotheses, then the test result
        print(f"H0: {crisis} distribution is the same for former British "
              "and French colonies.")
        print(f"H1: {crisis} distribution is {alternative} for former "
              "British colonies compared to former French colonies.")
        print("Note: In our case a greater distribution would equal "
              "a less stable economy")
        print()
        mwuresult = mwu(gbr_sample, fra_sample, alternative=alternative)
        print("U statistic:", mwuresult.statistic)
        print("P-value:    ", mwuresult.pvalue)
        print()
Exemple #9
0
def plot_boxes(xdata, ydata, labels, colors, ax = None):
    """
    Draw a two-group box plot with the single data points overlaid and
    compare the two groups with a two-sided Mann-Whitney U test.

    Arguments:
    xdata  -- a list with the data of the first group (box at x = 1)
    ydata  -- a list with the data of the second group (box at x = 2)
    labels -- a list with the two group names, in the same order
    colors -- a list with the two colors, in the same order
    ax     -- the axis to draw on; the current axis is used if not given

    Returns:
    ax: the axis with the box plot, where the horizontal line is the
    median, the boxes span the first and third quartiles, and the
    whiskers reach the most extreme data points within 1.5x the
    interquartile distance from the box edges. Outliers are not
    marked separately because every data point is plotted on top.
    Tick labels show the group name and its sample size.

    infostats: a dictionary whose 'P-value' entry is the two-sided
    Mann-Whitney U test P-value.

    The mean, standard error and sample size of each group, and the
    P-value, are also printed.
    """
    if ax is None:
        ax = plt.gca() # if not given, get current axis

    # Box plots (sym = '' does not mark outliers)
    data = [xdata, ydata]
    bp = ax.boxplot(data, widths = 0.45, patch_artist=True, sym='')

    for patch, color in zip(bp['boxes'], colors):
        patch.set_facecolor(color)
        patch.set_edgecolor('black')
        patch.set_alpha(0.1)
        patch.set_linewidth(2)

    for patch in bp['whiskers']:
        patch.set(color='black', lw=3, ls='-')

    for cap in bp['caps']:
        cap.set(color='black', lw=3)

    for patch, color in zip(bp['medians'], colors):
        patch.set_color(color)
        patch.set_linewidth(3)

    # plot data points with a small horizontal jitter around each box
    pairs = zip(data, colors)
    for position, (points, color) in enumerate(pairs, start=1):
        xval = np.random.normal(loc = position, scale = .045,
                                size=len(points))
        ax.plot(xval, points, 'o', color=color, ms=4)

    # remove axis and adjust
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.get_yaxis().tick_left()

    # xlabels with the sample size; the ticks are fixed first so that each
    # label is attached to its own box
    xlabels = [label + '\n(n=' + str(len(group)) + ')'
               for label, group in zip(labels, data)]
    ax.set_xticks([1,2])
    ax.set_xticklabels(xlabels)
    ax.xaxis.set_ticks_position('none')

    # statistics
    stats_0 =  ( labels[0],np.mean(data[0]), sem(data[0]), len(data[0]) )
    stats_1 =  ( labels[1],np.mean(data[1]), sem(data[1]), len(data[1]) )
    print('%s = %2.4f +/- %2.4f, n = %d' %stats_0)
    print('%s = %2.4f +/- %2.4f, n = %d' %stats_1)
    u_test = mwu(data[0], data[1], alternative = 'two-sided')[1]
    print('P = %2.4f, Mann-Whitney (two-sided U test)\n'%u_test)

    infostats = {'P-value': u_test}

    return(ax, infostats)
Exemple #10
0
def plot_bars(xdata, ydata, labels, colors, ax = None):
    """
    Draw a two-group bar plot of the means with error bars and the single
    data points, and compare the two groups with a two-sided
    Mann-Whitney U test.

    Arguments
    ----------
    xdata  -- a list with the data of the first group (bar at x = 1)
    ydata  -- a list with the data of the second group (bar at x = 2)
    labels -- a list with the two group names, in the same order
    colors -- a list with the two colors used for the bars
    ax     -- the axis to draw on; the current axis is used if not given

    Returns:
    ax: the axis with the bar plot showing the means, error bars with the
    standard error of the mean, and the single data points. Tick labels
    show the group name and its sample size.

    infostats: a dictionary whose 'P-value' entry is the two-sided
    Mann-Whitney U test P-value.

    The mean, standard error and sample size of each group, and the
    P-value, are also printed.
    """
    if ax is None:
        ax = plt.gca() # if not given, get current axis

    data = [xdata, ydata]

    yloc = (1,2)
    avg = np.mean(data[0]), np.mean(data[1])
    myparams = dict(width = 0.65, color = colors, align = 'center',
            alpha = 0.5)
    # bar
    ax.bar(yloc, avg, **myparams)

    # single data points (with horizontal jitter) and error bars
    mycaps = dict(capsize = 10, elinewidth = 3, markeredgewidth = 3)

    yerr0 = sem(data[0])
    xloc0 = np.random.normal(loc=1, scale=0.09, size = len(data[0]))
    ax.errorbar(yloc[0], avg[0], yerr0, color=colors[0], **mycaps)
    ax.plot(xloc0, data[0], 'o', ms=4, color='k')

    yerr1 = sem(data[1])
    xloc1 = np.random.normal(loc=2, scale=0.09, size = len(data[1]))
    ax.errorbar(yloc[1], avg[1], yerr1, color=colors[1], **mycaps)
    ax.plot(xloc1, data[1], 'o', ms=4, color='k')

    # remove axis and adjust
    ax.set_xlim(0,3)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['bottom'].set_visible(False)

    # xlabels with the sample size; the ticks are fixed first so that each
    # label is attached to its own bar
    xlabels = [label + '\n(n=' + str(len(group)) + ')'
               for label, group in zip(labels, data)]
    ax.set_xticks([1,2])
    ax.set_xticklabels(xlabels, fontsize=14)
    ax.xaxis.set_ticks_position('none')

    # statistics
    stats_0 =  ( labels[0],np.mean(data[0]), sem(data[0]), len(data[0]) )
    stats_1 =  ( labels[1],np.mean(data[1]), sem(data[1]), len(data[1]) )
    print('%s = %2.4f +/- %2.4f, n = %d' %stats_0)
    print('%s = %2.4f +/- %2.4f, n = %d\n' %stats_1)
    u_test = mwu(data[0], data[1], alternative = 'two-sided')[1]
    print('P = %2.4f, Mann-Whitney (two-side U test)'%u_test)

    infostats = {'P-value': u_test}

    return(ax, infostats)