Example #1
    def test_basic(self):
        # median_test calls chi2_contingency to compute the test statistic
        # and p-value.  Make sure it hasn't screwed up the call...

        x = [1, 2, 3, 4, 5]
        y = [2, 4, 6, 8]

        stat, p, m, tbl = stats.median_test(x, y)
        assert_equal(m, 4)
        assert_equal(tbl, [[1, 2], [4, 2]])

        exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl)
        assert_allclose(stat, exp_stat)
        assert_allclose(p, exp_p)

        stat, p, m, tbl = stats.median_test(x, y, lambda_=0)
        assert_equal(m, 4)
        assert_equal(tbl, [[1, 2], [4, 2]])

        exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl, lambda_=0)
        assert_allclose(stat, exp_stat)
        assert_allclose(p, exp_p)

        stat, p, m, tbl = stats.median_test(x, y, correction=False)
        assert_equal(m, 4)
        assert_equal(tbl, [[1, 2], [4, 2]])

        exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl, correction=False)
        assert_allclose(stat, exp_stat)
        assert_allclose(p, exp_p)
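
The relationship exercised above can be reproduced directly; a minimal sketch using only scipy and numpy (same sample data as the test):

import numpy as np
from scipy import stats

x = [1, 2, 3, 4, 5]
y = [2, 4, 6, 8]
stat, p, med, tbl = stats.median_test(x, y)
exp_stat, exp_p, dof, expected = stats.chi2_contingency(tbl)
assert np.isclose(stat, exp_stat) and np.isclose(p, exp_p)
print(med)  # 4.0, the grand median of the pooled samples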
Example #2
    def test_ties_options(self):
        # Test the contingency table calculation.
        x = [1, 2, 3, 4]
        y = [5, 6]
        z = [7, 8, 9]
        # grand median is 5.

        # Default 'ties' option is "below".
        stat, p, m, tbl = stats.median_test(x, y, z)
        assert_equal(m, 5)
        assert_equal(tbl, [[0, 1, 3], [4, 1, 0]])

        stat, p, m, tbl = stats.median_test(x, y, z, ties="ignore")
        assert_equal(m, 5)
        assert_equal(tbl, [[0, 1, 3], [4, 0, 0]])

        stat, p, m, tbl = stats.median_test(x, y, z, ties="above")
        assert_equal(m, 5)
        assert_equal(tbl, [[0, 2, 3], [4, 0, 0]])
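
The same ties behaviour can be inspected interactively; a short sketch looping over all three options (values match the assertions above):

from scipy import stats

x, y, z = [1, 2, 3, 4], [5, 6], [7, 8, 9]  # grand median is 5
for ties in ("below", "ignore", "above"):
    stat, p, med, tbl = stats.median_test(x, y, z, ties=ties)
    print(ties, med, tbl.tolist())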
Example #3
def medianTest(df, alpha):

    original = df['Score_original']
    fixed = df['Score_fixed']

    stat, p, med, tbl = stats.median_test(original, fixed)

    df['Median Test'] = p < alpha
    return df
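
A hypothetical usage sketch; the column names and scores below are made up for illustration:

import pandas as pd
from scipy import stats

df = pd.DataFrame({'Score_original': [70, 72, 68, 75, 71, 69],
                   'Score_fixed': [80, 82, 79, 85, 81, 83]})
df = medianTest(df, alpha=0.05)
print(df['Median Test'].iloc[0])  # True when the medians differ at the 5% level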
Example #4
    def test_simple(self):
        x = [1, 2, 3]
        y = [1, 2, 3]
        stat, p, med, tbl = stats.median_test(x, y)

        # The median is floating point, but this equality test should be safe.
        assert_equal(med, 2.0)

        assert_array_equal(tbl, [[1, 1], [2, 2]])

        # The expected frequencies of the contingency table equal the observed
        # values, so the statistic should be 0 and the p-value should be 1.
        assert_equal(stat, 0)
        assert_equal(p, 1)
Example #6
def test_mood(a1, a2):
    """
    Runs Mood's median test on the two supplied arrays, requires ndarrays with at least 2 distinct values
    :param list a1: array 1
    :param list a2: array 2
    :return: p-value
    :rtype: float
    """
    if isinstance(a1, np.ndarray) and isinstance(a2, np.ndarray):
        if len(set(a1)) > 1 and len(
                set(a2)) > 1:  # require at least 2 distinct values
            _, p, _, _ = stats.median_test(a1, a2)
            return p
        else:
            return np.NaN
    else:
        return np.NaN
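
A few usage examples showing the guard clauses (input values made up):

import numpy as np

print(test_mood(np.array([1, 2, 3, 4]), np.array([3, 4, 5, 6])))  # p-value
print(test_mood([1, 2, 3], [4, 5, 6]))  # lists, not ndarrays -> NaN
print(test_mood(np.array([1, 1, 1]), np.array([2, 3, 4])))  # <2 distinct values -> NaN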
Example #7
from numpy import mean, median
from matplotlib.pyplot import boxplot, title, xticks, yticks, show
from scipy.stats import ttest_ind, mannwhitneyu, median_test


def plotResults(distH, distA, titleString, digits=5):
    boxplot([distH, distA], notch=True, widths=0.25, positions=[0.75, 1.25],
            labels=['Diagnosed', 'Typical'])
    title(titleString, fontsize=18)
    xticks(fontsize=14)
    yticks(fontsize=10)
    show()
    stat, pValue = ttest_ind(distH, distA)
    print(titleString)
    print('mean / med:\t diagnosed:', round(mean(distH), digits), '/', round(median(distH), digits),
          '\ttypical:', round(mean(distA), digits), '/', round(median(distA), digits))
    print('t significance level p =', round(pValue, digits))
    stat, pValue = mannwhitneyu(distH, distA)
    print('U significance level p =', round(pValue, digits))
    stat, pValue, m, table = median_test(distH, distA, correction=False)
    print('median significance level p =', round(pValue, digits))
    print()
Example #8
def compare_to_auto(vals, weights):
    # Mood's median test stat is chisq -- near 0 for similar median
    try:
        stat, _p, _med, cont = median_test(auto_l, vals, ties='ignore',
                                           lambda_='log-likelihood')
    except ValueError:
        # "All values are below the grand median (0.0)"
        stat = None
    else:
        if stat == 0 and 0 in cont:
            stat = None
    # In case Mood's test failed for either sex
    if use_weight:
        med_diff = abs(descriptives.weighted_median(auto_l, auto_w) -
                       descriptives.weighted_median(vals, weights))
    else:
        med_diff = abs(np.median(auto_l) - np.median(vals))
    return (stat, med_diff)
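
The ValueError branch caught above can be triggered directly: with the default ties="below", median_test refuses a contingency table whose "above" row is empty, e.g. when every value ties with the grand median:

from scipy.stats import median_test

try:
    median_test([0, 0, 0], [0, 0, 0])
except ValueError as e:
    print(e)  # All values are below the grand median (0.0).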
Example #10
def test_significance_tests(normal_obs, normal_obs_control):
    treatment = ab.sample(normal_obs)
    control = ab.sample(normal_obs_control)
    res = treatment.t_test(control, equal_var=True)
    res_expected = ttest_ind(normal_obs, normal_obs_control, equal_var=True)
    assert res.p_value == res_expected.pvalue
    assert res.statistic == res_expected.statistic

    res = treatment.t_test(control, equal_var=False)
    res_expected = ttest_ind(normal_obs, normal_obs_control, equal_var=False)
    assert res.p_value == res_expected.pvalue
    assert res.statistic == res_expected.statistic

    res = treatment.t_test_1samp(101)
    res_expected = ttest_1samp(normal_obs, 101)
    assert res.p_value == res_expected.pvalue
    assert res.statistic == res_expected.statistic

    res = treatment.mann_whitney_u_test(control)
    res_expected = mannwhitneyu(normal_obs_control, normal_obs, alternative='two-sided')
    assert res.p_value == pytest.approx(res_expected.pvalue, 1e-6)
    assert res.u_statistic == res_expected.statistic

    res = treatment.shapiro_test()
    res_expected = shapiro(normal_obs)
    assert res.statistic == res_expected[0]
    assert res.p_value == res_expected[1]

    res = treatment.median_test(control)
    res_expected = median_test(normal_obs, normal_obs_control)
    assert res.p_value == res_expected[1]
    assert res.statistic == res_expected[0]
    assert res.grand_median == res_expected[2]

    res = treatment.levene_test(control)
    res_expected = levene(normal_obs, normal_obs_control)
    assert res.p_value == res_expected.pvalue
    assert res.statistic == res_expected.statistic

    res = treatment.mood_test(control)
    res_expected = mood(normal_obs, normal_obs_control)
    assert res.p_value == res_expected[1]
    assert res.statistic == res_expected[0]
Example #11
    def fit(self, *args, **kwargs):
        """Perform a Mood's median test.

        Parameters
        ----------
        sample1, sample2, ... : array_like
            The set of samples. There must be at least two samples. Each
            sample must be a one-dimensional sequence containing at least
            one value. The samples are not required to have the same length.
        ties : str, optional
            Determines how values equal to the grand median are classified
            in the contingency table. The string must be one of:
                "below":
                    Values equal to the grand median are counted as "below".
                "above":
                    Values equal to the grand median are counted as "above".
                "ignore":
                    Values equal to the grand median are not counted.
            The default is "below".
        correction : bool, optional
            If True, and there are just two samples, apply Yates' correction
            for continuity when computing the test statistic associated with
            the contingency table. Default is True.
        lambda_ : float or str, optional
            By default, the statistic computed in this test is Pearson's
            chi-squared statistic. lambda_ allows a statistic from the
            Cressie-Read power divergence family to be used instead.
            See power_divergence for details. Default is 1
            (Pearson's chi-squared statistic).
        nan_policy : {'propagate', 'raise', 'omit'}, optional
            Defines how to handle when input contains nan. 'propagate'
            returns nan, 'raise' throws an error, 'omit' performs the
            calculations ignoring nan values. Default is 'propagate'.
        """
        self._statistic, self._p, self._m, self._ctable = median_test(
            *args, **kwargs)
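
Since fit() forwards everything to scipy.stats.median_test, the keywords documented above can be exercised directly; a short sketch with made-up samples:

from scipy import stats

x, y = [1, 2, 3, 4, 5], [3, 4, 5, 6]
stat, p, med, tbl = stats.median_test(
    x, y, ties="above", correction=False, lambda_="log-likelihood")
print(stat, p, med)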
Example #12
def median_test(*args):
    # ss.median_test returns (statistic, p-value, grand median, table);
    # the first value is the test statistic, not a median.
    stat, pval, med, table = ss.median_test(*args)
    return stat, pval, med, table
Example #13
# -*- coding: utf-8 -*-

import math
import random
from scipy import stats

# Mood's test for equality of medians:

stats.median_test(dane_1, dane_2)

# Mann-Whitney U test (nonparametric counterpart of Student's t-test for independent samples):

stats.mannwhitneyu(dane_1, dane_2)

# Wilcoxon test (counterpart of Student's t-test for paired samples):

stats.wilcoxon(dane_1, dane_2)

# Kruskal-Wallis test (nonparametric counterpart of one-way ANOVA for independent samples):

stats.kruskal(dane_1, dane_2, dane_3)

# Friedman test (nonparametric counterpart of one-way ANOVA for paired samples):

stats.friedmanchisquare(dane_1, dane_2, dane_3)
Example #14
def median_test(list_of_samples):
    '''Expects a list of samples, e.g. [[], [], ...]; returns the p-value.'''
    stat, p, med, tbl = stats.median_test(*list_of_samples)
    return p
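
Note this wrapper shadows the scipy name it calls. A usage sketch with made-up samples:

samples = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
print(median_test(samples))  # p-value for equality of the three medians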
Example #15
def expectation_index_hist(database, name):

    # -- Ascending --
    highFreqRespCellsA = []
    midFreqRespCellsA = []
    lowFreqRespCellsA = []
    for indRow, dbRow in database.iterrows():
        pValueHighA = database['pValHighResponseA'][indRow]
        pValueMidA = database['pValMidResponseA'][indRow]
        pValueLowA = database['pValLowResponseA'][indRow]
        pValuesA = dict(pValueHA=pValueHighA,
                        pValueMA=pValueMidA,
                        pValueLA=pValueLowA)
        # -- The best frequency is the one with the lowest pValue in sound responsive cells. --
        minimumA = min(pValuesA, key=pValuesA.get)
        # -- Appending to a list the cells that were most responsive to each of the three frequencies. --
        if minimumA == 'pValueHA':
            highFreqRespCellsA.append(database.iloc[indRow])
        elif minimumA == 'pValueMA':
            midFreqRespCellsA.append(database.iloc[indRow])
        else:
            lowFreqRespCellsA.append(database.iloc[indRow])

    respHighA = pd.DataFrame(
        highFreqRespCellsA
    )  # Database of cells where the high frequency tone in the ascending sequence is the most responsive
    respMidA = pd.DataFrame(
        midFreqRespCellsA
    )  # Database of cells where the middle frequency tone in the ascending sequence is the most responsive
    respLowA = pd.DataFrame(
        lowFreqRespCellsA
    )  # Database of cells where the low frequency tone in the ascending sequence is the most responsive

    signRespCellsHighA = respHighA.query(
        'pValHighFRA < 0.05'
    )  # Cells that were most responsive for the high frequency sound that also show a significant difference in firing between the high frequency oddball and standard (first oddball/std)
    signRespCellsMidA = respMidA.query(
        'pValMidFRA < 0.05'
    )  # Cells that were most responsive for the middle frequency sound that also show a significant difference in firing between the high frequency oddball and standard (first oddball/std)
    signRespCellsLowA = respLowA.query(
        'pValLowFRA < 0.05'
    )  # Cells that were most responsive for the low frequency sound that also show a significant difference in firing between the high frequency oddball and standard (first oddball/std)

    # -- Descending --
    highFreqRespCellsD = []
    midFreqRespCellsD = []
    lowFreqRespCellsD = []
    for indRow, dbRow in database.iterrows():
        pValueHighD = database['pValHighResponseD'][indRow]
        pValueMidD = database['pValMidResponseD'][indRow]
        pValueLowD = database['pValLowResponseD'][indRow]
        pValuesD = dict(pValueHD=pValueHighD,
                        pValueMD=pValueMidD,
                        pValueLD=pValueLowD)
        # -- The best frequency is the one with the lowest pValue in sound responsive cells. --
        minimumD = min(pValuesD, key=pValuesD.get)

        if minimumD == 'pValueHD':
            highFreqRespCellsD.append(database.iloc[indRow])
        elif minimumD == 'pValueMD':
            midFreqRespCellsD.append(database.iloc[indRow])
        else:
            lowFreqRespCellsD.append(database.iloc[indRow])

    respHighD = pd.DataFrame(highFreqRespCellsD)
    respMidD = pd.DataFrame(midFreqRespCellsD)
    respLowD = pd.DataFrame(lowFreqRespCellsD)

    signRespCellsHighD = respHighD.query('pValHighFRD < 0.05')
    signRespCellsMidD = respMidD.query('pValMidFRD < 0.05')
    signRespCellsLowD = respLowD.query('pValLowFRD < 0.05')

    bins = 30
    plt.figure(figsize=(10, 4.5)).suptitle(name, fontsize=9, y=1.01)
    ax0 = plt.subplot2grid((2, 3), (0, 0))  # Subplots
    highIndA = respHighA['expIndHighA']
    plt.hist(highIndA[~np.isnan(highIndA)],
             bins,
             histtype='step',
             color='limegreen')
    signCellsHighA = signRespCellsHighA['expIndHighA']
    plt.hist(signCellsHighA[~np.isnan(signCellsHighA)],
             bins,
             color='limegreen')
    plt.title('High Frequency - Ascending', fontsize=8)
    plt.xlabel('Expectation Index', fontsize=8)
    plt.ylabel('Num of Cells', fontsize=8)
    plt.xlim(-1, 1)

    ax1 = plt.subplot2grid((2, 3), (0, 1))
    midIndA = respMidA['expIndMidA']
    plt.hist(midIndA[~np.isnan(midIndA)],
             bins,
             histtype='step',
             color='dodgerblue')
    signCellsMidA = signRespCellsMidA['expIndMidA']
    plt.hist(signCellsMidA[~np.isnan(signCellsMidA)], bins, color='dodgerblue')
    plt.title('Middle Frequency - Ascending', fontsize=8)
    plt.xlabel('Expectation Index', fontsize=8)
    plt.ylabel('Num of Cells', fontsize=8)
    plt.xlim(-1, 1)

    ax2 = plt.subplot2grid((2, 3), (0, 2))
    lowIndA = respLowA['expIndLowA']
    plt.hist(lowIndA[~np.isnan(lowIndA)],
             bins,
             histtype='step',
             color='darkorchid')
    signCellsLowA = signRespCellsLowA['expIndLowA']
    plt.hist(signCellsLowA[~np.isnan(signCellsLowA)], bins, color='darkorchid')
    plt.title('Low Frequency - Ascending', fontsize=8)
    plt.xlabel('Expectation Index', fontsize=8)
    plt.ylabel('Num of Cells', fontsize=8)
    plt.xlim(-1, 1)

    ax3 = plt.subplot2grid((2, 3), (1, 0))
    highIndD = respHighD['expIndHighD']
    plt.hist(highIndD[~np.isnan(highIndD)],
             bins,
             histtype='step',
             color='limegreen')
    signCellsHighD = signRespCellsHighD['expIndHighD']
    plt.hist(signCellsHighD[~np.isnan(signCellsHighD)],
             bins,
             color='limegreen')
    plt.title('High Frequency - Descending', fontsize=8)
    plt.xlabel('Expectation Index', fontsize=8)
    plt.ylabel('Num of Cells', fontsize=8)
    plt.xlim(-1, 1)

    ax4 = plt.subplot2grid((2, 3), (1, 1))
    midIndD = respMidD['expIndMidD']
    plt.hist(midIndD[~np.isnan(midIndD)],
             bins,
             histtype='step',
             color='dodgerblue')
    signCellsMidD = signRespCellsMidD['expIndMidD']
    plt.hist(signCellsMidD[~np.isnan(signCellsMidD)], bins, color='dodgerblue')
    plt.title('Middle Frequency - Descending', fontsize=8)
    plt.xlabel('Expectation Index', fontsize=8)
    plt.ylabel('Num of Cells', fontsize=8)
    plt.xlim(-1, 1)

    ax5 = plt.subplot2grid((2, 3), (1, 2))
    lowIndD = respLowD['expIndLowD']
    plt.hist(lowIndD[~np.isnan(lowIndD)],
             bins,
             histtype='step',
             color='darkorchid')
    signCellsLowD = signRespCellsLowD['expIndLowD']
    plt.hist(signCellsLowD[~np.isnan(signCellsLowD)], bins, color='darkorchid')
    plt.title('Low Frequency - Descending', fontsize=8)
    plt.xlabel('Expectation Index', fontsize=8)
    plt.ylabel('Num of Cells', fontsize=8)
    plt.xlim(-1, 1)

    plt.tight_layout()
    plt.gcf().set_size_inches([10, 4.5])
    figFormat = 'png'
    figFilename = 'expectation_index_hist_{}.{}'.format(name, figFormat)
    outputDir = os.path.join(settings.FIGURES_DATA_PATH,
                             studyparams.STUDY_NAME)
    figFullpath = os.path.join(outputDir, figFilename)
    plt.savefig(figFullpath, format=figFormat)

    plt.show()

    # -- Statistics --
    ## -- Ascending --
    print('Ascending:')
    ### -- High Frequency --
    highIndA = highIndA[~np.isnan(highIndA)]
    medianHighIndA = np.median(highIndA)
    signCellsHighA = signCellsHighA[~np.isnan(signCellsHighA)]
    medianSignCellsHighA = np.median(signCellsHighA)
    print('Median High Freq Responsive = {}'.format(medianHighIndA))
    print('Median High Freq Significant = {}'.format(medianSignCellsHighA))

    statHighA, pHighA, medHighA, tblHighA = stats.median_test(
        highIndA, signCellsHighA)
    print('Median_test = {}'.format(medHighA))

    ### -- Middle Frequency --
    midIndA = midIndA[~np.isnan(midIndA)]
    medianMidIndA = np.median(midIndA)
    signCellsMidA = signCellsMidA[~np.isnan(signCellsMidA)]
    medianSignCellsMidA = np.median(signCellsMidA)
    print('Median Mid Freq Responsive = {}'.format(medianMidIndA))
    print('Median Mid Freq Significant = {}'.format(medianSignCellsMidA))

    statMidA, pMidA, medMidA, tblMidA = stats.median_test(
        midIndA, signCellsMidA)
    print('Median_test = {}'.format(medMidA))

    ### -- Low Frequency --
    lowIndA = lowIndA[~np.isnan(lowIndA)]
    medianLowIndA = np.median(lowIndA)
    signCellsLowA = signCellsLowA[~np.isnan(signCellsLowA)]
    medianSignCellsLowA = np.median(signCellsLowA)
    print('Median Low Freq Responsive = {}'.format(medianLowIndA))
    print('Median Low Freq Significant = {}'.format(medianSignCellsLowA))

    statLowA, pLowA, medLowA, tblLowA = stats.median_test(
        lowIndA, signCellsLowA)
    print('Median_test = {}'.format(medLowA))

    ## -- Descending --
    print('Descending:')
    ### -- High Frequency --
    highIndD = highIndD[~np.isnan(highIndD)]
    medianHighIndD = np.median(highIndD)
    signCellsHighD = signCellsHighD[~np.isnan(signCellsHighD)]
    medianSignCellsHighD = np.median(signCellsHighD)
    print('Median High Freq Responsive = {}'.format(medianHighIndD))
    print('Median High Freq Significant = {}'.format(medianSignCellsHighD))

    statHighD, pHighD, medHighD, tblHighD = stats.median_test(
        highIndD, signCellsHighD)
    print('Median_test = {}'.format(medHighD))

    ### -- Middle Frequency --
    midIndD = midIndD[~np.isnan(midIndD)]
    medianMidIndD = np.median(midIndD)
    signCellsMidD = signCellsMidD[~np.isnan(signCellsMidD)]
    medianSignCellsMidD = np.median(signCellsMidD)
    print('Median Mid Freq Responsive = {}'.format(medianMidIndD))
    print('Median Mid Freq Significant = {}'.format(medianSignCellsMidD))

    statMidD, pMidD, medMidD, tblMidD = stats.median_test(
        midIndD, signCellsMidD)
    print('Median_test = {}'.format(medMidD))

    ### -- Low Frequency --
    lowIndD = lowIndD[~np.isnan(lowIndD)]
    medianLowIndD = np.median(lowIndD)
    signCellsLowD = signCellsLowD[~np.isnan(signCellsLowD)]
    medianSignCellsLowD = np.median(signCellsLowD)
    print('Median Low Freq Responsive = {}'.format(medianLowIndD))
    print('Median Low Freq Significant = {}'.format(medianSignCellsLowD))

    statLowD, pLowD, medLowD, tblLowD = stats.median_test(
        lowIndD, signCellsLowD)
    print('Median_test = {}'.format(medLowD))

    ### -- Responsive sessions --
    statA, pA, medA, tblA = stats.median_test(highIndA, midIndA, lowIndA)
    statD, pD, medD, tblD = stats.median_test(highIndD, midIndD, lowIndD)
    print('Ascending median test = {}'.format(medA))
    print('Descending median test = {}'.format(medD))

    ### -- Significantly responsive sessions --
    statSignA, pSignA, medSignA, tblSignA = stats.median_test(
        signCellsHighA, signCellsMidA, signCellsLowA)
    statSignD, pSignD, medSignD, tblSignD = stats.median_test(
        signCellsHighD, signCellsMidD, signCellsLowD)
    print('Ascending median test for significantly responsive cells = {}'
          .format(medSignA))
    print('Descending median test for significantly responsive cells = {}'
          .format(medSignD))

    ### -- Additional Stats --
    print(name)
    percentCellsShiftedRightHighA = sum(highIndA > 0.0) / len(respHighA) * 100
    percentSignCellsShiftedRightHighA = sum(
        signCellsHighA > 0.0) / len(signRespCellsHighA) * 100
    print(
        'High frequency ascending - {:.2f}% of cells have an expectation index shifted to signify an increase in firing from an unexpected sound to an expected one and {:.2f}% of significantly responsive cells show the same shift.'
        .format(percentCellsShiftedRightHighA,
                percentSignCellsShiftedRightHighA))

    percentCellsShiftedRightMidA = sum(midIndA > 0.0) / len(respMidA) * 100
    percentSignCellsShiftedRightMidA = sum(
        signCellsMidA > 0.0) / len(signRespCellsMidA) * 100
    print(
        'Middle frequency ascending - {:.2f}% of cells have an expectation index shifted to signify an increase in firing from an unexpected sound to an expected one and {:.2f}% of significantly responsive cells show the same shift.'
        .format(percentCellsShiftedRightMidA,
                percentSignCellsShiftedRightMidA))

    percentCellsShiftedRightLowA = sum(lowIndA > 0.0) / len(respLowA) * 100
    percentSignCellsShiftedRightLowA = sum(
        signCellsLowA > 0.0) / len(signRespCellsLowA) * 100
    print(
        'Low frequency ascending - {:.2f}% of cells have an expectation index shifted to signify an increase in firing from an unexpected sound to an expected one and {:.2f}% of significantly responsive cells show the same shift.'
        .format(percentCellsShiftedRightLowA,
                percentSignCellsShiftedRightLowA))

    percentCellsShiftedRightHighD = sum(highIndD > 0.0) / len(respHighD) * 100
    percentSignCellsShiftedRightHighD = sum(
        signCellsHighD > 0.0) / len(signRespCellsHighD) * 100
    print(
        'High frequency descending - {:.2f}% of cells have an expectation index shifted to signify an increase in firing from an unexpected sound to an expected one and {:.2f}% of significantly responsive cells show the same shift.'
        .format(percentCellsShiftedRightHighD,
                percentSignCellsShiftedRightHighD))

    percentCellsShiftedRightMidD = sum(midIndD > 0.0) / len(respMidD) * 100
    percentSignCellsShiftedRightMidD = sum(
        signCellsMidD > 0.0) / len(signRespCellsMidD) * 100
    print(
        'Middle frequency descending - {:.2f}% of cells have an expectation index shifted to signify an increase in firing from an unexpected sound to an expected one and {:.2f}% of significantly responsive cells show the same shift.'
        .format(percentCellsShiftedRightMidD,
                percentSignCellsShiftedRightMidD))

    percentCellsShiftedRightLowD = sum(lowIndD > 0.0) / len(respLowD) * 100
    percentSignCellsShiftedRightLowD = sum(
        signCellsLowD > 0.0) / len(signRespCellsLowD) * 100
    print(
        'Low frequency descending - {:.2f}% of cells have an expectation index shifted to signify an increase in firing from an unexpected sound to an expected one and {:.2f}% of significantly responsive cells show the same shift.'
        .format(percentCellsShiftedRightLowD,
                percentSignCellsShiftedRightLowD))
Example #16
def custom(a, b):
    _, p, _, _ = stats.median_test(a, b)
    return p
Example #17
def mood_median(L):
    score = median_test(*list(get_distances_per_class(L).values()))[0]
    if not pd.isnull(score):
        return (score, )
    else:
        return (float('-inf'), )
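
get_distances_per_class is not shown here; a hypothetical stand-in just to illustrate the call shape, assuming it maps each class label to a list of distances:

import pandas as pd
from scipy.stats import median_test

def get_distances_per_class(L):
    # hypothetical helper: class label -> distances for that class
    return {'a': [1.0, 1.5, 2.0], 'b': [4.0, 4.5, 5.0]}

print(mood_median(None))  # larger statistic = better-separated medians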
Example #18
def plot_synapse_delays(ax,
                        data,
                        xlim=None,
                        ylim=None,
                        xscale='log',
                        density_scaling='count',
                        naxes=3,
                        report=sys.stdout):
    """
    Plot correlation and marginals
    :param ax: axes handle
    :param data: pandas.DataFrame with columns pre, post, functional_delay, functional_strength, structural_delay,
    structural_strength, synaptic_delay, delayed, simultaneous
    :param xlim, ylim: limits for x and y axis of the scatter plot
    :param xscale: scaling for x axis of the scatter plot (default: 'log')
    :param density_scaling: scaling for density axis of the marginal plots (default: 'count')
    :param naxes: if 3 do plot the marginals (default: 3)
    :param report: file handle (default: sys.stdout just prints)
    """
    # TODO Check correct behaviour for naxes == 2!

    # New axes
    if naxes == 2:
        axScatter = ax
        fig = ax.get_figure()
        divider = make_axes_locatable(axScatter)
        axHisty = divider.new_horizontal(size="50%", pad=0.05)
        fig.add_axes(axHisty)
    if naxes == 3:
        rect_histx, rect_histy, rect_scatter = axes_to_3_axes(ax)
        axScatter = plt.axes(rect_scatter)
        axHistx = plt.axes(rect_histx)
        axHisty = plt.axes(rect_histy)

    # Subsetting the data
    n_total = len(data)
    delayed = data[data.delayed]
    n_delayed = len(delayed)
    simultaneous = data[data.simultaneous]
    n_simultaneous = len(simultaneous)

    # scatter plot
    axScatter.scatter(simultaneous.functional_strength,
                      simultaneous.synaptic_delay,
                      color='red',
                      label='<1 ms (%d%%)' % (100.0 * n_simultaneous / n_total))
    axScatter.scatter(delayed.functional_strength,
                      delayed.synaptic_delay,
                      color='green',
                      label='>1 ms (%d%%)' % (100.0 * n_delayed / n_total))
    axScatter.set_xscale(xscale)
    axScatter.legend(frameon=False, scatterpoints=1)
    axScatter.set_xlabel(r'$\mathsf{z_{max}}$', fontsize=14)
    axScatter.set_ylabel(
        r'$\mathsf{\tau_{synapse}=\tau_{spike}-\tau_{axon}\ [ms]}$',
        fontsize=14)

    # density plot
    kernel_density(axHisty,
                   data.synaptic_delay,
                   yscale=density_scaling,
                   style='k-',
                   orientation='horizontal')
    if naxes == 3:
        # joint legend by proxies
        plt.sca(ax)
        plt.vlines(0, 0, 0, colors='green', linestyles='-', label='>1ms')
        plt.vlines(0, 0, 0, colors='red', linestyles='-', label='<1ms')
        plt.vlines(0, 0, 0, colors='black', linestyles='-', label='all')
        plt.legend(frameon=False, fontsize=12)

        kernel_density(axHistx,
                       data.functional_strength,
                       xscale=xscale,
                       yscale=density_scaling,
                       style='k-',
                       orientation='vertical')
        kernel_density(axHistx,
                       simultaneous.functional_strength,
                       xscale=xscale,
                       yscale=density_scaling,
                       style='r-',
                       orientation='vertical')
        kernel_density(axHistx,
                       delayed.functional_strength,
                       xscale=xscale,
                       yscale=density_scaling,
                       style='g-',
                       orientation='vertical')
        axHistx.set_xscale(xscale)

    section = {}
    section['delayed'] = {
        'median': float(np.median(delayed.functional_strength)),
        'mean': float(np.mean(delayed.functional_strength)),
        'n': int(n_delayed),
        'p': float(n_delayed / n_total)
    }

    section['simultaneous'] = {
        'median': float(np.median(simultaneous.functional_strength)),
        'mean': float(np.mean(simultaneous.functional_strength)),
        'n': int(n_simultaneous),
        'p': float(n_simultaneous / n_total)
    }
    t, p = ttest_ind(np.log(simultaneous.functional_strength),
                     np.log(delayed.functional_strength))
    section['Students_t_test'] = {'p': float(p), 't': float(t)}
    xhi2, p, med, tbl = median_test(simultaneous.functional_strength,
                                    delayed.functional_strength)
    section['Moods_median_test'] = {
        'xhi2': float(xhi2),
        'p': float(p),
        # median_test returns the grand median of all values, not a difference
        'grand_median': float(med)
    }
    yaml.dump({'synapse_z_max': section}, report)

    # define limits
    max_functional_strength = max(data.functional_strength)
    if xlim is None:
        xlim = (min(data.functional_strength), max_functional_strength * 2
                )  # leave some room on the left
    if ylim is None:
        ylim = (min(data.synaptic_delay), max(data.synaptic_delay))

    # set limits
    axScatter.set_xlim(xlim)
    axScatter.set_ylim(ylim)
    axHistx.set_xlim(axScatter.get_xlim())
    axHisty.set_ylim(axScatter.get_ylim())

    # add hlines to Scatter
    axScatter.hlines(0, 0, max_functional_strength * 2, linestyles='--')
    axScatter.hlines(-1, 0, max_functional_strength * 2, linestyles=':')
    axScatter.hlines(+1, 0, max_functional_strength * 2, linestyles=':')

    # no labels
    nullfmt = NullFormatter()  # no labels
    axHistx.xaxis.set_major_formatter(nullfmt)
    axHistx.yaxis.set_major_formatter(nullfmt)
    axHisty.xaxis.set_major_formatter(nullfmt)
    axHisty.yaxis.set_major_formatter(nullfmt)
Example #19
def my_median_test(df,
                   metric='Yield',
                   descriptors=['Product group', 'Line', 'Shift'],
                   stat_cut_off=1e-2,
                   continuous=False):
    """
    Parameters
    ----------
    metric: str, default Yield
        Yield, Rate, or Uptime (or whatever you have a col name for
        I guess jajajaj)
    stat_cut_off: float, default 1e-2
        p-test cutoff (<0.01 chance of null hypothesis)

    Returns
    -------
    stat_df: DataFrame
        Moods Median Test Results for Metric
    """
    if continuous:
        moods = []
        for descriptor in descriptors:
            stat, p = stats.pearsonr(df[metric], df[descriptor])
            moods.append([descriptor, stat, p])
        stat_df = pd.DataFrame(moods)
        stat_df.columns = ['descriptor', 'stat', 'p']
        stat_df = stat_df.sort_values(by='stat',
                                      ascending=False).reset_index(drop=True)
        stat_df = stat_df.loc[stat_df['p'] < stat_cut_off].drop_duplicates(
            'stat').reset_index(drop=True)
        stat_df['score'] = stat_df['stat']
        stat_df = stat_df.reset_index(drop=True)
    else:
        moods = []
        for descriptor in descriptors:
            for item in df[descriptor].unique():
                try:
                    stat, p, m, table = stats.median_test(
                        df.loc[df[descriptor] == item][metric],
                        df.loc[~(df[descriptor] == item)][metric],
                        nan_policy='omit')
                    moods.append([descriptor, item, stat, p, m, table])
                except ValueError:
                    # e.g. all values fall on one side of the grand median
                    pass
        stat_df = pd.DataFrame(moods)
        stat_df.columns = ['descriptor', 'group', 'stat', 'p', 'm', 'table']
        stat_df = stat_df.sort_values(by='stat',
                                      ascending=False).reset_index(drop=True)
        stat_df = stat_df.loc[stat_df['p'] < stat_cut_off].drop_duplicates(
            'stat').reset_index(drop=True)
        scores = []
        for index in range(stat_df.shape[0]):
            if metric == 'Uptime':
                scores.append(stat_df['table'][index][1][0] /
                              stat_df['table'][index][0][0])
            else:
                scores.append(stat_df['table'][index][0][0] /
                              stat_df['table'][index][1][0])
        stat_df['score'] = scores
        stat_df = stat_df.sort_values('score', ascending=True)
        stat_df = stat_df.reset_index(drop=True)
    return stat_df
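
The nan_policy='omit' keyword used above drops NaNs before building the contingency table; a minimal demonstration with made-up values:

import numpy as np
from scipy import stats

a = [1.0, 2.0, np.nan, 4.0, 5.0]
b = [3.0, 5.0, 6.0, 7.0]
stat, p, med, tbl = stats.median_test(a, b, nan_policy='omit')
print(med)  # grand median of the eight non-NaN values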
Example #20
def starplot(df=[],
             x='',
             y='',
             data=[],
             index=[],
             columns=[],
             fold=False,
             foldcol=0,
             mode=3,
             errorbar=True,
             plottype='barplot',
             stats='independent t test',
             test_var=False,
             stats_var='f test',
             crit_var=0.05,
             equal_var=True,
             rotate=0,
             elinewidth=0.5,
             fontsize=14,
             capsize=4,
             noffset_ylim=35,
             noffset_fst=10,
             noffset_diff=10,
             star_size=3,
             linewidth=1,
             crit=[0.05, 0.01, 0.001, 0.0001]):
    # data: list of data matrices (or DataFrames) for comparison (row: obs, columns: var)
    # index: var, columns: obs
    # adjacent: annotate star for adjacent bars
    # control: annotate star between all other bars and a selected control bar
    # mix: mix mode
    # 3: annotate star for all combinations of bars (only 3 bars available)

    crit = np.array(crit)
    plt.rcParams['font.family'] = 'Times New Roman'
    fig, ax = plt.subplots()
    star = ['*', '**', '***', '****']
    n = len(data)
    m = data[0].shape[1]
    for i, j in enumerate(data):
        if isinstance(j, pd.DataFrame):
            data[i] = j.values.reshape(len(j.index), len(j.columns))
    if plottype == 'barplot':
        error = pd.DataFrame()
        mean = pd.DataFrame()
        for i in range(m):
            error[i] = [data[j][:, i].std() for j in range(n)]
            mean[i] = [data[j][:, i].mean() for j in range(n)]
        error = error.transpose()
        mean = mean.transpose()
        if len(index) != 0:
            error.index = index
            mean.index = index
        if len(columns) != 0:
            error.columns = columns
            mean.columns = columns
        if fold == True:
            oldmean = mean.copy()
            olderror = error.copy()
            for i in range(len(mean.columns)):
                mean.iloc[:, i] = oldmean.iloc[:, i] / oldmean.iloc[:, foldcol]
                error.iloc[:,
                           i] = olderror.iloc[:, i] / oldmean.iloc[:, foldcol]
        if errorbar == True:
            plot = mean.plot.bar(yerr=error,
                                 ax=ax,
                                 rot=rotate,
                                 capsize=capsize,
                                 error_kw=dict(elinewidth=elinewidth),
                                 fontsize=fontsize)
            max_bar = [[mean.iloc[j, i] + error.iloc[j, i] for i in range(n)]
                       for j in range(m)]
            min_bar = [
                mean.iloc[j, i] - error.iloc[j, i] for i in range(n)
                for j in range(m)
            ]
        else:
            plot = mean.plot.bar(ax=ax,
                                 rot=rotate,
                                 capsize=capsize,
                                 error_kw=dict(elinewidth=elinewidth),
                                 fontsize=fontsize)
            max_bar = [[mean.iloc[j, i] for i in range(n)] for j in range(m)]
            min_bar = [mean.iloc[j, i] for i in range(n) for j in range(m)]
    elif plottype == 'boxplot':
        print("under buiding")
    ylim = 0
    offset = max([max_bar[i][j] for i in range(m) for j in range(n)]) / 100
    blank = []
    if mode == 3:
        for j in range(m):
            level = np.zeros(n)
            for i in range(n):
                if i < n - 1:
                    k = i + 1
                else:
                    k = 0
                if test_var == True:
                    if stats_var == 'f test':
                        f = 0.5 - abs(0.5 - ftest.sf(
                            data[i][:, j].var() / data[k][:, j].var(),
                            len(data[i][:, j]) - 1,
                            len(data[k][:, j]) - 1))
                        if crit_var / 2 > f:
                            equal_var = False
                        else:
                            equal_var = True
                    else:
                        if stats_var == 'bartlett':
                            f = bartlett(data[i][:, j], data[k][:, j])[1]
                        elif stats_var == 'levene':
                            f = levene(data[i][:, j], data[k][:, j])[1]
                        elif stats_var == 'fligner':
                            f = fligner(data[i][:, j], data[k][:, j])[1]
                        if crit_var > f:
                            equal_var = False
                        else:
                            equal_var = True
                if stats == 'independent t test':
                    p = ttest_ind(data[i][:, j],
                                  data[k][:, j],
                                  equal_var=equal_var)[1]
                elif stats == 'paired t test':
                    if equal_var == True:
                        p = ttest_rel(data[i][:, j], data[k][:, j])[1]
                    else:
                        p = 0
                elif stats == 'median test':
                    p = median_test(data[i][:, j], data[k][:, j])[1]
                elif stats == 'mannwhitneyu':
                    if equal_var == True:
                        p = mannwhitneyu(data[i][:, j], data[k][:, j])[1]
                    else:
                        p = 0
                elif stats == 'wilcoxon':
                    if equal_var == True:
                        p = wilcoxon(data[i][:, j], data[k][:, j])[1]
                    else:
                        p = 0
                level[i] = len(crit) - len(crit.compress(p > crit))
            for k in range(n):
                height = 0
                if level[k] != 0 and k != n - 1:
                    center = [
                        plot.patches[k * m + j].get_x(),
                        plot.patches[k * m + m + j].get_x()
                    ]
                    height = max([max_bar[j][k], max_bar[j][k + 1]])
                    h1 = max_bar[j][k]
                    h2 = max_bar[j][k + 1]
                    width = plot.patches[k * m + j].get_width()
                    blank.append(
                        (center[0] + width / 2,
                         height + noffset_fst * offset + (-1)**k * 2 * offset))
                    blank.append(
                        (center[1] + width / 2,
                         height + noffset_fst * offset + (-1)**k * 2 * offset))
                    ax.vlines(x=center[0] + width / 2,
                              ymin=h1 + offset * 2,
                              ymax=height + noffset_fst * offset +
                              (-1)**k * 2 * offset,
                              lw=linewidth)
                    ax.vlines(x=center[1] + width / 2,
                              ymin=h2 + offset * 2,
                              ymax=height + noffset_fst * offset +
                              (-1)**k * 2 * offset,
                              lw=linewidth)
                    ax.annotate(star[int(level[k] - 1)],
                                xy=((center[0] + center[1]) / 2 + width / 2,
                                    height + (noffset_fst + 1) * offset +
                                    (-1)**k * 2 * offset),
                                ha='center',
                                size=star_size)
                elif level[k] != 0 and k == n - 1:
                    center = [
                        plot.patches[j].get_x(),
                        plot.patches[k * m + j].get_x()
                    ]
                    height = max(max_bar[j])
                    h1 = max_bar[j][0]
                    h2 = max_bar[j][k]
                    blank.append(
                        (center[0] + width / 2,
                         height + (noffset_fst + noffset_diff) * offset))
                    blank.append(
                        (center[1] + width / 2,
                         height + (noffset_fst + noffset_diff) * offset))
                    ax.vlines(x=center[0] + width / 2,
                              ymin=h1 + offset * 2,
                              ymax=height +
                              (noffset_fst + noffset_diff) * offset,
                              lw=linewidth)
                    ax.vlines(x=center[1] + width / 2,
                              ymin=h2 + offset * 2,
                              ymax=height +
                              (noffset_fst + noffset_diff) * offset,
                              lw=linewidth)
                    ax.annotate(star[int(level[k] - 1)],
                                xy=((center[0] + center[1]) / 2 + width / 2,
                                    height +
                                    (noffset_fst + noffset_diff + 1) * offset),
                                ha='center',
                                size=star_size)
                if height > ylim:
                    ylim = height
    if mode == 'adjacent':
        for j in range(m):
            level = np.zeros(n - 1)
            for i in range(n - 1):
                k = i + 1
                if test_var == True:
                    if stats_var == 'f test':
                        f = 0.5 - abs(0.5 - ftest.sf(
                            data[i][:, j].var() / data[k][:, j].var(),
                            len(data[i][:, j]) - 1,
                            len(data[k][:, j]) - 1))
                        if crit_var / 2 > f:
                            equal_var = False
                        else:
                            equal_var = True
                    else:
                        if stats_var == 'bartlett':
                            f = bartlett(data[i][:, j], data[k][:, j])[1]
                        elif stats_var == 'levene':
                            f = levene(data[i][:, j], data[k][:, j])[1]
                        elif stats_var == 'fligner':
                            f = fligner(data[i][:, j], data[k][:, j])[1]
                        if crit_var > f:
                            equal_var = False
                        else:
                            equal_var = True
                if stats == 'independent t test':
                    p = ttest_ind(data[i][:, j],
                                  data[k][:, j],
                                  equal_var=equal_var)[1]
                elif stats == 'paired t test':
                    if equal_var == True:
                        p = ttest_rel(data[i][:, j], data[k][:, j])[1]
                    else:
                        p = 0
                elif stats == 'median test':
                    p = median_test(data[i][:, j], data[k][:, j])[1]
                elif stats == 'mannwhitneyu':
                    if equal_var == True:
                        p = mannwhitneyu(data[i][:, j], data[k][:, j])[1]
                    else:
                        p = 0
                elif stats == 'wilcoxon':
                    if equal_var == True:
                        p = wilcoxon(data[i][:, j], data[k][:, j])[1]
                    else:
                        p = 0
                level[i] = len(crit) - len(crit.compress(p > crit))
            for k in range(n - 1):
                height = 0
                if level[k] != 0:
                    center = [
                        plot.patches[k * m + j].get_x(),
                        plot.patches[k * m + m + j].get_x()
                    ]
                    height = max([max_bar[j][k], max_bar[j][k + 1]])
                    h1 = max_bar[j][k]
                    h2 = max_bar[j][k + 1]
                    width = plot.patches[k * m + j].get_width()
                    blank.append(
                        (center[0] + width / 2,
                         height + noffset_fst * offset + (-1)**k * 2 * offset))
                    blank.append(
                        (center[1] + width / 2,
                         height + noffset_fst * offset + (-1)**k * 2 * offset))
                    ax.vlines(x=center[0] + width / 2,
                              ymin=h1 + offset * 2,
                              ymax=height + noffset_fst * offset +
                              (-1)**k * 2 * offset,
                              lw=linewidth)
                    ax.vlines(x=center[1] + width / 2,
                              ymin=h2 + offset * 2,
                              ymax=height + noffset_fst * offset +
                              (-1)**k * 2 * offset,
                              lw=linewidth)
                    ax.annotate(star[int(level[k] - 1)],
                                xy=((center[0] + center[1]) / 2 + width / 2,
                                    height + (noffset_fst + 1) * offset +
                                    (-1)**k * 2 * offset),
                                ha='center',
                                size=star_size)
                if height > ylim:
                    ylim = height
    ax.set_ylim(min(0,
                    min(min_bar) - 10 * offset), ylim + noffset_ylim * offset)
    for j, i in enumerate(blank):
        ax.vlines(x=i[0],
                  ymin=i[1],
                  ymax=i[1] + offset * 2,
                  color='white',
                  lw=1.2 * linewidth)
        if j % 2 == 1:
            ax.hlines(y=i[1], xmin=blank[j - 1][0], xmax=blank[j][0], lw=linewidth)
Example #21
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument("-o",
                        "--outfile",
                        required=True,
                        help="Path to the output file.")
    parser.add_argument("--sample_one_cols",
                        help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols",
                        help="Input format, like smi, sdf, inchi")
    parser.add_argument(
        "--sample_cols",
        help="Input format, like smi, sdf, inchi,separate arrays using ;",
    )
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help=
        "Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help=
        "If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta",
        action="store_true",
        default=False,
        help="Whether or not to return the internally computed a values.",
    )
    parser.add_argument(
        "--fisher",
        action="store_true",
        default=False,
        help="if true then Fisher definition is used",
    )
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help=
        "if false,then the calculations are corrected for statistical bias",
    )
    parser.add_argument(
        "--inclusive1",
        action="store_true",
        default=False,
        help="if false,lower_limit will be ignored",
    )
    parser.add_argument(
        "--inclusive2",
        action="store_true",
        default=False,
        help="if false,higher_limit will be ignored",
    )
    parser.add_argument(
        "--inclusive",
        action="store_true",
        default=False,
        help="if false,limit will be ignored",
    )
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help=
        "If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        default=False,
        help=
        "Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument(
        "--correction",
        action="store_true",
        default=False,
        help="continuity correction ",
    )
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help=
        "Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help=
        "the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument("--b",
                        type=int,
                        default=0,
                        help="The number of bins to use for the histogram")
    parser.add_argument("--N",
                        type=int,
                        default=0,
                        help="Score that is compared to the elements in a.")
    parser.add_argument("--ddof",
                        type=int,
                        default=0,
                        help="Degrees of freedom correction")
    parser.add_argument(
        "--score",
        type=int,
        default=0,
        help="Score that is compared to the elements in a.",
    )
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help=
        "The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument(
        "--new",
        type=float,
        default=0.0,
        help="Value to put in place of values in a outside of bounds",
    )
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help=
        "lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help=
        "If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument(
        "--base",
        type=float,
        default=1.6,
        help="The logarithmic base to use, defaults to e",
    )
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    outfile = open(args.outfile, "w+")
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols is not None:
        sample0 = 1
        barlett_samples = []
        for sample in args.sample_cols.split(";"):
            barlett_samples.append(map(int, sample.split(",")))
    if args.sample_one_cols is not None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(",")
    if args.sample_two_cols is not None:
        sample_two_cols = args.sample_two_cols.split(",")
        sample2 = 1
    for line in open(infile):
        sample_one = []
        sample_two = []
        cols = line.strip().split("\t")
        if sample0 == 1:
            b_samples = columns_to_values(barlett_samples, line)
        if sample1 == 1:
            for index in sample_one_cols:
                sample_one.append(cols[int(index) - 1])
        if sample2 == 1:
            for index in sample_two_cols:
                sample_two.append(cols[int(index) - 1])
        if test_id.strip() == "describe":
            size, min_max, mean, uv, bs, bk = stats.describe(
                map(float, sample_one))
            cols.append(size)
            cols.append(min_max)
            cols.append(mean)
            cols.append(uv)
            cols.append(bs)
            cols.append(bk)
        elif test_id.strip() == "mode":
            vals, counts = stats.mode(map(float, sample_one))
            cols.append(vals)
            cols.append(counts)
        elif test_id.strip() == "nanmean":
            m = stats.nanmean(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "kurtosistest":
            z_value, p_value = stats.kurtosistest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "itemfreq":
            freq = stats.itemfreq(map(float, sample_one))
            for list in freq:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "boxcox_llf":
            IIf = stats.boxcox_llf(imbda, map(float, sample_one))
            cols.append(IIf)
        elif test_id.strip() == "tiecorrect":
            fa = stats.tiecorrect(map(float, sample_one))
            cols.append(fa)
        elif test_id.strip() == "rankdata":
            r = stats.rankdata(map(float, sample_one), method=args.md)
            cols.append(r)
        elif test_id.strip() == "nanstd":
            s = stats.nanstd(map(float, sample_one), bias=args.bias)
            cols.append(s)
        elif test_id.strip() == "anderson":
            A2, critical, sig = stats.anderson(map(float, sample_one),
                                               dist=args.dist)
            cols.append(A2)
            for crit in critical:
                cols.append(crit)
            cols.append(",")
            for level in sig:
                cols.append(level)
        elif test_id.strip() == "binom_test":
            p_value = stats.binom_test(map(float, sample_one),
                                       n=args.n,
                                       p=args.p)
            cols.append(p_value)
        elif test_id.strip() == "gmean":
            gm = stats.gmean(map(float, sample_one), dtype=args.dtype)
            cols.append(gm)
        elif test_id.strip() == "hmean":
            hm = stats.hmean(map(float, sample_one), dtype=args.dtype)
            cols.append(hm)
        elif test_id.strip() == "kurtosis":
            k = stats.kurtosis(
                map(float, sample_one),
                axis=args.axis,
                fisher=args.fisher,
                bias=args.bias,
            )
            cols.append(k)
        elif test_id.strip() == "moment":
            n_moment = stats.moment(map(float, sample_one), moment=args.n)
            cols.append(n_moment)
        elif test_id.strip() == "normaltest":
            k2, p_value = stats.normaltest(map(float, sample_one))
            cols.append(k2)
            cols.append(p_value)
        elif test_id.strip() == "skew":
            skewness = stats.skew(map(float, sample_one), bias=args.bias)
            cols.append(skewness)
        elif test_id.strip() == "skewtest":
            z_value, p_value = stats.skewtest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "sem":
            s = stats.sem(map(float, sample_one), ddof=args.ddof)
            cols.append(s)
        elif test_id.strip() == "zscore":
            z = stats.zscore(map(float, sample_one), ddof=args.ddof)
            for value in z:
                cols.append(value)
        elif test_id.strip() == "signaltonoise":
            s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof)
            cols.append(s2n)
        elif test_id.strip() == "percentileofscore":
            p = stats.percentileofscore(map(float, sample_one),
                                        score=args.score,
                                        kind=args.kind)
            cols.append(p)
        elif test_id.strip() == "bayes_mvs":
            c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one),
                                                   alpha=args.alpha)
            cols.append(c_mean)
            cols.append(c_var)
            cols.append(c_std)
        elif test_id.strip() == "sigmaclip":
            c, c_low, c_up = stats.sigmaclip(map(float, sample_one),
                                             low=args.m,
                                             high=args.n)
            cols.append(c)
            cols.append(c_low)
            cols.append(c_up)
        elif test_id.strip() == "kstest":
            d, p_value = stats.kstest(
                map(float, sample_one),
                cdf=args.cdf,
                N=args.N,
                alternative=args.alternative,
                mode=args.mode,
            )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "chi2_contingency":
            chi2, p, dof, ex = stats.chi2_contingency(
                map(float, sample_one),
                correction=args.correction,
                lambda_=args.lambda_)
            cols.append(chi2)
            cols.append(p)
            cols.append(dof)
            cols.append(ex)
        elif test_id.strip() == "tmean":
            if nf == 0 and mf == 0:
                mean = stats.tmean(map(float, sample_one))
            else:
                mean = stats.tmean(map(float, sample_one), (mf, nf),
                                   (inclusive1, inclusive2))
            cols.append(mean)
        elif test_id.strip() == "tmin":
            if mf == 0:
                min = stats.tmin(map(float, sample_one))
            else:
                min = stats.tmin(map(float, sample_one),
                                 lowerlimit=mf,
                                 inclusive=args.inclusive)
            cols.append(min)
        elif test_id.strip() == "tmax":
            if nf == 0:
                max = stats.tmax(map(float, sample_one))
            else:
                max = stats.tmax(map(float, sample_one),
                                 upperlimit=nf,
                                 inclusive=args.inclusive)
            cols.append(max)
        elif test_id.strip() == "tvar":
            if nf == 0 and mf == 0:
                var = stats.tvar(map(float, sample_one))
            else:
                var = stats.tvar(map(float, sample_one), (mf, nf),
                                 (inclusive1, inclusive2))
            cols.append(var)
        elif test_id.strip() == "tstd":
            if nf == 0 and mf == 0:
                std = stats.tstd(map(float, sample_one))
            else:
                std = stats.tstd(map(float, sample_one), (mf, nf),
                                 (inclusive1, inclusive2))
            cols.append(std)
        elif test_id.strip() == "tsem":
            if nf == 0 and mf == 0:
                s = stats.tsem(map(float, sample_one))
            else:
                s = stats.tsem(map(float, sample_one), (mf, nf),
                               (inclusive1, inclusive2))
            cols.append(s)
        elif test_id.strip() == "scoreatpercentile":
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile(
                    map(float, sample_one),
                    map(float, sample_two),
                    interpolation_method=args.interpolation,
                )
            else:
                s = stats.scoreatpercentile(
                    map(float, sample_one),
                    map(float, sample_two),
                    (mf, nf),
                    interpolation_method=args.interpolation,
                )
            for score in s:
                cols.append(score)
        elif test_id.strip() == "relfreq":
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq(
                    map(float, sample_one), args.b)
            else:
                rel, low_range, binsize, ex = stats.relfreq(
                    map(float, sample_one), args.b, (mf, nf))
            for value in rel:
                cols.append(value)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "binned_statistic":
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                )
            else:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                    range=(mf, nf),
                )
            cols.append(st)
            cols.append(b_edge)
            cols.append(b_n)
        elif test_id.strip() == "threshold":
            if nf == 0 and mf == 0:
                o = stats.threshold(map(float, sample_one), newval=args.new)
            else:
                o = stats.threshold(map(float, sample_one),
                                    mf,
                                    nf,
                                    newval=args.new)
            for value in o:
                cols.append(value)
        elif test_id.strip() == "trimboth":
            o = stats.trimboth(map(float, sample_one),
                               proportiontocut=args.proportiontocut)
            for value in o:
                cols.append(value)
        elif test_id.strip() == "trim1":
            t1 = stats.trim1(
                map(float, sample_one),
                proportiontocut=args.proportiontocut,
                tail=args.tail,
            )
            for value in t1:
                cols.append(value)
        elif test_id.strip() == "histogram":
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram(
                    map(float, sample_one), args.b)
            else:
                hi, low_range, binsize, ex = stats.histogram(
                    map(float, sample_one), args.b, (mf, nf))
            cols.append(hi)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "cumfreq":
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq(
                    map(float, sample_one), args.b)
            else:
                cum, low_range, binsize, ex = stats.cumfreq(
                    map(float, sample_one), args.b, (mf, nf))
            cols.append(cum)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "boxcox_normmax":
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax(map(float, sample_one))
            else:
                ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf),
                                          method=args.method)
            cols.append(ma)
        elif test_id.strip() == "boxcox":
            if imbda == 0:
                box, ma, ci = stats.boxcox(map(float, sample_one),
                                           alpha=args.alpha)
                cols.append(box)
                cols.append(ma)
                cols.append(ci)
            else:
                box = stats.boxcox(map(float, sample_one),
                                   imbda,
                                   alpha=args.alpha)
                cols.append(box)
        elif test_id.strip() == "histogram2":
            h2 = stats.histogram2(map(float, sample_one),
                                  map(float, sample_two))
            for value in h2:
                cols.append(value)
        elif test_id.strip() == "ranksums":
            z_statistic, p_value = stats.ranksums(map(float, sample_one),
                                                  map(float, sample_two))
            cols.append(z_statistic)
            cols.append(p_value)
        elif test_id.strip() == "ttest_1samp":
            t, prob = stats.ttest_1samp(map(float, sample_one),
                                        map(float, sample_two))
            for value in t:
                cols.append(value)
            for value in prob:
                cols.append(value)
        elif test_id.strip() == "ansari":
            AB, p_value = stats.ansari(map(float, sample_one),
                                       map(float, sample_two))
            cols.append(AB)
            cols.append(p_value)
        elif test_id.strip() == "linregress":
            slope, intercept, r_value, p_value, stderr = stats.linregress(
                map(float, sample_one), map(float, sample_two))
            cols.append(slope)
            cols.append(intercept)
            cols.append(r_value)
            cols.append(p_value)
            cols.append(stderr)
        elif test_id.strip() == "pearsonr":
            cor, p_value = stats.pearsonr(map(float, sample_one),
                                          map(float, sample_two))
            cols.append(cor)
            cols.append(p_value)
        elif test_id.strip() == "pointbiserialr":
            r, p_value = stats.pointbiserialr(map(float, sample_one),
                                              map(float, sample_two))
            cols.append(r)
            cols.append(p_value)
        elif test_id.strip() == "ks_2samp":
            d, p_value = stats.ks_2samp(map(float, sample_one),
                                        map(float, sample_two))
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "mannwhitneyu":
            mw_stats_u, p_value = stats.mannwhitneyu(
                map(float, sample_one),
                map(float, sample_two),
                use_continuity=args.mwu_use_continuity,
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "zmap":
            z = stats.zmap(map(float, sample_one),
                           map(float, sample_two),
                           ddof=args.ddof)
            for value in z:
                cols.append(value)
        elif test_id.strip() == "ttest_ind":
            mw_stats_u, p_value = stats.ttest_ind(map(float, sample_one),
                                                  map(float, sample_two),
                                                  equal_var=args.equal_var)
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "ttest_rel":
            t, prob = stats.ttest_rel(map(float, sample_one),
                                      map(float, sample_two),
                                      axis=args.axis)
            cols.append(t)
            cols.append(prob)
        elif test_id.strip() == "mood":
            z, p_value = stats.mood(map(float, sample_one),
                                    map(float, sample_two),
                                    axis=args.axis)
            cols.append(z)
            cols.append(p_value)
        elif test_id.strip() == "shapiro":
            W, p_value, a = stats.shapiro(map(float, sample_one),
                                          map(float, sample_two), args.reta)
            cols.append(W)
            cols.append(p_value)
            for value in a:
                cols.append(value)
        elif test_id.strip() == "kendalltau":
            k, p_value = stats.kendalltau(
                map(float, sample_one),
                map(float, sample_two),
                initial_lexsort=args.initial_lexsort,
            )
            cols.append(k)
            cols.append(p_value)
        elif test_id.strip() == "entropy":
            s = stats.entropy(map(float, sample_one),
                              map(float, sample_two),
                              base=args.base)
            cols.append(s)
        elif test_id.strip() == "spearmanr":
            if sample2 == 1:
                rho, p_value = stats.spearmanr(map(float, sample_one),
                                               map(float, sample_two))
            else:
                rho, p_value = stats.spearmanr(map(float, sample_one))
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "wilcoxon":
            if sample2 == 1:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    map(float, sample_two),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            else:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "chisquare":
            if sample2 == 1:
                rho, p_value = stats.chisquare(map(float, sample_one),
                                               map(float, sample_two),
                                               ddof=args.ddof)
            else:
                rho, p_value = stats.chisquare(map(float, sample_one),
                                               ddof=args.ddof)
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "power_divergence":
            if sample2 == 1:
                stat, p_value = stats.power_divergence(
                    map(float, sample_one),
                    map(float, sample_two),
                    ddof=args.ddof,
                    lambda_=args.lambda_,
                )
            else:
                stat, p_value = stats.power_divergence(map(float, sample_one),
                                                       ddof=args.ddof,
                                                       lambda_=args.lambda_)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "theilslopes":
            if sample2 == 1:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one),
                                                     map(float, sample_two),
                                                     alpha=args.alpha)
            else:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one),
                                                     alpha=args.alpha)
            cols.append(mpe)
            cols.append(met)
            cols.append(lo)
            cols.append(up)
        elif test_id.strip() == "combine_pvalues":
            if sample2 == 1:
                stat, p_value = stats.combine_pvalues(
                    map(float, sample_one),
                    method=args.med,
                    weights=map(float, sample_two),
                )
            else:
                stat, p_value = stats.combine_pvalues(map(float, sample_one),
                                                      method=args.med)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "obrientransform":
            ob = stats.obrientransform(*b_samples)
            for row in ob:
                elements = ",".join(map(str, row))
                cols.append(elements)
        elif test_id.strip() == "f_oneway":
            f_value, p_value = stats.f_oneway(*b_samples)
            cols.append(f_value)
            cols.append(p_value)
        elif test_id.strip() == "kruskal":
            h, p_value = stats.kruskal(*b_samples)
            cols.append(h)
            cols.append(p_value)
        elif test_id.strip() == "friedmanchisquare":
            fr, p_value = stats.friedmanchisquare(*b_samples)
            cols.append(fr)
            cols.append(p_value)
        elif test_id.strip() == "fligner":
            xsq, p_value = stats.fligner(*b_samples,
                                         center=args.center,
                                         proportiontocut=args.proportiontocut)
            cols.append(xsq)
            cols.append(p_value)
        elif test_id.strip() == "bartlett":
            T, p_value = stats.bartlett(*b_samples)
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "levene":
            w, p_value = stats.levene(*b_samples,
                                      center=args.center,
                                      proportiontocut=args.proportiontocut)
            cols.append(w)
            cols.append(p_value)
        elif test_id.strip() == "median_test":
            stat, p_value, m, table = stats.median_test(
                *b_samples,
                ties=args.ties,
                correction=args.correction,
                lambda_=args.lambda_)
            cols.append(stat)
            cols.append(p_value)
            cols.append(m)
            cols.append(table)
            for row in table:
                elements = ",".join(map(str, row))
                cols.append(elements)
        outfile.write("%s\n" % "\t".join(map(str, cols)))
    outfile.close()
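A minimal sketch of what the median_test branch above computes, with toy
samples standing in for the tool's column data (the sample values are
illustrative only):

# Hedged sketch: scipy.stats.median_test exposes the same ties /
# correction / lambda_ keywords that the tool passes through.
from scipy import stats

g1 = [1.0, 2.0, 3.0, 4.0]
g2 = [5.0, 6.0]
g3 = [7.0, 8.0, 9.0]

stat, p_value, grand_median, table = stats.median_test(
    g1, g2, g3, ties="below", correction=True, lambda_=1)
print(stat, p_value, grand_median)
for row in table:
    print(",".join(map(str, row)))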
Beispiel #22
0
def main(task_path,
         feature_path,
         ylabel,
         output_path,
         BINS=np.array(['-', 'N', '+', 'S']),
         colors=['#FF0000', '#FFFF00', '#00CC00', '#3d77ff'],
         star_colors=['#FF0000', 'orange', '#00CC00', '#3d77ff']):

    df = pd.read_csv(task_path)

    #loader = TruePairwiseFeatureLoader(feature_path)
    loader = SumPairwiseLoader(feature_path)

    fig, ax = plt.subplots(1, 1, figsize=(10, 10))

    df['feature'] = loader.get_values(df)
    df['bin'] = BINS[df['bin'].astype(int)]

    ax = sns.violinplot(x="bin",
                        y="feature",
                        ax=ax,
                        order=BINS,
                        data=df,
                        palette=colors,
                        saturation=1)

    ax.yaxis.set_tick_params(labelsize=plot_cfg['tick_label_size'])
    ax.xaxis.set_tick_params(labelsize=plot_cfg['tick_label_size'], pad=15)
    ax.set_ylabel(ylabel, fontsize=plot_cfg['ylabel_size'], weight='bold')
    ax.set_xlabel('')
    ax.yaxis.set_tick_params(length=10, width=1, which='both')
    ax.xaxis.set_tick_params(length=0)
    ax.grid(False)
    plt.setp(ax.spines.values(),
             linewidth=plot_cfg["border_size"],
             color='black')

    min_val, max_val = ax.get_ylim()
    ax.set_ylim([min_val, max_val * 1.3])

    bins = BINS

    # plot pairwise p-values with a Bonferroni-adjusted alpha
    ALPHA = 0.05
    num_comparisons = len(bins) * (len(bins) - 1) // 2
    adjusted_alpha = ALPHA / num_comparisons

    for i in range(len(bins)):
        a = df[df['bin'] == bins[i]]['feature']
        a_med = np.median(a)
        ax.plot([i, i], [a_med, a_med],
                'o',
                color=plot_cfg['iqr_color'],
                markersize=15)
        iqr_lower = np.percentile(a, 25)
        iqr_upper = np.percentile(a, 75)
        ax.plot([i, i], [iqr_lower, iqr_upper],
                linewidth=5,
                color=plot_cfg['iqr_color'])

        yoffset = max_val

        for j in range(i + 1, len(bins)):
            b = df[df['bin'] == bins[j]]['feature']
            statistic, pvalue, _, _ = stats.median_test(a, b)
            print("%s (%0.2f) vs. %s (%0.2f): %0.6f [%0.6f]" %
                  (bins[i], np.median(a), bins[j], np.median(b), pvalue,
                   statistic))

            if pvalue < adjusted_alpha:
                stars = '*' * eval_funcs.compute_stars(pvalue, adjusted_alpha)
                target_color = star_colors[j]
                ax.text(i,
                        yoffset,
                        stars,
                        color=target_color,
                        ha="center",
                        va="center",
                        weight='bold',
                        fontsize=plot_cfg['stars_label_size'])
                yoffset += 0.1 * max_val

    plt.savefig(output_path, bbox_inches='tight', dpi=100)
    plt.show()

    plt.close()
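The pairwise loop above is a Bonferroni-corrected sweep of Mood's median
tests over all bin pairs; the same idea as a compact helper, where the
groups argument is a hypothetical dict mapping bin label to a 1-D array of
feature values:

from itertools import combinations
from scipy import stats

def pairwise_median_tests(groups, alpha=0.05):
    # All pairwise Mood's median tests with a Bonferroni-adjusted alpha,
    # mirroring the nested loops above.
    pairs = list(combinations(sorted(groups), 2))
    adjusted_alpha = alpha / len(pairs)
    results = {}
    for a, b in pairs:
        stat, p, _, _ = stats.median_test(groups[a], groups[b])
        results[(a, b)] = (stat, p, p < adjusted_alpha)
    return results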
Beispiel #23
0
# interpretation of a preceding test (p_value and alpha are computed
# earlier in the original script, outside this excerpt)
if p_value > alpha:
    print('Same distributions (fail to reject H0)')
else:
    print('Different distributions (reject H0)')
stat, p_value = mood(dataset['Open'], dataset['Adj Close'])

print('Mood Test')
print('-' * 40)
print('Statistics=%.3f, p=%.3f' % (stat, p_value))
# interpret
alpha = 0.05
if p_value > alpha:
    print('Same distributions (fail to reject H0)')
else:
    print('Different distributions (reject H0)')
stat, p_value, med, tbl = median_test(dataset['Open'], dataset['Adj Close'],
                                      dataset['Volume'])

print("Mood's median test")
print('-' * 40)
print('Statistics=%.3f, p=%.3f' % (stat, p_value))
# interpret
alpha = 0.05
if p_value > alpha:
    print('Same distributions (fail to reject H0)')
else:
    print('Different distributions (reject H0)')

stat, p_value, med, tbl = median_test(dataset['Open'],
                                      dataset['Adj Close'],
                                      dataset['Volume'],
                                      lambda_="log-likelihood")
Beispiel #24
0
                "markerfacecolor": "black",
                "markeredgecolor": "black",
                "markersize": "8"
            }).set(title='Heart Disease Status vs. Serum Cholesterol')
sns.set(font_scale=1.7)
plt.text(2 + 0.2,
         4.5,
         "* = mean ",
         horizontalalignment='left',
         size='small',
         color='black')

# stats
pd.set_option('display.expand_frame_repr', False)
df.groupby([x, hue])[y].describe()
stat, p, med, tbl = median_test(hdserum, ndhserum)
print("grand median:", med)
print("p-value:", p)
print("statistic:", stat)
print("contingency table:", tbl)

# Part 3: Make a violin plot of part 2
x = 'Heart Disease Status'
y = "Serum Cholesterol in mg/dl"
sns.catplot(
    x, y, kind='violin', hue='Sex', data=df,
    palette='Blues').set(title='Heart Disease Status vs. Serum Cholesterol')
plt.text(2 + 0.2,
         4.5,
         "* = mean ",
         horizontalalignment='left',
         size='small',
         color='black')
Beispiel #25
0
    #     print('ttest', i, tstat, ttpval)

    # KS test
    ksstat, ks_pval = stats.ks_2samp(fl_by_time_clpXminus_cut[i],
                                     fl_by_time_clpXplus_cut[i])
    #     print('KS test', ksstat, ks_pval)

    # Mann-Whitney
    mwstat, mwpval = stats.mannwhitneyu(fl_by_time_clpXminus_cut[i],
                                        fl_by_time_clpXplus_cut[i],
                                        alternative='greater')
    #     print('Mann-Whitney', mwstat, mwpval)

    # Mood's median test
    median_args = (fl_by_time_clpXminus_cut[i], fl_by_time_clpXplus_cut[i])
    mstat, mpval, _, _ = stats.median_test(*median_args)
    #     print('Median test', mstat, mpval)
    #     print('\n')

    # F test for variance
    var1 = np.var(fl_by_time_clpXplus_cut[i])
    df1 = len(fl_by_time_clpXplus_cut[i]) - 1
    var2 = np.var(fl_by_time_clpXminus_cut[i])
    df2 = len(fl_by_time_clpXminus_cut[i]) - 1
    if var1 > var2:
        F = var1 / var2
        fpval = stats.f.sf(F, df1, df2)
    else:
        F = var2 / var1
        fpval = stats.f.sf(F, df2, df1)
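The block above computes a one-tailed variance-ratio F test by hand; a
hedged sketch of the same computation as a reusable helper (note that the
code above uses np.var's default ddof=0, the population variance):

import numpy as np
from scipy import stats

def f_test_var(x, y, ddof=0, two_tailed=False):
    # Put the larger variance in the numerator, as done above.
    var_x, var_y = np.var(x, ddof=ddof), np.var(y, ddof=ddof)
    if var_x >= var_y:
        F, dfn, dfd = var_x / var_y, len(x) - 1, len(y) - 1
    else:
        F, dfn, dfd = var_y / var_x, len(y) - 1, len(x) - 1
    p = stats.f.sf(F, dfn, dfd)
    return (F, min(1.0, 2 * p)) if two_tailed else (F, p)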
sns.boxplot(x="weather", y="y", data=train, ax=ax[0][1])
sns.boxplot(x="remarks", y="y", data=train, ax=ax[1][0])
ax[1][0].set_xticklabels(ax[1][0].get_xticklabels(), rotation=30)
sns.boxplot(x="event", y="y", data=train, ax=ax[1][1])
plt.tight_layout()

plt.show()

train[train["remarks"] != "お楽しみメニュー"]["y"].plot(figsize=(15, 4),
                                                label="not Amuse")
train[train["remarks"] == "お楽しみメニュー"]["y"].plot(figsize=(15, 4), label="Amuse")

plt.legend()

plt.show()

train["fun"] = train["remarks"].apply(lambda x: 1 if x == "お楽しみメニュー" else 0)
sns.boxplot(x="fun", y="y", data=train)

plt.show()

stat, p, med, tbl = median_test(train[train["fun"] == 1]["y"],
                                train[train["fun"] == 0]["y"])
print("p", p, "stat", stat)

train[train["remarks"] == "お楽しみメニュー"]

train["curry"] = train["name"].apply(lambda x: 1 if x.find("カレー") >= 0 else 0)
sns.boxplot(x="curry", y="y", data=train)

plt.show()
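The curry flag can be checked the same way as the fun-menu flag above; a
sketch reusing train and median_test from this snippet:

stat, p, med, tbl = median_test(train[train["curry"] == 1]["y"],
                                train[train["curry"] == 0]["y"])
print("p", p, "stat", stat)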
Beispiel #28
0
     # run the test once and unpack statistic and p-value
     s1, s2 = stats.ttest_ind(values, compare_values, equal_var=l)
 elif levene[agent - 2][action + 1] == "True":
     w = "U"
     s1, s2 = stats.mannwhitneyu(values,
                                 compare_values,
                                 alternative='two-sided')
 else:
     w = "chi^2"
     try:
         s1, s2 = stats.median_test(values, compare_values)[:2]
     except Exception:
         s1 = 0
         s2 = 0
 s = [s1, s2]
 if s[1] < 0.0000000001:
     sec = "***"
 elif s[1] < 0.0000001:
     sec = "**"
 elif s[1] < 0.0001:
     sec = "*"
 else:
     sec = "%.4f" % np.around(s, decimals=4)[1]
 res = "%s (%.4f %s)" % (sec, np.around(s, decimals=4)[0], w)
 results[agent - 2][action + 1] = res
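The threshold ladder above maps a p-value to significance stars; the same
mapping written once as a small helper:

def significance_marker(p):
    # Star thresholds exactly as used above; otherwise the rounded p-value.
    if p < 0.0000000001:
        return "***"
    if p < 0.0000001:
        return "**"
    if p < 0.0001:
        return "*"
    return "%.4f" % p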
Beispiel #29
0

# mean-test results
meantest = []
# median-test results
mediantest = []
from scipy import stats as st
# run the tests on each column ("年龄" = age, "储蓄" = savings)
cols = ["年龄", "储蓄"]
for col in cols:
    t, p = st.ttest_ind(datapass[col].dropna(), datafail[col].dropna())[0:2]
    meantest.append([col, t, p])
    t, p = st.median_test(datapass[col].dropna(), datafail[col].dropna())[0:2]
    mediantest.append([col, t, p])


# In[114]:


# show the results
print(meantest)
print(mediantest)
# mean and median tests: p < 0.05 is significant, i.e. the two groups
# differ in mean and in median
# export the results
pd.DataFrame(meantest).to_csv("meantest.csv", encoding="gbk")
pd.DataFrame(mediantest).to_csv("mediantest.csv", encoding="gbk")
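Giving the exported tables explicit headers makes the CSV files
self-describing; a sketch (the column labels are illustrative):

pd.DataFrame(meantest, columns=["column", "statistic", "p_value"]).to_csv(
    "meantest.csv", encoding="gbk", index=False)
pd.DataFrame(mediantest, columns=["column", "statistic", "p_value"]).to_csv(
    "mediantest.csv", encoding="gbk", index=False)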