Example #1
1
def test_accuracy_wilcoxon():
    freq = [1, 4, 16, 15, 8, 4, 5, 1, 2]
    nums = range(-4, 5)
    x = np.concatenate([[u] * v for u, v in zip(nums, freq)])
    y = np.zeros(x.size)

    T, p = stats.wilcoxon(x, y, "pratt")
    assert_allclose(T, 423)
    assert_allclose(p, 0.00197547303533107)

    T, p = stats.wilcoxon(x, y, "zsplit")
    assert_allclose(T, 441)
    assert_allclose(p, 0.0032145343172473055)

    T, p = stats.wilcoxon(x, y, "wilcox")
    assert_allclose(T, 327)
    assert_allclose(p, 0.00641346115861)

    # Test the 'correction' option, using values computed in R with:
    # > wilcox.test(x, y, paired=TRUE, exact=FALSE, correct={FALSE,TRUE})
    x = np.array([120, 114, 181, 188, 180, 146, 121, 191, 132, 113, 127, 112])
    y = np.array([133, 143, 119, 189, 112, 199, 198, 113, 115, 121, 142, 187])
    T, p = stats.wilcoxon(x, y, correction=False)
    assert_equal(T, 34)
    assert_allclose(p, 0.6948866, rtol=1e-6)
    T, p = stats.wilcoxon(x, y, correction=True)
    assert_equal(T, 34)
    assert_allclose(p, 0.7240817, rtol=1e-6)
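
For quick reference, a minimal sketch (not part of the original test; data invented) showing the same zero_method options passed by keyword rather than positionally:

# Illustrative sketch; assumes numpy and scipy are installed.
import numpy as np
from scipy import stats

d = np.array([-4, -3, 0, 0, 1, 2, 2, 3, 4, 5])
for zm in ("pratt", "zsplit", "wilcox"):
    T, p = stats.wilcoxon(d, zero_method=zm)
    print(zm, T, p)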
Example #2
0
def symtest(alpha, axis=None):
    """
    Non-parametric test for symmetry around the median. Works by performing a 
    Wilcoxon sign rank test on the differences to the median.

    H0: the population is symmetrical around the median
    HA: the population is not symmetrical around the median
        

    :param alpha: sample of angles in radian
    :param axis:  compute along this dimension, default is None
                  if axis=None, array is raveled
    :return pval: two-tailed p-value
    :return T:    test statistics of underlying wilcoxon test
   

    References: [Zar2009]_
    """

    m = descriptive.median(alpha, axis=axis)

    d = np.angle(np.exp(1j * m[np.newaxis]) / np.exp(1j * alpha))

    if axis is not None:
        oshape = d.shape[1:]
        d2 = d.reshape((d.shape[0], np.prod(d.shape[1:])))
        T, pval = map(lambda x: np.asarray(x).reshape(oshape), zip(*[stats.wilcoxon(dd) for dd in d2.T]))
    else:
        T, pval = stats.wilcoxon(d)

    return pval, T
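
A hypothetical usage sketch (assumes the surrounding pycircstat-style package, with descriptive, stats, and np in scope; the angles are invented):

import numpy as np

alpha = np.random.vonmises(mu=0.0, kappa=2.0, size=100)  # angles in radians
pval, T = symtest(alpha)
print("two-tailed p-value:", pval)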
Example #3
0
File: report2.py Project: hizki/AI1
 def test_times_slot(time,ri):
     print "time", time
     x = get_solen_of_pss_rids_by_time(room_ids, firts_pss,time)
     y = get_solen_of_pss_rids_by_time(room_ids, second_pss,time)
     
 
     xq = optimals / x
     yq = optimals / y
     
     if ri == []:
         xq,yq, ri = filter_qualties_ext(xq,yq)
         xqt ,yqt = filter_qualties(xq,yq)
         print(wilcoxon(xq, yq))
         print(wilcoxon(xqt, yqt))
     else:
         xq,yq  = filter_qualties_indx(xq,yq, ri)
     
     print "number of rooms:",len(xq)
     print xq
     print yq
     r = wilcoxon(xq,yq)
     print "wilcoxon: ", r
     wr.append(r[1])
             
     rx.append(median(xq))
     ry.append(median(yq))
     
     return ri
Example #4
0
def test_wilcoxon_arg_type():
    # Should be able to accept a list as argument.
    # Addresses issue 6070.
    arr = [1, 2, 3, 0, -1, 3, 1, 2, 1, 1, 2]

    _ = stats.wilcoxon(arr, zero_method="pratt")
    _ = stats.wilcoxon(arr, zero_method="zsplit")
    _ = stats.wilcoxon(arr, zero_method="wilcox")
Example #5
0
def wilcox(dictresult, interval):
    # Print the p-value for the entire result
    for key, value in dictresult.items():
        print("Paired wilcoxon-test with ", key, ":\t", wilcoxon(value[0], value[1]))

    print("--------------------------------------\n")

    # Print the p-value for every learning split
    for inte in interval:
        for key, value in dictresult.items():
            print("Paired wilcoxon-test with ", key, "in the interval", inte, "-", inte + 5,
                  ":\t", wilcoxon(value[0][inte:inte + 5], value[1][inte:inte + 5]))
Example #6
0
def print_wilcoxon(predictions):
    print("############################################################################")
    print("Wilcoxon Result for Decision Tree: ")
    print(wilcoxon(x=predictions['custom_decision_tree'],
                   y=predictions['sklearn_decision_tree']))
    print("Wilcoxon Result for Random Forest: ")
    print(wilcoxon(x=predictions['custom_random_forest'],
                   y=predictions['sklearn_random_forest']))
    if predictions['sklearn_neighbors']:
        print("Wilcoxon Result for Nearest Neighbors: ")
        print(wilcoxon(x=predictions['sklearn_neighbors']))
    print("############################################################################\n\n")
Example #7
0
def paired_data():
    '''Analysis of paired data
    Compare mean daily intake over 10 pre-menstrual and 10 post-menstrual days (in kJ).'''
    
    # Get the data:  daily intake of energy in kJ for 11 women
    data = getData('altman_93.txt')
    
    mean(data, axis=0)
    std(data, axis=0, ddof=1)
    
    pre = data[:,0]
    post = data[:,1]
    
    # paired t-test: doing two measurements on the same experimental unit,
    # e.g., before and after a treatment
    t_statistic, p_value = stats.ttest_1samp(post - pre, 0)
    
    # p < 0.05 => alternative hypothesis:
    # the difference in mean is not equal to 0
    print("paired t-test", p_value)
    
    # alternative to the paired t-test when data are on an ordinal scale or
    # not normally distributed
    z_statistic, p_value = stats.wilcoxon(post - pre)
    print("paired wilcoxon-test", p_value)
Example #8
def oneGroup():
    '''Test of mean value of a single set of data'''
    
    print('Single group of data =========================================')
    
    # First get the data
    data = np.array([5260, 5470, 5640, 6180, 6390, 6515, 6805, 7515, 7515, 8230, 8770], dtype=float)
    checkValue = 7725   # value to compare the data to
    
    # 4.1.1. Normality test
    # We don't need the first parameter, so we just assign the output to the dummy variable "_"
    (_, p) = stats.normaltest(data)
    if p > 0.05:
        print('Data are distributed normally, p = {0}'.format(p))
        
    # 4.1.2. Do the onesample t-test
    t, prob = stats.ttest_1samp(data, checkValue)
    if prob < 0.05:
        print('With the one-sample t-test, {0:4.2f} is significantly different from the mean (p={1:5.3f}).'.\
        format(checkValue, prob))
    else:
        print('No difference from reference value with onesample t-test.')
    
    # 4.1.3. This implementation of the Wilcoxon test checks for the "difference" of one vector of data from zero
    (_,p) = stats.wilcoxon(data-checkValue)
    if p < 0.05:
        print('With the Wilcoxon test, {0:4.2f} is significantly different from the mean (p={1:5.3f}).'.\
        format(checkValue, p))
    else:
        print('No difference from reference value with the Wilcoxon signed-rank test.')
Example #9
 def quick_stats(x, chance):
     # x = x[np.where(~np.isnan(x))[0]]
     text = '[%.3f+/-%.3f, p=%.4f]'
     m = np.nanmean(x)
     sem = np.nanstd(x) / np.sqrt(len(x))
     pval = wilcoxon(x - chance)[1]
     return text % (m, sem, pval)
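
A hedged usage sketch for quick_stats (assumes numpy as np and scipy.stats.wilcoxon are imported as in the snippet; the scores are invented):

import numpy as np

scores = np.array([0.55, 0.61, 0.48, 0.57, 0.62, 0.53, 0.59, 0.51])
print(quick_stats(scores, chance=0.5))  # '[mean+/-sem, p=...]' string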
Example #10
0
File: util.py Project: craigatencio/lnpy
def plot_scatter(ax, x, y, xerr, yerr, xlabel='', ylabel='',
                 calc_wilcoxon=True, color=3*[.5], ecolor=3*[.75],
                 fmt='o', **kwargs):
    """create scatter plot with equally-spaced axes, diagonal line etc."""

    ax.axis('on')
    ax.axis('scaled')
    ax.errorbar(x, y, xerr=xerr, yerr=yerr, fmt=fmt,
                color=color, ecolor=ecolor, **kwargs)

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    max_err = max([xerr.max(), yerr.max()])
    xmin = min([x.min(), y.min()]) - max_err
    xmax = max([x.max(), y.max()]) + max_err
    zz = np.linspace(xmin, xmax, 100)
    ax.plot(zz, zz, 'k--')

    ax.set_xlim(xmin, xmax)
    ax.set_ylim(xmin, xmax)

    if calc_wilcoxon:
        _, p_value = wilcoxon(x, y)
        ax.text(.05, .8, 'p = %.3e' % p_value,
                transform=ax.transAxes)
Example #11
0
File: stat.py Project: jtmnf/TP2_OX_PMX
def wilcoxon(data1, data2):
    """
    non parametric
    two samples
    dependent
    """
    return st.wilcoxon(data1, data2)
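
A minimal usage sketch (assumes the project's `import scipy.stats as st`; the data are invented):

before = [10.2, 9.8, 11.1, 10.5, 9.9, 10.8]
after = [10.9, 10.1, 11.5, 11.0, 10.4, 11.2]
print(wilcoxon(before, after))  # paired, non-parametric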
Example #12
0
File: base.py Project: kingjr/jr-tools
def _loop_wilcoxon(X, zero_method, correction):
    from scipy.stats import wilcoxon
    p_val = np.ones(X.shape[1])
    stats = np.ones(X.shape[1])
    for ii, x in enumerate(X.T):
        # pass the zero_method/correction arguments through to scipy
        # (the original accepted them but never used them)
        stats[ii], p_val[ii] = wilcoxon(x, zero_method=zero_method,
                                        correction=correction)
    return stats, p_val
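
A hedged usage sketch for _loop_wilcoxon (assumes numpy is imported as np; X is an invented (n_samples, n_tests) array, one Wilcoxon test per column):

import numpy as np

X = np.random.randn(20, 5) + 0.5
stat_arr, p_arr = _loop_wilcoxon(X, zero_method='wilcox', correction=False)
print(p_arr)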
Example #13
def main(stat_file, kernel):
    stats = defaultdict(dict)
    with open(stat_file, 'r') as f:
        for line in f:
            parts = line.strip().split(',')
            results = parse_nsk_results(parts, kernel)
            if results is None: continue
            dset, norm, stat = results
            stats[dset][norm] = stat

    S = np.array([[stats[dset][norm] for norm in NORMS]
                  for dset in stats.keys()
                    if all((norm in stats[dset])
                           for norm in NORMS)])

    wins = (S[:, 0] > S[:, 1]) + 0.5*(S[:, 0] == S[:, 1])
    winrate = np.average(wins)
    if winrate > 0.5:
        symbol = '>'
    elif winrate < 0.5:
        symbol = '<'
    else:
        symbol = '='

    print(np.column_stack([S, wins]))
    print('%s %s %s' % (NORMS[0], symbol, NORMS[1]))
    print('Winrate for %s: %f' % (NORMS[0], winrate))
    print('Wilcoxon p-value: %f' % wilcoxon(S[:, 0], S[:, 1])[1])
Example #14
0
def print_WilcoxonSRT(x, y=None):
    """
    Compute the Wilcoxon Signed Rank Test
    """
    T,p = stats.wilcoxon(x, y)
    print "Wilcoxon Signed Rank Test:"
    print "p-value: {}".format(p)
Example #15
0
def significance_comparison(data_vectors):
    # Perform a pairwise Wilcoxon signed-rank test on each paired
    # combination of placer algorithms.
    wilcoxon_results = []
    for (k1, v1), (k2, v2) in itertools.combinations(data_vectors.items(), 2):
        min_atoms = min(len(v1), len(v2))

        # Require at least $N$ elements in each vector, where $N = 5$.
        # TODO: $N$ should likely be configurable...
        N = 5
        if min_atoms < N:
            raise ValueError('At least %d elements are required in each '
                             'vector. (%s has %d, %s has %d)' % (N, k1,
                                                                 len(v1), k2,
                                                                 len(v2)))
        if len(v1) > min_atoms:
            print ('[warning] %s has more elements than %s.  Only using first '
                   '%d elements from %s' % (k1, k2, min_atoms, k1))
            v1 = v1[:min_atoms]
        elif len(v2) > min_atoms:
            print ('[warning] %s has more elements than %s.  Only using first '
                   '%d elements from %s' % (k2, k1, min_atoms, k2))
            v2 = v2[:min_atoms]
        p_value = wilcoxon(v1, v2)[-1]
        wilcoxon_results.append((k1, k2, p_value, p_value < 0.05))

    p_values = pd.DataFrame(wilcoxon_results, columns=('A', 'B', 'p-value',
                                                       'significant'))
    return p_values
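
A hypothetical call (assumes pandas, itertools, and scipy's wilcoxon are imported as in the snippet; the placer names and vectors are invented, and each vector needs at least N = 5 paired elements):

import numpy as np

data_vectors = {
    'placer_a': list(np.random.rand(10)),
    'placer_b': list(np.random.rand(10)),
}
print(significance_comparison(data_vectors))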
Example #16
0
def CompareHeuristcHelp(resultsListW, resultsListG):
    Gwin=0.0
    Wwin=0.0
    GWwin=0.0
    count=0.0
    for i in range(len(resultsListG)):
        if(resultsListW[i]==0):
            resultsListW[i]=infinity
        if(resultsListG[i]==0):
            resultsListG[i]=infinity
        if(resultsListG[i]<resultsListW[i]):
            Gwin+=1
        if(resultsListG[i]>resultsListW[i]):
            Wwin+=1
        if(resultsListG[i]==resultsListW[i] and (resultsListG[i]!=infinity or resultsListW[i]!=infinity)  ):
            GWwin+=1
    total = Gwin + Wwin + GWwin
    total /= 100.0

    if total == 0:
        finalResult = (0, 0, 0)
    else:
        finalResult = (Gwin / total, Wwin / total, GWwin / total)
    
    a=wilcoxon(resultsListW,resultsListG)
    return finalResult       
Example #17
0
def stats_fn(data_frame):
    global scene
    stat_file = open("Stat_tests_" + scene[:-4] + ".txt", "w")
    seen_pairs = []
    for algorithm in data_frame:
        for algorithm2 in data_frame:
            if (algorithm != algorithm2) and ((algorithm, algorithm2) not in seen_pairs):
                seen_pairs.append((algorithm, algorithm2))
                seen_pairs.append((algorithm2, algorithm))
                statistical_significance = stats.wilcoxon(data_frame[algorithm], data_frame[algorithm2])
                print(algorithm, " VS ", algorithm2, " -->", statistical_significance, file=stat_file)
                print(algorithm, " median = ", np.median(data_frame[algorithm]), file=stat_file)
                print(algorithm2, " median = ", np.median(data_frame[algorithm2]), file=stat_file)
                print("----------------------------------------------------------", file=stat_file)
    # # This part is for drawing the different boxplots
    figure_name = scene + "_.png"
    current_path = os.getcwd()
    os.chdir("/home/omohamme/INRIA/experiments/moop_sim_comparison/boxplots/" + scene[:-4] + "/")
    plt.figure(figsize=(15.0, 11.0))
    plt.boxplot(data_frame.values())
    plt.xticks(range(1, len(data_frame.keys()) + 1), data_frame.keys())
    plt.title(figure_name)
    plt.savefig(figure_name)
    os.chdir(current_path)

    stat_file.close()
Example #18
0
File: twoSample.py Project: CeasarSS/books
def paired_data():
    '''Analysis of paired data
    Compare mean daily intake over 10 pre-menstrual and 10 post-menstrual days (in kJ).'''
    
    # Get the data:  daily intake of energy in kJ for 11 women
    data = getData('altman_93.txt', subDir=r'..\Data\data_altman')
    
    mean(data, axis=0)
    std(data, axis=0, ddof=1)
    
    pre = data[:,0]
    post = data[:,1]
    
    # --- >>> START stats <<< ---
    # paired t-test: doing two measurements on the same experimental unit,
    # e.g., before and after a treatment
    t_statistic, p_value = stats.ttest_1samp(post - pre, 0)
    
    # p < 0.05 => alternative hypothesis:
    # the difference in mean is not equal to 0
    print(("paired t-test", p_value))
    
    # alternative to the paired t-test when data are on an ordinal scale or
    # not normally distributed
    rankSum, p_value = stats.wilcoxon(post - pre)
    # --- >>> STOP stats <<< ---
    print(("Wilcoxon-Signed-Rank-Sum test", p_value))
    
    return p_value # should be 0.0033300139117459797
Example #19
0
File: Stats.py Project: royxu1972/candybox
    def statsUpdarte(self, arr, opt):
        """
        :parameter
            arr -- an |orders| * 30 ndarray
            opt -- operator, indicating how to compare a and b
        :return
            sta -- an |orders| * |orders| list, where sta[i][j] = + / = / -,
                   representing alg[i] is better / equal / worse than alg[j]
        """
        sta = []
        length = len(self.case.orders)

        for alg1 in range(0, length):
            tp = []
            for alg2 in range(0, length):
                if alg1 == alg2 :
                    tp.append( "\\" )
                else:
                    x1 = arr[alg1]
                    x2 = arr[alg2]
                    if np.array_equal(x1, x2):  # same value array
                        tp.append("=")
                    else:
                        [T, p] = stats.wilcoxon(x1, x2, zero_method='wilcox')
                        if p <= 0.05 :
                            if opt( x1.mean(), x2.mean() ):
                                tp.append("+")  # alg1 is better than alg2
                            else:
                                tp.append("-")  # alg1 is worse than alg2
                        else:
                            tp.append("=")  # alg1 is equal to alg2
            sta.append(tp)
        #for each in sta:
        #    print(each)
        return sta
Example #20
0
File: he.py Project: edawine/fatools
def summarize_he( analytical_sets ):

    results = {}
    he = {}

    for analytical_set in analytical_sets:
        he[analytical_set.label] = calculate_he(analytical_set.allele_df)

    he_df = DataFrame( he )
    labels = list(he_df.columns)
    if len(labels) == 2:
        # use Mann-Whitney / Wilcoxon test
        results['test'] = 'Wilcoxon test (paired)'
        results['stats'] = wilcoxon( he_df[labels[0]], he_df[labels[1]])

    elif len(labels) > 2:
        # use Kruskal Wallis
        results['test'] = 'Kruskal-Wallis test'
        results['stats'] = kruskal( * [he_df[x] for x in labels])
        results['warning'] = ''

    results['data'] = he_df
    results['mean'] = he_df.mean()
    results['stddev'] = he_df.std()
    #raise RuntimeError

    return results
Example #21
 def get_general_p_value(self, method1, method2):
     method1_values = []
     method2_values = []
     for values_by_method in self._general_method_values.values():
         method1_values.extend(values_by_method[method1])
         method2_values.extend(values_by_method[method2])
     return stats.wilcoxon(method1_values, method2_values)[1]
Example #22
0
File: test_sig.py Project: nikwoj/IVA
def test_sig(fil1, fil2) :
    l1 = []
    l2 = []
    print(fil1)
    print(fil2)
    fil1 = open(fil1, "r+")
    fil2 = open(fil2, "r+")
    
    ## Don't read the \n character
    text = fil1.readline()[:-1]
    while text != "" :
        l1.append(log10(float(text)))
        text = fil1.readline()[:-1]
    
    text = fil2.readline()[:-1]
    while text != "" :
        l2.append(log10(float(text)))
        text = fil2.readline()[:-1]
    
    stats = wilcoxon(l1, l2)
    
    print "[PCA IVAG IVAL] ISI  : ", mean(l1)
    print "[PCA      IVAL] ISI  : ", mean(l2)
    print stats
    return stats
Example #23
0
def main(stat_dir, kernel):
    stats = defaultdict(dict)
    for t, (tfile, parser) in TECHNIQUES.items():
        stat_file = os.path.join(stat_dir, tfile)
        with open(stat_file, 'r') as f:
            for line in f:
                parts = line.strip().split(',')
                results = parser(parts, kernel)
                if results is None: continue
                dset, stat = results
                stats[dset][t] = stat

    S = np.array([[stats[dset][t] for t in TS]
                  for dset in stats.keys()
                    if all((t in stats[dset])
                           for t in TS)])

    wins = (S[:, 0] > S[:, 1]) + 0.5*(S[:, 0] == S[:, 1])
    winrate = np.average(wins)
    if winrate > 0.5:
        symbol = '>'
    elif winrate < 0.5:
        symbol = '<'
    else:
        symbol = '='

    print(np.column_stack([S, wins]))
    print('%s %s %s' % (TS[0], symbol, TS[1]))
    print('Winrate for %s: %f' % (TS[0], winrate))
    print('Wilcoxon p-value: %f' % wilcoxon(S[:, 0], S[:, 1])[1])
Example #24
0
File: report.py Project: padenis/attelo
    def significance(self, fun, other, test="wilcoxon"):
        """computes stats significance of difference between two sets
        of scores test can be paired wilcoxon, mannwhitney for indep
        samples, or paired ttest.
        """
        scores1 = self.map_doc_scores(fun)
        scores2 = other.map_doc_scores(fun)
        if isinstance(scores1[0], float) or isinstance(scores1[0], int):
            pass
        else:
            # TODO: this is suspicious
            scores1 = [x for x, _ in scores1]
            scores2 = [x for x, _ in scores2]

        # differences = [(x, y) for (x, y) in zip(scores1, scores2) if x != y]
        # print(difference, file=sys.stderr)
        # print(d2, file=sys.stderr)
        # print([x for (i,x) in enumerate(d1) if x!=d2[i]], file=sys.stderr)
        assert len(scores1) == len(scores2)

        results = {}
        if test == "wilcoxon" or test == "all":
            results["wilcoxon"] = wilcoxon(scores1, scores2)[1]
        if test == "ttest" or test == "all":
            results["paired ttest"] = ttest_rel(scores1, scores2)[1]
        if test == "mannwhitney" or test == "all":
            results["mannwhitney"] = mannwhitneyu(scores1, scores2)[1]
        return results
Example #25
0
def check_mean():        
    '''Data from Altman, check for significance of mean value.
    Compare average daily energy intake (kJ) over 10 days of 11 healthy women, and
    compare it to the recommended level of 7725 kJ.
    '''
    # Get data from Altman

    data = getData('altman_91.txt')

    # Watch out: by default the SD is calculated with 1/N!
    myMean = np.mean(data)
    mySD = np.std(data, ddof=1)
    print('Mean and SD: {0:4.2f} and {1:4.2f}'.format(myMean, mySD))

    # Confidence intervals
    tf = stats.t(len(data)-1)
    ci = np.mean(data) + stats.sem(data)*np.array([-1,1])*tf.isf(0.025)
    print('The confidence intervals are {0:4.2f} to {1:4.2f}.'.format(ci[0], ci[1]))

    # Check for significance
    checkValue = 7725
    t, prob = stats.ttest_1samp(data, checkValue)
    if prob < 0.05:
        print('{0:4.2f} is significantly different from the mean (p={1:5.3f}).'.format(checkValue, prob))

    # For not normally distributed data, use the Wilcoxon signed rank test
    (rank, pVal) = stats.wilcoxon(data-checkValue)
    if pVal < 0.05:
      issignificant = 'unlikely'
    else:
      issignificant = 'likely'
      
    print('It is ' + issignificant + ' that the value is {0:d}'.format(checkValue))
Example #26
0
def compute_wilcoxon_test(best_test_score):
    """
    Pairwise Wilcoxon signed-rank tests between method scores per organism.
    """
    
    df_col_name = ['ind_vs_union', 'ind_vs_mtl', \
        'ind_vs_mtmkl', 'union_vs_mtl', 'union_vs_mtmkl', \
        'mtl_vs_mtmkl']
    pairs_test = [('individual', 'union'), ('individual', 'mtl'), \
        ('individual', 'mtmkl'), ('union', 'mtl'), ('union', 'mtmkl'), \
        ('mtl', 'mtmkl')]
    org_names = best_test_score.keys()

    ttest_p_val = numpy.zeros((len(org_names), len(pairs_test)))
    #ttest_p_val = numpy.zeros((len(pairs_test), len(org_names)))

    for org_idx, org_code in enumerate(org_names):
        meth_perf = best_test_score[org_code]

        for pair_idx, rel_pair in enumerate(pairs_test):
            t_stats, p_val = stats.wilcoxon(meth_perf[rel_pair[0]], meth_perf[rel_pair[1]])
            
            ttest_p_val[org_idx, pair_idx] = p_val
            #ttest_p_val[pair_idx, org_idx] = p_val
        
    
    df_pval = pandas.DataFrame(ttest_p_val, columns=df_col_name, index=org_names)
    #df_pval = pandas.DataFrame(ttest_p_val, columns=org_names, index=pairs_test)
    
    return df_pval 
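
A hypothetical input shape for compute_wilcoxon_test (assumes numpy, pandas, and scipy.stats are imported as in the project; the organism code and scores are invented):

import numpy

best_test_score = {
    'org_A': {
        'individual': numpy.random.rand(10),
        'union': numpy.random.rand(10),
        'mtl': numpy.random.rand(10),
        'mtmkl': numpy.random.rand(10),
    },
}
print(compute_wilcoxon_test(best_test_score))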
Example #27
0
def symtest(alpha, axis=None):
    """
    Non-parametric test for symmetry around the median. Works by performing a 
    Wilcoxon sign rank test on the differences to the median.

    H0: the population is symmetrical around the median
    HA: the population is not symmetrical around the median
        

    :param alpha: sample of angles in radian
    :param axis:  compute along this dimension, default is None
                  if axis=None, array is raveled
    :return pval: two-tailed p-value
    :return T:    test statistics of underlying wilcoxon test
   

    References: [Zar2009]_
    """

    if axis is None:
        axis = 0
        alpha = alpha.ravel()
        
    m = descriptive.median(alpha)
    d = descriptive.pairwise_cdiff(m,alpha)
    
    T, pval = stats.wilcoxon(d)

    return pval, T
Example #28
0
def main():
    parser = argparse.ArgumentParser('Computes statistical significance between two settings.')
    parser.add_argument('csvfiles', nargs='+')
    parser.add_argument('--stratifier', '-s', default='word1', help='Stratifier used in training/testing.')
    args = parser.parse_args()

    all_scores = {}
    for csvfile in args.csvfiles:
        try:
            table = pd.read_csv(csvfile)
        except pd.parser.CParserError:
            print "had to skip %s" % csvfile
            continue
        strat = table.groupby(args.stratifier)
        accs = []
        for stratname, strat in table.groupby(args.stratifier):
            acc = np.sum(strat['target'] == strat['prediction']) / float(len(strat))
            accs.append(acc)
        all_scores[csvfile] = accs

    for leftfile, rightfile in combinations(all_scores.keys(), 2):
        print(leftfile)
        print(rightfile)
        left, right = all_scores[leftfile], all_scores[rightfile]
        t, p = wilcoxon(left, right)
        if p < .001:
            stars = "***"
        elif p < .01:
            stars = "**"
        elif p < .05:
            stars = "*"
        else:
            stars = ""
        print "la: %.3f     ra: %.3f   p: %.3f %s" % (np.mean(left), np.mean(right), p, stars)
        print
Example #29
0
def test_accuracy_wilcoxon():
    freq = [1, 4, 16, 15, 8, 4, 5, 1, 2]
    nums = range(-4, 5)
    x = np.concatenate([[u] * v for u, v in zip(nums, freq)])
    y = np.zeros(x.size)

    T, p = stats.wilcoxon(x, y, "pratt")
    assert_allclose(T, 423)
    assert_allclose(p, 0.00197547303533107)

    T, p = stats.wilcoxon(x, y, "zsplit")
    assert_allclose(T, 441)
    assert_allclose(p, 0.0032145343172473055)

    T, p = stats.wilcoxon(x, y, "wilcox")
    assert_allclose(T, 327)
    assert_allclose(p, 0.00641346115861)
Example #30
0
def go_nonpar(county, state, stat_list, statemean, winner):

    '''Non-parametric test, used when the distribution is not normal.'''


    sample = winner[(winner['county'] == county) & (winner['state'] == state)]['price'].values
    z_statistic, p_value = wilcoxon(sample - statemean)
    stat_list.append(p_value/2)
Example #31
0
def plot_consistency_significant_activity_byphase(dataDB,
                                                  ds,
                                                  intervals,
                                                  minTrials=10,
                                                  performance=None,
                                                  dropChannels=None):
    rows = ds.list_dsets_pd()
    rows['mousename'] = [
        dataDB.find_mouse_by_session(session) for session in rows['session']
    ]

    dfColumns = ['datatype', 'trialType', 'consistency']
    dfConsistency = pd.DataFrame(columns=dfColumns)

    for (datatype,
         trialType), rowsMouse in rows.groupby(['datatype', 'trialType']):
        pSigDict = {}
        for mousename, rowsSession in rowsMouse.groupby(['mousename']):
            pSig = []
            for session, rowsTrial in rowsSession.groupby(['session']):
                if (performance is None) or dataDB.is_matching_performance(
                        session, performance, mousename=mousename):
                    assert intervals[0] in list(rowsTrial['intervName'])
                    assert intervals[1] in list(rowsTrial['intervName'])
                    dsetLabel1 = pd_is_one_row(
                        pd_query(rowsTrial,
                                 {'intervName': intervals[0]}))[1]['dset']
                    dsetLabel2 = pd_is_one_row(
                        pd_query(rowsTrial,
                                 {'intervName': intervals[1]}))[1]['dset']
                    data1 = ds.get_data(dsetLabel1)
                    data2 = ds.get_data(dsetLabel2)
                    nTrials1 = data1.shape[0]
                    nTrials2 = data2.shape[0]  # trials are along axis 0 here too

                    if (nTrials1 < minTrials) or (nTrials2 < minTrials):
                        print(session, datatype, trialType, 'too few trials',
                              nTrials1, nTrials2, ';; skipping')
                    else:
                        nChannels = data1.shape[1]
                        if dropChannels is not None:
                            channelMask = np.ones(nChannels).astype(bool)
                            channelMask[dropChannels] = 0
                            data1 = data1[:, channelMask]
                            data2 = data2[:, channelMask]
                            nChannels = nChannels - len(dropChannels)

                        pvals = [
                            wilcoxon(data1[:, iCh],
                                     data2[:, iCh],
                                     alternative='two-sided')[1]
                            for iCh in range(nChannels)
                        ]
                        # pSig += [(np.array(pvals) < 0.01).astype(int)]
                        pSig += [-np.log10(np.array(pvals))]
            # pSigDict[mousename] = np.sum(pSig, axis=0)
            pSigDict[mousename] = np.mean(pSig, axis=0)

        mice = sorted(dataDB.mice)
        nMice = len(mice)
        corrCoef = np.zeros((nMice, nMice))
        for iMouse, iName in enumerate(mice):
            for jMouse, jName in enumerate(mice):
                corrCoef[iMouse, jMouse] = np.corrcoef(pSigDict[iName],
                                                       pSigDict[jName])[0, 1]

        sns.pairplot(data=pd.DataFrame(pSigDict), vars=mice)

        prefixPath = 'pics/consistency/significant_activity/byphase/bymouse/'
        make_path(prefixPath)
        plt.savefig(prefixPath + datatype + '_' + trialType + '.svg')
        plt.close()

        fig2, ax2 = plt.subplots()
        ax2.imshow(corrCoef, vmin=0, vmax=1)
        imshow(fig2,
               ax2,
               corrCoef,
               title='Significance Correlation',
               haveColorBar=True,
               limits=[0, 1],
               xTicks=mice,
               yTicks=mice)

        prefixPath = 'pics/consistency/significant_activity/byphase/bymouse_corr/'
        make_path(prefixPath)
        plt.savefig(prefixPath + datatype + '_' + trialType + '.svg')
        plt.close()

        avgConsistency = np.round(np.mean(offdiag_1D(corrCoef)), 2)
        dfConsistency = pd_append_row(dfConsistency,
                                      [datatype, trialType, avgConsistency])

    fig, ax = plt.subplots()
    dfPivot = pd_pivot(dfConsistency, *dfColumns)
    sns.heatmap(data=dfPivot, ax=ax, annot=True, vmax=1, cmap='jet')

    prefixPath = 'pics/consistency/significant_activity/byphase/'
    make_path(prefixPath)
    fig.savefig(prefixPath + 'consistency_' + str(performance) + '.svg')
    plt.close()
Example #32
0
    significance_ref = get_raw(argbest(mean))

    columns = ['{:15}'.format(str(ds).replace('_', '\\_')[:14])]
    rank = []
    for idx in range(len(mean)):
        rank.append(mean[idx])
        if mean[idx] in {0, 4}:
            columns.append('       ---                  ')
        else:
            entry = []
            if mean[idx] == best(mean):
                entry.append('\\B ')
                significant = True
            else:
                entry.append('   ')
                res = wilcoxon(significance_ref, get_raw(idx))
                significant = res.pvalue < 0.05
            if not significant:
                entry.append('\\ul{')
            else:
                entry.append('    ')
            entry.append(f'{mean[idx]:.3f} \\(\\pm\\) {std[idx]:.3f}')
            if not significant:
                entry.append('}')
            columns.append(''.join(entry))
    tables[metric][0].append('\t& '.join(columns) + '\t\\\\')
    tables[metric][1].append(rank)

for metric, data in tables.items():
    print('\n\n', metric, len(data[0]))
    print('\n'.join(data[0]))
Example #33
0
def sig_test_wilcoxon(array1, array2):
    if np.all((array1 - array2) == 0):
        pval = 1
    else:
        pval = wilcoxon(array1, array2, alternative="greater")[1]
    return pval
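
A hedged usage sketch for sig_test_wilcoxon (assumes numpy as np and scipy's wilcoxon in scope; data invented). The all-zero-difference guard above matters because wilcoxon cannot rank an all-zero difference vector:

import numpy as np

x = np.array([1.2, 2.5, 3.1, 4.8, 5.0, 6.3, 7.7, 8.4])
y = x - np.array([0.5, 0.4, 0.3, 0.6, 0.2, 0.7, 0.1, 0.8])
print(sig_test_wilcoxon(x, x))  # 1, via the guard
print(sig_test_wilcoxon(x, y))  # small one-sided p-value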
Example #34
def main():
    _simulations = load.structured()
    _simulations = filtering.by_time_points_amount(_simulations,
                                                   _time_points=TIME_POINTS)
    _simulations = filtering.by_categories(_simulations,
                                           _is_single_cell=True,
                                           _is_heterogeneity=False,
                                           _is_low_connectivity=False,
                                           _is_causality=False,
                                           _is_dominant_passive=False,
                                           _is_fibrin=False)
    print('Total simulations:', len(_simulations))

    _fiber_densities = compute_simulations_fiber_densities(_simulations)

    _y_arrays = [[] for _i in DERIVATIVES]
    for _index_1 in tqdm(range(len(_simulations)), desc='Simulations loop'):
        _simulation_1 = _simulations[_index_1]
        _cell_1_fiber_densities = \
            [_fiber_densities[(_simulation_1, _direction)] for _direction in ['left', 'right', 'up', 'down']]
        _cell_1_fiber_densities = np.mean(_cell_1_fiber_densities, axis=0)
        for _index_2 in range(_index_1 + 1, len(_simulations)):
            _simulation_2 = _simulations[_index_2]
            _cell_2_fiber_densities = \
                [_fiber_densities[(_simulation_2, _direction)] for _direction in ['left', 'right', 'up', 'down']]
            _cell_2_fiber_densities = np.mean(_cell_2_fiber_densities, axis=0)
            for _derivative_index, _derivative in enumerate(DERIVATIVES):
                _y_arrays[_derivative_index].append(
                    compute_lib.correlation(
                        compute_lib.derivative(_cell_1_fiber_densities,
                                               _n=_derivative),
                        compute_lib.derivative(_cell_2_fiber_densities,
                                               _n=_derivative)))

    print('Total points:', len(_y_arrays[0]))
    print('Wilcoxon around the zero')
    for _y_array, _derivative in zip(_y_arrays, DERIVATIVES):
        print('Derivative:', _derivative, wilcoxon(_y_array))

    # plot
    _colors_array = config.colors(3)
    _fig = go.Figure(data=[
        go.Box(y=_y,
               name=_derivative,
               boxpoints='all',
               jitter=1,
               pointpos=0,
               line={'width': 1},
               fillcolor='white',
               marker={
                   'size': 10,
                   'color': _color
               },
               opacity=0.7,
               showlegend=False) for _y, _derivative, _color in zip(
                   _y_arrays, DERIVATIVES_TEXT, _colors_array)
    ],
                     layout={
                         'xaxis': {
                             'title': 'Fiber density derivative',
                             'zeroline': False
                         },
                         'yaxis': {
                             'title': 'Correlation',
                             'range': [-1, 1],
                             'zeroline': False,
                             'tickmode': 'array',
                             'tickvals': [-1, -0.5, 0, 0.5, 1]
                         }
                     })

    save.to_html(_fig=_fig,
                 _path=os.path.join(paths.PLOTS, save.get_module_name()),
                 _filename='plot')
Example #35
0
                     fontsize=fontSizeLabels)
            plt.axvline(x=0, linestyle='--', linewidth=1.5, color='0.5')
            extraplots.set_ticks_fontsize(plt.gca(), fontSizeTicks)
            #plt.xlabel('Reward modulation index\n(sound period)', fontsize=fontSizeLabels)
            plt.ylabel('Number of cells', fontsize=fontSizeLabels)
            extraplots.boxoff(plt.gca())

            # -- Stats: test whether the modulation index distribution for all good cells is centered at zero -- #
            print('#############################################')
            print(
                '{} has {} sound responsive good cells and {} non-responsive cells'
                .format(brainArea, sum(soundResp), sum(~soundResp)))
            print(
                'Among {} cells, in {} window, {} cells were significantly modulated'
                .format(cellType, modWindow, len(sigModI)))
            (Z, pVal) = stats.wilcoxon(allModI)
            print(
                'Mean mod index is {:.3f}.\nUsing the Wilcoxon signed-rank test, comparing the modulation index distribution for all good cells to zero yielded a p value of {:.3f}'
                .format(np.mean(allModI), pVal))
            #(Z, pVal) = stats.wilcoxon(sigModI)
            #print('For significantly modulated {} cells in {}: Mean mod index is {:.3f}. Using the Wilcoxon signed-rank test, comparing the modulation index distribution to zero yielded a p value of {:.3f}'
            #	.format(cellType, brainArea, np.mean(sigModI), pVal))

summaryFilename = 'summary_reward_modulation_sound_-0.1-0s_rightAC_responsive_cells.npz'
summaryFullPath = os.path.join(dataDir, summaryFilename)
summary = np.load(summaryFullPath)
# soundResp = summary['soundResponsive']
# sigModI = summary['sigModI']
# nonsigModI = summary['nonsigModI']
allModIAC = summary['allModI']
Example #36
# Quick reference of common hypothesis tests. The imports and toy data below
# are an addition (an assumption, not part of the original snippet) so that
# it runs end to end.
import numpy as np
from scipy.stats import (anderson, spearmanr, kendalltau, chi2_contingency,
                         ttest_ind, ttest_rel, wilcoxon, f_oneway, kruskal)
from statsmodels.tsa.stattools import adfuller

rng = np.random.default_rng(0)
dat, dat2, dat3 = rng.normal(size=(3, 30))

# Anderson-Darling test for normality
result = anderson(dat)
print('stat=%.3f' % result.statistic)

# Spearman's Rank Correlation
stat, p = spearmanr(dat, dat2)

# Kendall's Tau Correlation
stat, p = kendalltau(dat, dat2)

# Chi-Squared test
table = [[10, 20, 30], [6, 9, 17]]
stat, p, dof, expected = chi2_contingency(table)

# Student's t-test, independent samples
stat, p = ttest_ind(dat, dat2)

# Student's t-test, paired samples
stat, p = ttest_rel(dat, dat2)

# Wilcoxon signed-rank test
stat, p = wilcoxon(dat, dat2)

# ANOVA
stat, p = f_oneway(dat, dat2, dat3)

# Kruskal-Wallis test
stat, p = kruskal(dat, dat2)

# Dickey-Fuller Unit Root test for time series autoregressiveness
stat, p, lags, obs, crit, t = adfuller(dat)
Example #37
0
    ax.set_ylim(ax.get_xlim())
    ax.plot(ax.get_xlim(), ax.get_xlim(), 'k--')
    ax.set_xlabel('{}\n{}'.format(shortnames[mod1], mod1))
    ax.set_ylabel('{}\n{}'.format(shortnames[mod2], mod2))
    plt.suptitle('model performance\nr_value')
    fig.set_size_inches(5, 5)
    fig.savefig(
        '/home/mateo/Pictures/DAC1/181205_model_performance_{}_vs_{}.png'.
        format(shortnames[mod1], shortnames[mod2]),
        dpi=100)
    fig.savefig(
        '/home/mateo/Pictures/DAC1/181205_model_performance_{}_vs_{}.svg'.
        format(shortnames[mod1], shortnames[mod2]))

tidy = filtered.replace(shortnames)
order = [short for short in shortnames.values() if short != 'resp']
fig, ax = plt.subplots()
g = sns.barplot(x='modelname', y='r_test', data=tidy, order=order, ax=ax)
fig.set_size_inches(5, 5)
fig.suptitle('model performance summary\nWilcoxon test')
fig.savefig('/home/mateo/Pictures/DAC1/181205_sumary_model_performance.png',
            dpi=100)
fig.savefig('/home/mateo/Pictures/DAC1/181205_sumary_model_performance.svg')

pivi = tidy.pivot(index='cellid', columns='modelname', values='r_test')
for mod1, mod2 in itt.combinations(pivi.keys(), 2):
    x = pivi[mod1].values
    y = pivi[mod2].values
    w_test = sst.wilcoxon(x, y)
    print('{} vs {} pvalue: {:.3f}'.format(mod1, mod2, w_test.pvalue))
Example #38
0
    df["Armband"] = pd.Series(armbands)
    sns.set(style="white", font_scale=4)
    ax = sns.barplot(data=df,
                     x="Number of Training Cycles",
                     y="Accuracy (%)",
                     hue="Armband")
    ax.set_ylim([50, 90])
    plt.subplots_adjust(left=0.08, right=1., top=0.93, bottom=0.14)

    legend = ax.legend()
    legend.texts[0].set_text("Myo Armband")
    sns.set(style="dark", font_scale=4)
    sns.despine()

    for cycle in range(4):
        stat, p = wilcoxon(accuracies_3DC[cycle], accuracies_myo[cycle])
        print(p)
        if p < 0.05:
            p_rounded = np.round(p, decimals=5)
            if p_rounded > 0:
                label_diff(current_cycle=cycle,
                           p_value=p_rounded,
                           sign_to_use="=")
            else:
                label_diff(current_cycle=cycle,
                           p_value=0.0001,
                           sign_to_use="<")
    mng = plt.get_current_fig_manager()
    plt.legend(loc='lower right')
    mng.window.state('zoomed')  # works fine on Windows!
Example #39
             edgecolor='black',
             color=histogram_colours[iti])
    plt.xlim(-1, 1)
    plt.savefig(figure_filename)

    figure_filename = 'Variable ITIs/' + Distribution_name + '/Mixed population softmax probability difference histogram for ' + ITI_labels[
        iti] + '.eps'
    plt.figure(iti + 2)
    plt.hist(score_softmax2.reshape(n_rats * n_blocks),
             bins=np.linspace(-1, 1, num=13),
             edgecolor='black',
             color=histogram_colours[iti])
    plt.xlim(-1, 1)
    plt.savefig(figure_filename)

    all_scores[:, iti] = score_withoutexplore.reshape(n_rats * n_blocks)
    W[iti], p[iti] = stats.wilcoxon(all_scores[:, iti])
    mu[iti] = np.mean(all_scores[:, iti])

    all_scores_softmax[:, iti] = score_softmax2.reshape(n_rats * n_blocks)
    W_softmax[iti], p_softmax[iti] = stats.wilcoxon(all_scores_softmax[:, iti])
    mu_softmax[iti] = np.mean(all_scores_softmax[:, iti])

W_directComparison, p_directComparison = stats.wilcoxon(
    all_scores[:, 0], all_scores[:, 1])
mu_directcomparison = np.mean(all_scores[:, 1] - all_scores[:, 0])

W_directComparison_softmax, p_directComparison_softmax = stats.wilcoxon(
    all_scores_softmax[:, 0], all_scores[:, 1])
mu_directcomparison_softmax = np.mean(all_scores_softmax[:, 1] -
                                      all_scores_softmax[:, 0])
Example #40
0
            uint = np.append(uint, upol + ucoul)
            #if weights[z] > 0:
            #    uint = np.append(uint, upol + ucoul)
            #    cut_weights = np.append(cut_weights, weights[z])
        if pair_num == 0:
            uint_array_1 = uint
            weights_array_1 = weights
        elif pair_num == 1:
            uint_array_2 = uint
            weights_array_2 = weights
#        average = np.average(uint, weights=weights)
#        variance = np.average((uint-average)**2, weights=weights)
#        stdDev = math.sqrt(variance)
    uint_array_1 = np.multiply(uint_array_1, weights_array_1)
    uint_array_2 = np.multiply(uint_array_2, weights_array_2)
    pval = stats.wilcoxon(uint_array_1, uint_array_2)
    #    outputFileName  = prot_family + '_pval.txt'
    #    f = open(outputFileName, 'w')
    #    f.write(str(pval))
    #    f.close
    plt.scatter(uint_array_1, weights_array_1, color='blue')
    plt.scatter(uint_array_2, weights_array_2, color='red')
    plt.xlabel('Uint')
    plt.ylabel('Weights')
    plt.title(protnames[iter] + ' ' + protnames[iter + pair_num] +
              ' folded energy distributions')
    plt.savefig(protnames[iter] + '_' + protnames[iter + pair_num] +
                '_energy_distributions.pdf')
    plt.show()  # show() after saving so the written PDF is not blank
    print(protnames[iter])
    print(protnames[iter + pair_num])
Example #41
0
def test_wilcoxon_result_attributes():
    x = np.array([120, 114, 181, 188, 180, 146, 121, 191, 132, 113, 127, 112])
    y = np.array([133, 143, 119, 189, 112, 199, 198, 113, 115, 121, 142, 187])
    res = stats.wilcoxon(x, y, correction=False)
    attributes = ('statistic', 'pvalue')
    check_named_results(res, attributes)
Example #42
0
    def cellStaProcessing(self, test='t_test'):
        
        if self.stim_start_frames:
            
            #this is the key parameter for the sta, how many frames before and after the stim onset do you want to use
            self.pre_frames = int(np.ceil(self.fps*0.5)) # 500 ms pre-stim period
            self.post_frames = int(np.ceil(self.fps*3)) # 3000 ms post-stim period

            #list of cell pixel intensity values during each stim on each trial
            self.all_trials = [] # list 1 = cells, list 2 = trials, list 3 = dff vector

            # the average of every trial
            self.stas = [] # list 1 = cells, list 2 = sta vector

            self.all_amplitudes = []
            self.sta_amplitudes = []

            self.t_tests = []
            self.wilcoxons = []

            for plane in range(self.n_planes):

                all_trials = [] # list 1 = cells, list 2 = trials, list 3 = dff vector

                stas = [] # list 1 = cells, list 2 = sta vector

                all_amplitudes = []
                sta_amplitudes = []

                t_tests = []
                wilcoxons = []

                #loop through each cell
                for i, unit in enumerate(self.raw[plane]):

                    trials = []
                    amplitudes = []
                    df = []
                    
                    # a flat list of all observations before the stim occurred
                    pre_obs = []
                    # a flat list of all observations after the stim occurred
                    post_obs = []
                    
                    for stim in self.stim_start_frames[plane]:
                        
                        # get baseline values from pre_stim
                        pre_stim_f  = unit[ stim - self.pre_frames : stim ]
                        baseline = np.mean(pre_stim_f)

                        # the whole trial and dfof using baseline
                        trial = unit[ stim - self.pre_frames : stim + self.post_frames ]
                        trial = [ ( (f-baseline) / baseline) * 100 for f in trial ] #dff calc
                        trials.append(trial)
                        
                        #calc amplitude of response        
                        pre_f = trial[ : self.pre_frames - 1]
                        pre_f = np.mean(pre_f)
                        
                        avg_post_start = self.pre_frames + ( self.duration_frames + 1 )
                        avg_post_end = avg_post_start + int(np.ceil(self.fps*0.5)) # post-stim period of 500 ms
                        
                        post_f = trial[avg_post_start : avg_post_end]
                        post_f = np.mean(post_f)
                        amplitude = post_f - pre_f
                        amplitudes.append(amplitude)
                        
                        # append to flat lists
                        pre_obs.append(pre_f)
                        post_obs.append(post_f)

                        
                    trials = np.array(trials)
                    all_trials.append(trials)
                    
                    #average amplitudes across trials
                    amplitudes = np.array(amplitudes)
                    all_amplitudes.append(amplitudes)
                    sta_amplitude = np.mean(amplitudes,0)
                    sta_amplitudes.append(sta_amplitude)

                    #average across all trials
                    sta = np.mean(trials, 0)        
                    stas.append(sta)
                    
                    #remove nans from flat lists
                    pre_obs = [x for x in pre_obs if ~np.isnan(x)]
                    post_obs = [x for x in post_obs if ~np.isnan(x)]
                    
                    # t-test and Wilcoxon test pre vs post stim (any other test could also be used here)
                    t_test = stats.ttest_rel(pre_obs, post_obs)
                    t_tests.append(t_test)
                    
                    wilcoxon = stats.wilcoxon(pre_obs, post_obs)
                    wilcoxons.append(wilcoxon)

                self.all_trials.append(np.array(all_trials))
                self.stas.append(np.array(stas))
                
                self.all_amplitudes.append(np.array(all_amplitudes))
                self.sta_amplitudes.append(np.array(sta_amplitudes))

                self.t_tests.append(np.array(t_tests))
                self.wilcoxons.append(np.array(wilcoxons))
            
            plt.figure()
            plt.plot([avg_post_start] * 2, [-1000, 1000])
            plt.plot([avg_post_end] * 2, [-1000, 1000])
            plt.plot([self.pre_frames - 1] * 2, [-1000, 1000])
            plt.plot([0] * 2, [-1000, 1000])
            plt.plot(stas[5])
            plt.plot(stas[10])
            plt.plot(stas[15])
            plt.ylim([-100,200]) 

            self.staSignificance(test)
            self.singleTrialSignificance()   
fig.savefig("vu_data_boxplot.png")
#fig.savefig("nih_data_boxplot.png")


from scipy.stats import wilcoxon


print("Wilcoxon between NIH weights, Multi weights")
print(wilcoxon(nih_df['dice'], msl_full_df['dice']))
print("Wilcoxon between NIH weights, MSL 1/2 A weights")
print(wilcoxon(nih_df['dice'], msl_half_df['dice']))
print("Wilcoxon between NIH weights, MSL 1/2 B weights")
print(wilcoxon(nih_df['dice'], msl_other_half_df['dice']))


print("Wilcoxon between VU weights, Multi weights")
print(wilcoxon(vu_df['dice'], msl_full_df['dice']))
print("Wilcoxon between VU weights, MSL 1/2 A weights")
print(wilcoxon(vu_df['dice'], msl_half_df['dice']))
print("Wilcoxon between VU weights, MSL 1/2 B weights")
print(wilcoxon(vu_df['dice'], msl_other_half_df['dice']))
Example #44
0
from scipy.stats import wilcoxon

a = [14, 18, 2, 4, -5, 14, -3, -1, 1, 6, 3, 3]
b = [8, 26, -7, -1, 2, 9, 0, -4, 13, 3, 3, 4]

print(wilcoxon(a, b, zero_method='zsplit'))
Example #45
def wilcoxon_holm(alpha=0.05, df_perf=None):
    """
    Applies the Wilcoxon signed-rank test between each pair of algorithms,
    then uses Holm's correction to decide which null hypotheses to reject.
    """
    # count the number of tested datasets per classifier
    df_counts = pd.DataFrame({
        'count': df_perf.groupby(['model']).size()
    }).reset_index()
    # get the maximum number of tested datasets
    max_nb_datasets = df_counts['count'].max()
    # get the list of classifiers who have been tested on nb_max_datasets
    classifiers = list(
        df_counts.loc[df_counts['count'] == max_nb_datasets]['model'])
    # test the null hypothesis using friedman before doing a post-hoc analysis
    friedman_p_value = friedmanchisquare(
        *(np.array(df_perf.loc[df_perf['model'] == c]['score'])
          for c in classifiers))[1]
    if friedman_p_value >= alpha:
        # then the null hypothesis over the entire classifiers cannot be rejected
        raise ValueError(
            'Not enough datasets to reject the null hypothesis over the entire classifiers'
        )
    # get the number of classifiers
    m = len(classifiers)
    # init array that contains the p-values calculated by the Wilcoxon signed rank test
    p_values = []
    # loop through the algorithms to compare pairwise
    for i in range(m - 1):
        # get the name of classifier one
        classifier_1 = classifiers[i]
        # get the performance of classifier one
        perf_1 = np.array(
            df_perf.loc[df_perf['model'] == classifier_1]['score'],
            dtype=np.float64)
        for j in range(i + 1, m):
            # get the name of the second classifier
            classifier_2 = classifiers[j]
            # get the performance of classifier one
            perf_2 = np.array(
                df_perf.loc[df_perf['model'] == classifier_2]['score'],
                dtype=np.float64)
            # calculate the p_value
            p_value = wilcoxon(perf_1, perf_2, zero_method='pratt')[1]
            # append to the list
            p_values.append((classifier_1, classifier_2, p_value, False))
    # get the number of hypothesis
    k = len(p_values)
    # sort the list in ascending order of p-value
    p_values.sort(key=operator.itemgetter(2))

    # loop through the hypothesis
    for i in range(k):
        # correct alpha with holm
        new_alpha = float(alpha / (k - i))
        # test if significant after holm's correction of alpha
        if p_values[i][2] <= new_alpha:
            p_values[i] = (p_values[i][0], p_values[i][1], p_values[i][2],
                           True)
        else:
            # stop
            break
    # compute the average ranks to be returned (useful for drawing the cd diagram)
    # sort the dataframe of performances
    sorted_df_perf = df_perf.loc[df_perf['model'].isin(classifiers)]. \
        sort_values(['model', 'dataset'])
    # get the rank data
    rank_data = np.array(sorted_df_perf['score']).reshape(m, max_nb_datasets)

    # create the data frame containing the accuracies
    df_ranks = pd.DataFrame(data=rank_data,
                            index=np.sort(classifiers),
                            columns=np.unique(sorted_df_perf['dataset']))

    # number of wins
    dfff = df_ranks.rank(ascending=False)

    # average the ranks
    average_ranks = df_ranks.rank(ascending=False).mean(axis=1).sort_values(
        ascending=False)
    # return the p-values and the average ranks
    return p_values, average_ranks, max_nb_datasets
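
A hedged usage sketch for wilcoxon_holm (assumes pandas, numpy, operator, friedmanchisquare, and wilcoxon are in scope as in the snippet; the frame is invented and needs 'model', 'dataset', and 'score' columns, one row per result):

import pandas as pd

df_perf = pd.DataFrame({
    'model': ['a'] * 5 + ['b'] * 5 + ['c'] * 5,
    'dataset': list('vwxyz') * 3,
    'score': [.90, .80, .85, .90, .70,
              .60, .65, .70, .60, .55,
              .50, .55, .60, .50, .45],
})
p_values, average_ranks, n_datasets = wilcoxon_holm(alpha=0.05, df_perf=df_perf)
print(p_values)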
Example #46
0
          (begin_date, round(init_svi, 4), round(
              init_bs, 4), round(portfolio_net_svi / init_svi,
                                 4), round(portfolio_net_bs / init_bs, 4),
           round(tradedamt_svi / holdamt_svi,
                 4), round(tradedamt_bs / holdamt_bs, 4)))
print('=' * 200)
print("%20s %20s %20s %20s %20s %20s %20s %20s" %
      ("eval date", "spot", "delta", 'price_svi', 'price_bs', 'portfolio_svi',
       'portfolio_bs', 'transaction'))
print('svi_pnl', sum(svi_pnl) / len(svi_pnl))
print('bs_pnl', sum(bs_pnl) / len(bs_pnl))
results = {}
results.update({'date': dates})
results.update({'pnl svi': svi_pnl})
results.update({'pnl bs': bs_pnl})
results.update({'option init svi': option_init_svi})
results.update({'option init bs': option_init_bs})
results.update({'transaction svi': transaction_svi})
results.update({'transaction bs': transaction_bs})
results.update({'holdings svi': holdings_svi})
results.update({'holdings bs': holdings_bs})

df = pd.DataFrame(data=results)
df.to_csv(
    os.path.abspath('..') + '/results2/dh_plain_' + contractType + '_r=' +
    str(rebalancerate) + '_2.csv')

t, p = stats.ttest_ind(svi_pnl, bs_pnl)
t1, p1 = stats.wilcoxon(svi_pnl, bs_pnl)
print(t, p)
print(t1, p1)
Example #47
def sig_diff_plot(an,
                  region,
                  abbrv1,
                  abbrv2,
                  thres=[0.05, 0.01, 0.005],
                  plot=True,
                  cbar=True,
                  cshift=0.75,
                  csize=0.5,
                  fig=None,
                  ax=None,
                  title=None,
                  global_title=None):
    stats = np.zeros((len(an.lags), len(an.sessions) + 1))
    signif = np.zeros_like(stats)
    for i_lag, lag in enumerate(an.lags):
        for i_s, session in enumerate(an.sessions):
            for i_test, (sign,
                         string) in enumerate(zip([1, -1],
                                                  ['greater', 'less'])):
                stats[i_lag, i_s], p = sstats.wilcoxon(
                    x=an.var_mean(abbrv=abbrv1,
                                  region=region,
                                  sessions=[session])[i_lag, :],
                    y=an.var_mean(abbrv=abbrv2,
                                  region=region,
                                  sessions=[session])[i_lag, :],
                    zero_method='wilcox',
                    correction=False,
                    alternative=string)  # 'two-sided')
                signif[i_lag, i_s] += sign * np.sum([p < t for t in thres])

        for i_test, (sign,
                     string) in enumerate(zip([1, -1], ['greater', 'less'])):
            stats[i_lag, -1], p = sstats.wilcoxon(
                x=an.var_mean(abbrv=abbrv1,
                              region=region,
                              sessions=an.sessions)[i_lag, :],
                y=an.var_mean(abbrv=abbrv2,
                              region=region,
                              sessions=an.sessions)[i_lag, :],
                zero_method='wilcox',
                correction=False,
                alternative=string)  # 'two-sided')
            signif[i_lag, -1] += sign * np.sum([p < t for t in thres])

    if not plot:
        return signif

    if ax is None:
        ax = plt.gca()
    if fig is None:
        fig = plt.gcf()

    plt.imshow(signif[an.lag_offset:, :].T,
               cmap="bwr",
               vmin=-len(thres),
               vmax=len(thres))  # "Greys")

    ax.set_xticks(np.arange(10))
    ax.set_yticks(np.arange(8))
    ax.set_xticklabels(an.lags_sec[an.lag_offset:])
    ax.set_yticklabels(an.sessions + ["all"])
    ax.set_ylabel("sessions")
    ax.set_xlabel("lag (s)")

    ax.set_title(region + ": difference between " + abbrv1 + " and " +
                 abbrv2 if title is None else title)
    if global_title is not None:
        fig.suptitle(global_title, fontsize=14)

    if not cbar:
        return signif

    fig.subplots_adjust(right=cshift)
    cbar_ax = fig.add_axes([cshift + 0.05, (1 - csize) / 2, 0.05, csize])
    sm = plt.cm.ScalarMappable(cmap=plt.cm.bwr,
                               norm=plt.Normalize(vmin=-len(thres),
                                                  vmax=len(thres)))
    sm._A = []
    cbar = fig.colorbar(sm, cax=cbar_ax, ticks=[-3, -2, -1, 0, 1, 2, 3])
    cbar.ax.set_yticklabels([
        'p<0.001\n' + abbrv1 + '<' + abbrv2, 'p<0.01', 'p<0.05', 'n.s.',
        'p<0.05', 'p<0.01', abbrv1 + '>' + abbrv2 + '\np<0.001'
    ])

    return signif
Example #48
0
    def runSupervisionedModels(self):
        measurements = {}
        skf = StratifiedKFold(n_splits=10)
        models = [
            ('k-NN', KNeighborsClassifier(n_neighbors=13, weights='distance')),
            ('AD', DecisionTreeClassifier()),
            ('NB', GaussianNB()),
            ('MLP', MLPClassifier(momentum=0.8, max_iter=500, learning_rate_init=0.1, hidden_layer_sizes=12))
        ]
    
        
        
        for base in self.bases:
            measurements[base] = {}
            print('='*20 + base + '='*20)
            dataset = self.bases[base]
            array = dataset.values
            arrLen = len(dataset.columns) - 1
            X = array[:,0:arrLen]
            y = array[:,arrLen]
            X = normalize(X)
            le = preprocessing.LabelEncoder()
            y = le.fit_transform(y)
            X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.20, random_state=50)
            for name, model in models:
                model.fit(X_train, Y_train)
                predictions = model.predict(X_test)
                print(f'{name} prediction accuracy:', accuracy_score(Y_test, predictions))
                measurements[base][name] = predictions

        pprint(measurements)
        for base in self.bases:
            msAD = measurements[base]['AD']
            msknn = measurements[base]['k-NN']
            msNB = measurements[base]['NB']
            msMLP = measurements[base]['MLP']
            possibilities = [
                (msAD, msknn),
                (msAD, msNB),
                (msAD, msMLP),
                (msknn, msNB),
                (msknn, msMLP),
                (msNB, msMLP)
            ]
 
            print('='*10 + base + '='*10 + '\n')
            for a, b in possibilities:
                tests = [
                    ('Ttest-ind', stats.ttest_ind(a, b)),
                    ('Wilcoxon', stats.wilcoxon(a, b))
                ]
                for name, test in tests:
                    stat, p = test
                    if p > 0.05:
                        print(f'{name}: stat={stat:.3f}, p={p:.3f} -- Probably the same distribution')
                    else:
                        print(f'{name}: stat={stat:.3f}, p={p:.3f} -- Probably different distributions')
            
            print('\n')
Example #49
                output_results('effort', twenty_per)
                output_results('effortcore', twenty_per)

                Output_POPT(ratio)
            except Exception, e:
                print str(e)

        P_opt_file.write(
            str(average_value(P_opt_betweenness_list)) + ',' +
            str(average_value(P_opt_pagerank_list)) + ',' +
            str(average_value(P_opt_degree_list)) + ',' +
            str(average_value(P_opt_effort_list)) + ',' +
            str(average_value(P_opt_effortcore_list)) + '\n')
        Statistical_file.write(
            "betweenness&effort: " +
            str(stats.wilcoxon(P_opt_betweenness_list, P_opt_effort_list)) +
            '\n')
        Statistical_file.write(
            "pagerank&effort: " +
            str(stats.wilcoxon(P_opt_pagerank_list, P_opt_effort_list)) + '\n')
        Statistical_file.write(
            "degree&effort: " +
            str(stats.wilcoxon(P_opt_degree_list, P_opt_effort_list)) + '\n')
        Statistical_file.write(
            "effortcore&effort: " +
            str(stats.wilcoxon(P_opt_effortcore_list, P_opt_effort_list)) +
            '\n')
        Statistical_file.write(
            "effortcore&degree: " +
            str(stats.wilcoxon(P_opt_effortcore_list, P_opt_degree_list)) +
            '\n')
Example #50
def generate_main_results(data_path, results_path):
    """Generate the main results of the experiment."""

    # Load dataset
    dataset = load_datasets(data_path=data_path, data_type='csv')[0]

    # Load results
    results = []
    for name in RESULTS_NAMES:
        file_path = join(results_path, f'{name}.pkl')
        results.append(pd.read_pickle(file_path))

    # Combine and select results
    results = combine_results(*results)
    results = select_results(results,
                             oversamplers_names=OVRS_NAMES,
                             classifiers_names=CLFS_NAMES)

    # Extract metrics names
    metrics_names, *_ = zip(*METRICS_MAPPING.items())

    # Dataset description
    dataset_description = describe_dataset(dataset)

    # Scores
    wide_optimal = calculate_wide_optimal(results).drop(columns='Dataset')

    # Ranking
    ranking = calculate_ranking(results).drop(columns='Dataset')
    ranking.iloc[:, 2:] = ranking.iloc[:, 2:].astype(int)

    # Percentage difference
    perc_diff_scores = []
    for oversampler in BASELINE_OVRS:
        perc_diff_scores_ovs = calculate_mean_sem_perc_diff_scores(
            results, [oversampler, 'G-SMOTE'])[0]
        perc_diff_scores_ovs = perc_diff_scores_ovs[[
            'Difference'
        ]].rename(columns={'Difference': oversampler})
        perc_diff_scores.append(perc_diff_scores_ovs)
    perc_diff_scores = sort_tbl(pd.concat([
        ranking[['Classifier', 'Metric']],
        pd.concat(perc_diff_scores, axis=1)
    ],
                                          axis=1),
                                clfs_order=CLFS_NAMES,
                                ovrs_order=OVRS_NAMES,
                                metrics_order=metrics_names)
    perc_diff_scores.iloc[:, 2:] = round(perc_diff_scores.iloc[:, 2:], 2)

    # Wilcoxon test
    pvalues = []
    for ovr in OVRS_NAMES[:-1]:
        mask = (wide_optimal['Metric'] != 'accuracy'
                ) if ovr == 'NONE' else np.repeat(True, len(wide_optimal))
        pvalues.append(
            wilcoxon(wide_optimal.loc[mask, ovr],
                     wide_optimal.loc[mask, 'G-SMOTE']).pvalue)
    wilcoxon_results = pd.DataFrame({
        'Oversampler': OVRS_NAMES[:-1],
        'p-value': pvalues,
        'Significance': np.array(pvalues) < ALPHA
    })

    # Format results
    main_results = [(MAIN_RESULTS_NAMES[0], dataset_description)]
    for name, result in zip(
            MAIN_RESULTS_NAMES[1:],
        (wide_optimal, ranking, perc_diff_scores, wilcoxon_results)):
        if name != 'wilcoxon_results':
            result = sort_tbl(result,
                              clfs_order=CLFS_NAMES,
                              ovrs_order=OVRS_NAMES,
                              metrics_order=metrics_names)
            result['Metric'] = result['Metric'].apply(
                lambda metric: METRICS_MAPPING[metric])
        if name == 'wide_optimal':
            result.iloc[:, 2:] = result.iloc[:, 2:].apply(
                lambda row: make_bold(row, True, 3), axis=1)
        elif name == 'ranking':
            result.iloc[:, 2:] = result.iloc[:, 2:].apply(
                lambda row: make_bold(row, False, 0), axis=1)
        elif name == 'wilcoxon_results':
            result = generate_pvalues_tbl(result)  # format the table that actually gets appended
        main_results.append((name, result))

    return main_results
Example #51
from statsmodels.sandbox.stats.runs import runstest_2samp
from statsmodels.stats.descriptivestats import sign_test

import scipy.stats as stats

# Distribution tests

from scipy.stats import kstest
import numpy as np
x = np.random.normal(0, 1, 1000)
test_stat = kstest(x, 'norm', args=(x.mean(), x.std()))
print(test_stat)

# The listing below originally had empty argument lists; sample arguments are
# filled in here so it runs:
y = np.random.normal(0, 1, 1000)
print(stats.anderson(x))         # Anderson-Darling normality test
print(stats.shapiro(x))          # Shapiro-Wilk normality test
print(stats.ranksums(x, y))      # Wilcoxon rank-sum test (independent samples)
print(stats.mannwhitneyu(x, y))  # Mann-Whitney U test
print(stats.wilcoxon(x, y))      # Wilcoxon signed-rank test (paired samples)
print(stats.ks_2samp(x, y))      # two-sample Kolmogorov-Smirnov test

print(runstest_2samp(x, y))      # Wald-Wolfowitz runs test (two samples)
Example #52
                    r2=randint(0,len_pop)
                    rcut=randint(0,dimension)
                    S[i] = S[r1][:]  # copy, not alias, so the donor row is not mutated
                    S[i][rcut] = S[r2][rcut]  # 'cut' was undefined in the original; rcut is presumably intended
        
                # mutation
                    r3=randint(0,len_pop)
                    r1=randint(0,len_pop)
                    r2=randint(0,len_pop)
                    for j in range(dimension):
                        S[i][j] = D[r1][j] + random.gauss(0, 1)*(S[r2][j]-S[r3][j])
                        S[i][j] = lim_restr(S[i][j], l_min[j],l_max[j])
                
                        
                f_new = Fun(S[i])
                rnd = np.random.random_sample()

                if (f_new <= aptitud[i]) and (rnd < A):
                    for j in range(dimension):
                        po_sol[i][j] = S[i][j]
                    aptitud[i] = f_new

                if f_new <= f_min:
                    for j in range(dimension):
                        mejor[j] = S[i][j]
                    f_min = f_new

        print("exp:" ,exp," f_min: ",f_min)
        print(po_sol)
        print(stats.wilcoxon(po_sol[:,1],po_sol[:,5]))
Example #53
def main(_high_temporal_resolution=True):
    _experiments = all_experiments()
    _experiments = filtering.by_categories(
        _experiments=_experiments,
        _is_single_cell=False,
        _is_high_temporal_resolution=_high_temporal_resolution,
        _is_bleb=False,
        _is_dead_dead=False,
        _is_live_dead=False,
        _is_bead=False,
        _is_metastasis=False)

    _tuples = load.experiments_groups_as_tuples(_experiments)
    _tuples = filtering.by_pair_distance_range(_tuples, PAIR_DISTANCE_RANGE)
    _tuples = filtering.by_real_pairs(_tuples)
    _tuples = filtering.by_band(_tuples)
    print('Total tuples:', len(_tuples))

    _arguments = []
    for _tuple in _tuples:
        _experiment, _series_id, _group = _tuple
        for _cell_id in ['left_cell', 'right_cell']:
            _latest_time_frame = compute.latest_time_frame_before_overlapping(
                _experiment, _series_id, _group, OFFSET_X)
            _arguments.append({
                'experiment': _experiment,
                'series_id': _series_id,
                'group': _group,
                'length_x': QUANTIFICATION_WINDOW_LENGTH_IN_CELL_DIAMETER,
                'length_y': QUANTIFICATION_WINDOW_HEIGHT_IN_CELL_DIAMETER,
                'length_z': QUANTIFICATION_WINDOW_WIDTH_IN_CELL_DIAMETER,
                'offset_x': OFFSET_X,
                'offset_y': OFFSET_Y,
                'offset_z': OFFSET_Z,
                'cell_id': _cell_id,
                'direction': 'inside',
                'time_points': _latest_time_frame
            })
            if ALIGNMENT_OFFSET_Y != OFFSET_Y:
                _arguments.append({
                    'experiment': _experiment,
                    'series_id': _series_id,
                    'group': _group,
                    'length_x': QUANTIFICATION_WINDOW_LENGTH_IN_CELL_DIAMETER,
                    'length_y': QUANTIFICATION_WINDOW_HEIGHT_IN_CELL_DIAMETER,
                    'length_z': QUANTIFICATION_WINDOW_WIDTH_IN_CELL_DIAMETER,
                    'offset_x': OFFSET_X,
                    'offset_y': ALIGNMENT_OFFSET_Y,
                    'offset_z': OFFSET_Z,
                    'cell_id': _cell_id,
                    'direction': 'inside',
                    'time_points': _latest_time_frame
                })

    _windows_dictionary, _windows_to_compute = \
        compute.windows(_arguments, _keys=['experiment', 'series_id', 'group', 'cell_id', 'offset_y'])
    _fiber_densities = compute.fiber_densities(_windows_to_compute)

    _experiments_fiber_densities = {
        _key:
        [_fiber_densities[_tuple] for _tuple in _windows_dictionary[_key]]
        for _key in _windows_dictionary
    }

    _experiments_fiber_densities_aligned = align_by_z_score(
        _tuples, _experiments_fiber_densities)
    _tuples_by_experiment = organize.by_experiment(_tuples)

    _same_correlations_array = []
    _different_correlations_array = []
    _valid_tuples = []
    for _experiment in _tuples_by_experiment:
        print('Experiment:', _experiment)
        _experiment_tuples = _tuples_by_experiment[_experiment]

        for _same_index in tqdm(range(len(_experiment_tuples)),
                                desc='Main loop'):
            _same_tuple = _experiment_tuples[_same_index]
            _same_experiment, _same_series, _same_group = _same_tuple

            _same_left_cell_fiber_densities = \
                _experiments_fiber_densities_aligned[
                    (_same_experiment, _same_series, _same_group, 'left_cell')
                ]
            _same_right_cell_fiber_densities = \
                _experiments_fiber_densities_aligned[
                    (_same_experiment, _same_series, _same_group, 'right_cell')
                ]

            _same_properties = \
                load.group_properties(_same_experiment, _same_series, _same_group)
            _same_left_cell_fiber_densities = compute.remove_blacklist(
                _same_experiment, _same_series,
                _same_properties['cells_ids']['left_cell'],
                _same_left_cell_fiber_densities)
            _same_right_cell_fiber_densities = compute.remove_blacklist(
                _same_experiment, _same_series,
                _same_properties['cells_ids']['right_cell'],
                _same_right_cell_fiber_densities)

            _same_left_cell_fiber_densities_filtered, _same_right_cell_fiber_densities_filtered = \
                compute.longest_same_indices_shared_in_borders_sub_array(
                    _same_left_cell_fiber_densities, _same_right_cell_fiber_densities
                )

            # ignore small arrays
            if len(_same_left_cell_fiber_densities_filtered
                   ) < compute.minimum_time_frames_for_correlation(
                       _same_experiment):
                continue

            _same_correlation = compute_lib.correlation(
                compute_lib.derivative(
                    _same_left_cell_fiber_densities_filtered, _n=DERIVATIVE),
                compute_lib.derivative(
                    _same_right_cell_fiber_densities_filtered, _n=DERIVATIVE))
            for _different_index in range(len(_experiment_tuples)):
                if _same_index != _different_index:
                    _different_tuple = _experiment_tuples[_different_index]
                    _different_experiment, _different_series, _different_group = \
                        _different_tuple
                    for _same_cell_id, _different_cell_id in product(
                        ['left_cell', 'right_cell'],
                        ['left_cell', 'right_cell']):
                        _same_fiber_densities = _experiments_fiber_densities_aligned[
                            (_same_experiment, _same_series, _same_group,
                             _same_cell_id)]
                        _different_fiber_densities = _experiments_fiber_densities_aligned[
                            (_different_experiment, _different_series,
                             _different_group, _different_cell_id)]

                        _different_properties = load.group_properties(
                            _different_experiment, _different_series,
                            _different_group)
                        _same_fiber_densities = compute.remove_blacklist(
                            _same_experiment, _same_series,
                            _same_properties['cells_ids'][_same_cell_id],
                            _same_fiber_densities)
                        _different_fiber_densities = compute.remove_blacklist(
                            _different_experiment, _different_series,
                            _different_properties['cells_ids']
                            [_different_cell_id], _different_fiber_densities)

                        _same_fiber_densities_filtered, _different_fiber_densities_filtered = \
                            compute.longest_same_indices_shared_in_borders_sub_array(
                                _same_fiber_densities, _different_fiber_densities
                            )

                        # ignore small arrays
                        if len(_same_fiber_densities_filtered
                               ) < compute.minimum_time_frames_for_correlation(
                                   _different_experiment):
                            continue

                        _different_correlation = compute_lib.correlation(
                            compute_lib.derivative(
                                _same_fiber_densities_filtered, _n=DERIVATIVE),
                            compute_lib.derivative(
                                _different_fiber_densities_filtered,
                                _n=DERIVATIVE))

                        _same_correlations_array.append(_same_correlation)
                        _different_correlations_array.append(
                            _different_correlation)

                        if _same_tuple not in _valid_tuples:
                            _valid_tuples.append(_same_tuple)

    print('Total tuples:', len(_valid_tuples))
    print('Total points:', len(_same_correlations_array))
    _same_minus_different = \
        np.array(_same_correlations_array) - np.array(_different_correlations_array)
    print('Wilcoxon of same minus different around the zero:')
    print(wilcoxon(_same_minus_different))
    print('Higher same amount:',
          (_same_minus_different > 0).sum() / len(_same_minus_different))

    # plot
    _fig = go.Figure(data=go.Scatter(x=_same_correlations_array,
                                     y=_different_correlations_array,
                                     mode='markers',
                                     marker={
                                         'size': 5,
                                         'color': '#ea8500'
                                     },
                                     showlegend=False),
                     layout={
                         'xaxis': {
                             'title': 'Same network correlation',
                             'zeroline': False,
                             'range': [-1.1, 1.2],
                             'tickmode': 'array',
                             'tickvals': [-1, -0.5, 0, 0.5, 1]
                         },
                         'yaxis': {
                             'title': 'Different network correlation',
                             'zeroline': False,
                             'range': [-1.1, 1.2],
                             'tickmode': 'array',
                             'tickvals': [-1, -0.5, 0, 0.5, 1]
                         },
                         'shapes': [{
                             'type': 'line',
                             'x0': -1,
                             'y0': -1,
                             'x1': -1,
                             'y1': 1,
                             'line': {
                                 'color': 'black',
                                 'width': 2
                             }
                         }, {
                             'type': 'line',
                             'x0': -1,
                             'y0': -1,
                             'x1': 1,
                             'y1': -1,
                             'line': {
                                 'color': 'black',
                                 'width': 2
                             }
                         }, {
                             'type': 'line',
                             'x0': -1,
                             'y0': -1,
                             'x1': 1,
                             'y1': 1,
                             'line': {
                                 'color': 'red',
                                 'width': 2
                             }
                         }]
                     })

    save.to_html(_fig=_fig,
                 _path=os.path.join(paths.PLOTS, save.get_module_name()),
                 _filename='plot_high_time_' + str(_high_temporal_resolution))
Example #54
def stat_test(box_data1,
              box_data2,
              test,
              comparisons_correction=None,
              num_comparisons=1,
              **stats_params):
    """Get formatted result of two sample statistical test.

    Arguments
    ---------
    box_data1, box_data2
        The two samples to compare.
    test: str
        Statistical test to run. Must be one of:
        - `Levene`
        - `Mann-Whitney`
        - `Mann-Whitney-gt`
        - `Mann-Whitney-ls`
        - `t-test_ind`
        - `t-test_welch`
        - `t-test_paired`
        - `Wilcoxon`
        - `Kruskal`
    comparisons_correction: str or None, default None
        Method to use for multiple comparisons correction. Currently only the
        Bonferroni correction is implemented.
    num_comparisons: int, default 1
        Number of comparisons to use for multiple comparisons correction.
    stats_params
        Additional keyword arguments to pass to scipy stats functions.

    Returns
    -------
    StatResult object with formatted result of test.

    """
    # Check arguments.
    assert_is_in(
        comparisons_correction,
        ['bonferroni', None],
        label='argument `comparisons_correction`',
    )

    # Switch to run scipy.stats hypothesis test.
    if test == 'Levene':
        stat, pval = stats.levene(box_data1, box_data2, **stats_params)
        result = StatResult('Levene test of variance', 'levene', 'stat', stat,
                            pval)
    elif test == 'Mann-Whitney':
        u_stat, pval = stats.mannwhitneyu(box_data1,
                                          box_data2,
                                          alternative='two-sided',
                                          **stats_params)
        result = StatResult(
            'Mann-Whitney-Wilcoxon test two-sided',
            'M.W.W.',
            'U_stat',
            u_stat,
            pval,
        )
    elif test == 'Mann-Whitney-gt':
        u_stat, pval = stats.mannwhitneyu(box_data1,
                                          box_data2,
                                          alternative='greater',
                                          **stats_params)
        result = StatResult(
            'Mann-Whitney-Wilcoxon test greater',
            'M.W.W.',
            'U_stat',
            u_stat,
            pval,
        )
    elif test == 'Mann-Whitney-ls':
        u_stat, pval = stats.mannwhitneyu(box_data1,
                                          box_data2,
                                          alternative='less',
                                          **stats_params)
        result = StatResult(
            'Mann-Whitney-Wilcoxon test smaller',
            'M.W.W.',
            'U_stat',
            u_stat,
            pval,
        )
    elif test == 't-test_ind':
        stat, pval = stats.ttest_ind(a=box_data1, b=box_data2, **stats_params)
        result = StatResult('t-test independent samples', 't-test_ind', 'stat',
                            stat, pval)
    elif test == 't-test_welch':
        stat, pval = stats.ttest_ind(a=box_data1,
                                     b=box_data2,
                                     equal_var=False,
                                     **stats_params)
        result = StatResult(
            'Welch\'s t-test independent samples',
            't-test_welch',
            'stat',
            stat,
            pval,
        )
    elif test == 't-test_paired':
        stat, pval = stats.ttest_rel(a=box_data1, b=box_data2, **stats_params)
        result = StatResult('t-test paired samples', 't-test_rel', 'stat',
                            stat, pval)
    elif test == 'Wilcoxon':
        zero_method_default = "pratt" if len(box_data1) <= 20 else "wilcox"
        # pop rather than get: otherwise zero_method would be passed twice via **stats_params
        zero_method = stats_params.pop('zero_method', zero_method_default)
        print("Using zero_method ", zero_method)
        stat, pval = stats.wilcoxon(box_data1,
                                    box_data2,
                                    zero_method=zero_method,
                                    **stats_params)
        result = StatResult('Wilcoxon test (paired samples)', 'Wilcoxon',
                            'stat', stat, pval)
    elif test == 'Kruskal':
        stat, pval = stats.kruskal(box_data1, box_data2, **stats_params)
        result = StatResult('Kruskal-Wallis paired samples', 'Kruskal', 'stat',
                            stat, pval)
    else:
        result = StatResult(None, '', None, None, np.nan)

    # Optionally, run multiple comparisons correction.
    if comparisons_correction == 'bonferroni':
        result.pval = bonferroni(result.pval, num_comparisons)
        result.test_str = result.test_str + ' with Bonferroni correction'
    elif comparisons_correction is None:
        pass
    else:
        # This should never be reached because `comparisons_correction` must
        # be a valid correction method or None.
        raise RuntimeError('Unexpectedly reached end of switch.')

    return result
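A minimal usage sketch of stat_test (the paired samples below are made up for illustration; `test_str` and `pval` are the StatResult fields used by the correction branch above):

before = [12.1, 9.8, 11.4, 10.2, 13.5, 9.9, 12.7, 10.8]
after = [11.2, 9.1, 11.9, 9.4, 12.8, 9.5, 11.6, 10.1]

# Paired Wilcoxon test, Bonferroni-corrected for three planned comparisons.
res = stat_test(before, after, test='Wilcoxon',
                comparisons_correction='bonferroni', num_comparisons=3)
print(res.test_str, res.pval)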
Example #55
def compute_fiber_densities(_band=True, _high_temporal_resolution=False):
    _experiments = all_experiments()
    _experiments = filtering.by_categories(
        _experiments=_experiments,
        _is_single_cell=False,
        _is_high_temporal_resolution=_high_temporal_resolution,
        _is_bleb=False,
        _is_dead_dead=False,
        _is_live_dead=False,
        _is_bead=False,
        _is_metastasis=False)

    _tuples = load.experiments_groups_as_tuples(_experiments)
    _tuples = filtering.by_pair_distance_range(_tuples, PAIR_DISTANCE_RANGE)
    _tuples = filtering.by_real_pairs(_tuples)
    _tuples = filtering.by_band(_tuples, _band=_band)
    print('Total tuples:', len(_tuples))

    _arguments = []
    for _tuple in _tuples:
        _experiment, _series_id, _group = _tuple
        _latest_time_frame = compute.latest_time_frame_before_overlapping(
            _experiment, _series_id, _group, OFFSET_X)
        for _cell_id in ['left_cell', 'right_cell']:
            _arguments.append({
                'experiment': _experiment,
                'series_id': _series_id,
                'group': _group,
                'length_x': QUANTIFICATION_WINDOW_LENGTH_IN_CELL_DIAMETER,
                'length_y': QUANTIFICATION_WINDOW_HEIGHT_IN_CELL_DIAMETER,
                'length_z': QUANTIFICATION_WINDOW_WIDTH_IN_CELL_DIAMETER,
                'offset_x': OFFSET_X,
                'offset_y': OFFSET_Y,
                'offset_z': OFFSET_Z,
                'cell_id': _cell_id,
                'direction': 'inside',
                'time_points': _latest_time_frame
            })

    _windows_dictionary, _windows_to_compute = compute.windows(
        _arguments, _keys=['experiment', 'series_id', 'group', 'cell_id'])
    _fiber_densities = compute.fiber_densities(_windows_to_compute,
                                               _subtract_border=True)

    _experiments_fiber_densities = {
        _key:
        [_fiber_densities[_tuple] for _tuple in _windows_dictionary[_key]]
        for _key in _windows_dictionary
    }

    _tuples_by_experiment = organize.by_experiment(_tuples)

    _correlations_array = []
    _z_positions_array = []
    for _experiment in _tuples_by_experiment:
        print('Experiment:', _experiment)
        _experiment_tuples = _tuples_by_experiment[_experiment]

        for _tuple in tqdm(_experiment_tuples, desc='Main loop'):
            _, _series, _group = _tuple

            _left_cell_fiber_densities = _experiments_fiber_densities[(
                _experiment, _series, _group, 'left_cell')]
            _right_cell_fiber_densities = _experiments_fiber_densities[(
                _experiment, _series, _group, 'right_cell')]

            _properties = load.group_properties(_experiment, _series, _group)
            _left_cell_fiber_densities = compute.remove_blacklist(
                _experiment, _series, _properties['cells_ids']['left_cell'],
                _left_cell_fiber_densities)
            _right_cell_fiber_densities = compute.remove_blacklist(
                _experiment, _series, _properties['cells_ids']['right_cell'],
                _right_cell_fiber_densities)

            _left_cell_fiber_densities_filtered, _right_cell_fiber_densities_filtered = \
                compute.longest_same_indices_shared_in_borders_sub_array(
                    _left_cell_fiber_densities, _right_cell_fiber_densities
                )

            # ignore small arrays
            if len(_left_cell_fiber_densities_filtered
                   ) < compute.minimum_time_frames_for_correlation(
                       _experiment):
                continue

            _correlation = compute_lib.correlation(
                compute_lib.derivative(_left_cell_fiber_densities_filtered,
                                       _n=DERIVATIVE),
                compute_lib.derivative(_right_cell_fiber_densities_filtered,
                                       _n=DERIVATIVE))

            _group_mean_z_position = compute.group_mean_z_position_from_substrate(
                _experiment, _series, _group)

            _correlations_array.append(_correlation)
            _z_positions_array.append(_group_mean_z_position)

    print('Total points:', len(_correlations_array))
    print('Wilcoxon of correlations around the zero:')
    print(wilcoxon(_correlations_array))
    print('Pearson correlation of correlations and z position distances:')
    print(
        compute_lib.correlation(_correlations_array,
                                _z_positions_array,
                                _with_p_value=True))

    return _correlations_array, _z_positions_array
Example #56
            extraTime = 20
            onesVector = np.ones((extraTime * Fs, 1))
            # TODO: use padding
            # np.pad()
            #posPlot = np.append(arr = posPlot, values = onesVector * posPlot[-1, :])
            #quatPlot = np.append(arr = quatPlot, values = onesVector * quatPlot[-1, :])

            print(name)
            spamwriter.writerow([name, ' - ', pos[len(pos) - 1][1]])
            if ('50hz' in name):
                data50.append(pos[len(pos) - 1][1])
            elif ('25hz' in name):
                data25.append(pos[len(pos) - 1][1])
            else:
                data100.append(pos[len(pos) - 1][1])
        except Exception:
            spamwriter.writerow([name, ' - ', 'Error while computing'])

T1, p1 = stats.wilcoxon(data100,
                        data50,
                        zero_method='wilcox',
                        correction=False,
                        alternative='less')  # scipy accepts 'two-sided', 'less', 'greater'; 'lesser' raises a ValueError
#T2,p2 = stats.wilcoxon(data100, data25, zero_method='wilcox', correction=False)
#T3,p3 = stats.wilcoxon(data50, data25, zero_method='wilcox', correction=False)

print('------------------------')
print(T1, p1)
#print(T2,p2)
#print(T3,p3)
Example #57
#         plt.xlim(-2,45)
#         plt.ylim(-2,55)

        axScatter.set_xscale('log', basex=10)
        axScatter.set_yscale('log', basey=10)  # basey (not basex) for the y-axis
        
        plt.plot([-5,100], [-5,100], 'k--')
        plt.xlim(0.05,100)
        plt.ylim(0.05,100)
        axScatter.tick_params(top=False, right=False, which='both')
        
        ticks=[0.1,1,10,100]
        
        axScatter.set_xticks(ticks)
        axScatter.set_xticklabels(ticks)
        axScatter.set_yticks(ticks)
        axScatter.set_yticklabels(ticks)
     
        extraplots.boxoff(axScatter)
        axScatter.set(adjustable='box-forced', aspect='equal')
        
        axScatter.annotate(panelLabels[indType], xy=(labelPosX[indType+1],labelPosY[0]), xycoords='figure fraction',
                             fontsize=fontSizePanel, fontweight='bold')
        
        pControl = stats.wilcoxon(controlData[0],controlData[1])[1]
        pLaser = stats.wilcoxon(laserData[0],laserData[1])[1]
    
        print "Change in FR for {0} p values:\ncontrol: {1}\nlaser: {2}".format(cellLabels[indType],pControl,pLaser)

if SAVE_FIGURE:
    extraplots.save_figure(figFilename, figFormat, figSize, outputDir)
Example #58
# In[55]:


stats.levene(donnee['Infections'],donnee['Decedes'])


# The p-value obtained is below 5%, so the test is significant: reject H0, no equality of variances at the 5% level.

# #### The equality of means can be tested with a t-test:
# H0: equality of means
# against H1: no equality of means

# In[56]:


stats.wilcoxon(donnee['Infections'],donnee['Decedes'])


# The p-value obtained is below 5%, so the test is significant: reject H0, no equality of means at the 5% level.
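
# Since the Levene test rejected equal variances, the t-test mentioned above
# would be Welch's variant; a sketch (not in the original notebook), assuming
# the same `donnee` DataFrame:
stats.ttest_ind(donnee['Infections'], donnee['Decedes'], equal_var=False)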

# ### World-level statistics

# In[57]:


donnee= donnee.sort_values('Infections',ascending=False)

colM1=['Infections','Decedes','Guerisions','RNB','Pop2018']
colM2=['TauxInfections','TauxDecedes','TauxGuerisions','TauxRNB','TauxPop2018','TauxPopRural','TauxSurfRural']

Example #59
                  np.std(network_dict[n]['dist']))
            if n not in ame_results:
                ame_results[n] = []
            ame_results[n].append(np.mean(network_dict[n]['entropy']))
            print('AMUD :', ame_results[n][-1], '+-',
                  np.std(network_dict[n]['entropy']))
            print('')

    for n in ['1', '3']:
        print('Nd:', n)
        _, p = friedmanchisquare(t_dist[n][0], t_dist[n][1], t_dist[n][2])
        data = np.array(t_dist[n])
        # data = np.argsort(data, axis=0)
        _, pk = f_oneway(data[0], data[2])
        print('ANOVA AMSD :', pk)
        _, p_12 = wilcoxon(t_dist[n][0], t_dist[n][3], zero_method="pratt")
        _, p_13 = wilcoxon(t_dist[n][1], t_dist[n][4], zero_method="pratt")
        _, p_23 = wilcoxon(t_dist[n][2], t_dist[n][5], zero_method="pratt")
        print('FRIEDMAN AMSD :', p, p_12, p_13, p_23)
        print('MEAN AMSD : ', np.mean(data, axis=1), np.std(data, axis=1))
        print('MEAN AMSD : ', np.argsort(data[:2], axis=0).mean(axis=1))
        print('MEAN AMSD : ', np.argsort(data[1:], axis=0).mean(axis=1))
        _, p = friedmanchisquare(t_entropy[n][0], t_entropy[n][1],
                                 t_entropy[n][2])
        data = np.array(t_entropy[n])
        # data = np.argsort(data, axis=0)
        _, pk = f_oneway(data[0], data[2])
        print('KRUSKAL AMSD :', pk)
        _, p_12 = wilcoxon(t_entropy[n][0],
                           t_entropy[n][1],
                           zero_method="pratt")
Example #60
import scipy.stats as stats

DeepCS_MRR_top1_10times_bootstrap = [
    0.2409, 0.2361, 0.2312, 0.2349, 0.2346, 0.2401, 0.2289, 0.2321, 0.2369,
    0.2283
]

TSACS_TASF_MRR_10times_bootstrap = [
    0.5735977777777758, 0.567866785714284, 0.5669624999999976,
    0.5727877777777753, 0.5653392063492049, 0.5681766269841253,
    0.5569228968253946, 0.5615689682539661, 0.5645849603174583,
    0.563360317460315
]

print(
    stats.wilcoxon(DeepCS_MRR_top1_10times_bootstrap,
                   TSACS_TASF_MRR_10times_bootstrap))