Example #1
import numpy
import pandas
from scipy import stats


def compute_friedman_test(best_test_score):
    """Run the Friedman test on every method triple for each organism.

    best_test_score maps an organism code to a dict of per-method score lists.
    Returns a pandas DataFrame of p-values (method triples x organisms).
    """

    ## method triples to test (the Friedman test needs at least three related samples)
    pairs_test = [('individual', 'union', 'mtl'), ('individual', 'union', 'mtmkl'),
        ('individual', 'mtl', 'mtmkl'), ('union', 'mtl', 'mtmkl')]
    org_names = list(best_test_score.keys())

    ttest_p_val = numpy.zeros((len(pairs_test), len(org_names)))

    for org_idx, org_code in enumerate(org_names):
        meth_perf = best_test_score[org_code]

        for pair_idx, rel_pair in enumerate(pairs_test):
            t_stats, p_val = stats.friedmanchisquare(meth_perf[rel_pair[0]], meth_perf[rel_pair[1]], meth_perf[rel_pair[2]])
            
            ttest_p_val[pair_idx, org_idx] = p_val
        
    
    df_pval = pandas.DataFrame(ttest_p_val, columns=org_names, index=pairs_test)
    
    return df_pval 
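
A minimal usage sketch, assuming best_test_score maps organism codes to per-method score lists (the organism code and all scores below are illustrative):

best_test_score = {
    'hs': {'individual': [0.71, 0.74, 0.69, 0.72],
           'union': [0.73, 0.75, 0.70, 0.74],
           'mtl': [0.76, 0.78, 0.72, 0.77],
           'mtmkl': [0.77, 0.79, 0.74, 0.78]}
}
print(compute_friedman_test(best_test_score))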
Example #2
import scipy.stats as st


def friedman_chi(data):
    """Friedman chi-square test: non-parametric, for many
    dependent (repeated-measures) samples.
    """
    F, pval = st.friedmanchisquare(*data)
    return (F, pval)
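
A quick usage sketch, assuming data is an iterable of per-group measurement lists (values borrowed from Example #4's Zar data):

groups = [[7.0, 9.9, 8.5, 5.1], [5.3, 5.7, 4.7, 3.5], [4.9, 7.6, 5.5, 2.8]]
F, pval = friedman_chi(groups)
print(F, pval)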
Example #3
from numpy import array
from scipy.stats import friedmanchisquare


def task_friedman(dataOne, dataTwo, dataThree):
    """Friedman test for three related samples.

    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.friedmanchisquare.html
    """

    chi, pvalue = friedmanchisquare(array(dataOne), array(dataTwo), array(dataThree))

    return chi, pvalue
Example #4
import numpy as np
from numpy import array
from numpy.testing import assert_array_almost_equal
from scipy import stats


def test_friedmanchisquare():
    # see ticket:113
    # verified with MATLAB and R
    # From Demsar, "Statistical Comparisons of Classifiers over Multiple Data Sets",
    # 2006: Xf=9.28 (no tie handling; tie-corrected Xf >= 9.28)
    x1 = [array([0.763, 0.599, 0.954, 0.628, 0.882, 0.936, 0.661, 0.583,
                 0.775, 1.0, 0.94, 0.619, 0.972, 0.957]),
          array([0.768, 0.591, 0.971, 0.661, 0.888, 0.931, 0.668, 0.583,
                 0.838, 1.0, 0.962, 0.666, 0.981, 0.978]),
          array([0.771, 0.590, 0.968, 0.654, 0.886, 0.916, 0.609, 0.563,
                 0.866, 1.0, 0.965, 0.614, 0.9751, 0.946]),
          array([0.798, 0.569, 0.967, 0.657, 0.898, 0.931, 0.685, 0.625,
                 0.875, 1.0, 0.962, 0.669, 0.975, 0.970])]

    #From "Bioestadistica para las ciencias de la salud" Xf=18.95 p<0.001:
    x2 = [array([4,3,5,3,5,3,2,5,4,4,4,3]),
          array([2,2,1,2,3,1,2,3,2,1,1,3]),
          array([2,4,3,3,4,3,3,4,4,1,2,1]),
          array([3,5,4,3,4,4,3,3,3,4,4,4])]

    #From Jerrold H. Zar, "Biostatistical Analysis" (example 12.6), Xf=10.68, 0.005 < p < 0.01:
    #The probability for this example is inexact under the chi-square approximation of the Friedman statistic.
    x3 = [array([7.0,9.9,8.5,5.1,10.3]),
          array([5.3,5.7,4.7,3.5,7.7]),
          array([4.9,7.6,5.5,2.8,8.4]),
          array([8.8,8.9,8.1,3.3,9.1])]


    assert_array_almost_equal(stats.friedmanchisquare(x1[0],x1[1],x1[2],x1[3]),(10.2283464566929, 0.0167215803284414))
    assert_array_almost_equal(stats.friedmanchisquare(x2[0],x2[1],x2[2],x2[3]),(18.9428571428571, 0.000280938375189499))
    assert_array_almost_equal(stats.friedmanchisquare(x3[0],x3[1],x3[2],x3[3]),(10.68, 0.0135882729582176))
    np.testing.assert_raises(ValueError, stats.friedmanchisquare,x3[0],x3[1])

    # test using mstats
    assert_array_almost_equal(stats.mstats.friedmanchisquare(x1[0],x1[1],x1[2],x1[3]),(10.2283464566929, 0.0167215803284414))
    # the following fails
    #assert_array_almost_equal(stats.mstats.friedmanchisquare(x2[0],x2[1],x2[2],x2[3]),(18.9428571428571, 0.000280938375189499))
    assert_array_almost_equal(stats.mstats.friedmanchisquare(x3[0],x3[1],x3[2],x3[3]),(10.68, 0.0135882729582176))
    np.testing.assert_raises(ValueError,stats.mstats.friedmanchisquare,x3[0],x3[1])
Example #5
File: p4.py Project: i72sijia/IMD
    clasificar_HistGradientBoostingClassifier(
        X_labor, y_labor, df_labor, trI_labor, trO_labor, teI_labor, teO_labor,
        'P4_labor_HistGradientBoostingClassifier.svg'),
    clasificar_HistGradientBoostingClassifier(
        X_vote, y_vote, df_vote, trI_vote, trO_vote, teI_vote, teO_vote,
        'P4_vote_HistGradientBoostingClassifier.svg'),
    clasificar_HistGradientBoostingClassifier(
        X_car, y_car, df_car, trI_car, trO_car, teI_car, teO_car,
        'P4_car_HistGradientBoostingClassifier.svg'),
    clasificar_HistGradientBoostingClassifier(
        X_bank, y_bank, df_bank, trI_bank, trO_bank, teI_bank, teO_bank,
        'P4_bank_HistGradientBoostingClassifier.svg')
])

friedman = friedmanchisquare(arrayScores_dt, arrayScores_knn, arrayScores_svm,
                             arrayScores_bagging,
                             arrayScores_GradientBoostingClassifier,
                             arrayScores_HistGradientBoostingClassifier)

print("\n  FRIEDMAN => " + str(friedman) + "\n")

# Iman-Davenport correction of the Friedman statistic (N = 10 datasets, k = 6 classifiers):
#   F_F = (N - 1) * X_F^2 / (N * (k - 1) - X_F^2), distributed as F((k-1), (k-1)(N-1))
iman_davenport = ((10 - 1) * friedman[0]) / (10 * (6 - 1) - friedman[0])

print("\n  IMAN-DAVENPORT => " + str(iman_davenport) + "\n")

wilcoxon_KNN_Bagging = wilcoxon(arrayScores_knn, arrayScores_bagging)

print("\n  WILCOXON [KNN VS. BAGGING] => " + str(wilcoxon_KNN_Bagging) + "\n")

wilcoxon_KNN_GradientBoostingClassifier = wilcoxon(
    arrayScores_knn, arrayScores_GradientBoostingClassifier)
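Example #6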
sns.set(color_codes=True)

plt.boxplot(means_acc_knn)
plt.show()
plt.boxplot(means_acc_gauss)
plt.show()
plt.boxplot(means_acc_parzen)
plt.show()

# note: sns.distplot is deprecated in seaborn >= 0.11; sns.histplot/sns.displot replace it
sns.distplot(means_acc_knn)
plt.show()
sns.distplot(means_acc_gauss)
plt.show()
sns.distplot(means_acc_parzen)
plt.show()

sns.kdeplot(means_acc_knn, label="KNN")
sns.kdeplot(means_acc_gauss, label="Gauss")
sns.kdeplot(means_acc_parzen, label="Parzen")
plt.legend()

# Friedman test

stat, p = friedmanchisquare(means_acc_knn, means_acc_gauss, means_acc_parzen)
print('Statistics=%.3f, p=%.3f' % (stat, p))

alpha = 0.05
if p > alpha:
    print('Same distributions (fail to reject H0)')
else:
    print('Different distributions (reject H0)')
Example #7
def print_full_analysis(df, field, h=1, by="tid", nt=stats.shapiro,
                        gd=True, sd=True, ffa=True, ovr=True, ovo=True):
    """Performs a full analysis of the data

    :df: THE data frame
    :field: field of interest such as 'time_ms' or 'success'
    :by: perform grouping on this attribute (the higher level split on
    "navigation" is always performed)
    :h: header level to start with
    :returns: nothing.

    """
    if by is None:
        sd = ffa = ovr = False

    nav_methods = df.groupby("navigation")
    if ffa or sd:
        groups = df.groupby(["navigation", by])[field]

    print_md_header(h, "Descriptions ({})".format(field))

    # Global Descriptions
    if gd:
        print_md_header(h+1, "Global Descriptions ({})".format(field))
        print_full_description(nav_methods[field], h=h+2, norm_test=nt)

    # Sample Descriptions
    if sd:
        print_md_header(h+1, "Repeated measures ({})".format(field))
        for desc, group in nav_methods:
            subgroups = [values for _, values in group.groupby(by)[field]]
            print_md_header(h+2, desc)
            print_md_paragraph(stats.kruskal(*subgroups),
                               stats.friedmanchisquare(*subgroups),
                               # stats.f_oneway(*subgroups),
                               lineblock=True)
        print_md_header(h+1, "Descriptions per {} ({})".format(by, field))
        print_full_description(groups, h=h+2, norm_test=nt)

    # free for all
    if ffa:
        print_md_header(h, "Cross-compare Tests per {} ({})".format(by, field))
        for desc, result in cross_compare(*groups, norm_test=nt):
            print_md_header(h+1, "{} vs {}".format(*desc))
            print_md_paragraph(result)

    # one vs rest
    if ovr:
        print_md_header(
            h, "Global Burger vs Swipe per {} Tests ({})".format(
                by, field))
        burger = "burger", nav_methods.get_group("burger")[field]
        swipe_tasks = nav_methods.get_group("swipe").groupby(by)[field]
        for desc, result in one_vs_rest(burger, *swipe_tasks,
                                        norm_test=nt):
            print_md_header(h+1, "{} vs swipe {}".format(*desc))
            print_md_paragraph(result)

    # one vs one
    if ovo:
        print_md_header(h,
                        "Global Burger vs Global Swipe Test ({})"
                        .format(field))
        for desc, result in cross_compare(*nav_methods[field],
                                          norm_test=nt):
            print_md_header(h+1, "{} vs {}".format(*desc))
            print_md_paragraph(result)
Example #8
			'P5_car_ECOC.svg'
		),
		clasificar_ECOC(
			X_iris_2D,
			y_iris_2D,
			df_iris_2D,
			trI_iris_2D,
			trO_iris_2D,
			teI_iris_2D,
			teO_iris_2D,
			'P5_iris_2D_ECOC.svg'
		)
	]
)

friedman = friedmanchisquare(arrayScores_dt, arrayScores_OVO, arrayScores_OVA, arrayScores_ECOC)
print("\n  FRIEDMAN => " + str(friedman) + "\n")

# Iman-Davenport correction (N = 10 datasets, k = 4 classifiers):
#   F_F = (N - 1) * X_F^2 / (N * (k - 1) - X_F^2)
iman_davenport = ((10-1)*friedman[0])/(10*(4-1)-friedman[0])
print("\n  IMAN-DAVENPORT => " + str(iman_davenport) + "\n")

wilcoxon_DT_OVO = wilcoxon(arrayScores_dt, arrayScores_OVO)
print("\n  WILCOXON [DT VS. OVO] => " + str(wilcoxon_DT_OVO) + "\n")

wilcoxon_DT_OVA = wilcoxon(arrayScores_dt, arrayScores_OVA)
print("\n  WILCOXON [DT VS. OVA] => " + str(wilcoxon_DT_OVA) + "\n")

wilcoxon_DT_ECOC = wilcoxon(arrayScores_dt, arrayScores_ECOC)
print("\n  WILCOXON [DT VS. ECOC] => " + str(wilcoxon_DT_ECOC) + "\n")

wilcoxon_OVO_OVA = wilcoxon(arrayScores_OVO, arrayScores_OVA)
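Example #9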
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile', required=True, help='Tabular file.')
    parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.')
    parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;")
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument("--mwu_use_continuity", action="store_true", default = False,
                    help="Whether a continuity correction (1/2.) should be taken into account.")
    parser.add_argument("--equal_var", action="store_true", default = False,
                    help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.")
    parser.add_argument("--reta", action="store_true", default = False,
                    help="Whether or not to return the internally computed a values.")
    parser.add_argument("--fisher", action="store_true", default = False,
                    help="if true then Fisher definition is used")
    parser.add_argument("--bias", action="store_true", default = False,
                    help="if false,then the calculations are corrected for statistical bias")
    parser.add_argument("--inclusive1", action="store_true", default= False ,
                    help="if false,lower_limit will be ignored")
    parser.add_argument("--inclusive2", action="store_true", default = False,
                    help="if false,higher_limit will be ignored")
    parser.add_argument("--inclusive", action="store_true", default = False,
                    help="if false,limit will be ignored")
    parser.add_argument("--printextras", action="store_true", default = False,
                    help="If True, if there are extra points a warning is raised saying how many of those points there are")
    parser.add_argument("--initial_lexsort", action="store_true", default="False",
                    help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.")
    parser.add_argument("--correction", action="store_true", default = False,
                    help="continuity correction ")
    parser.add_argument("--axis", type=int, default=0,
                    help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)")
    parser.add_argument("--n", type=int, default=0,
                    help="the number of trials. This is ignored if x gives both the number of successes and failures")
    parser.add_argument("--b", type=int, default=0,
                    help="The number of bins to use for the histogram")
    parser.add_argument("--N", type=int, default=0,
                    help="Score that is compared to the elements in a.")
    parser.add_argument("--ddof", type=int, default=0,
                    help="Degrees of freedom correction")
    parser.add_argument("--score", type=int, default=0,
                    help="Score that is compared to the elements in a.")
    parser.add_argument("--m", type=float, default=0.0,
                    help="limits")
    parser.add_argument("--mf", type=float, default=2.0,
                    help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9,
                    help="higher_limit")
    parser.add_argument("--p", type=float, default=0.5,
                    help="The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5")
    parser.add_argument("--alpha", type=float, default=0.9,
                    help="probability")
    parser.add_argument("--new", type=float, default=0.0,
                    help="Value to put in place of values in a outside of bounds")
    parser.add_argument("--proportiontocut", type=float, default=0.0,
                    help="Proportion (in range 0-1) of total data set to trim of each end.")
    parser.add_argument("--lambda_", type=float, default=1.0,
                    help="lambda_ gives the power in the Cressie-Read power divergence statistic")
    parser.add_argument("--imbda", type=float, default=0,
                    help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.")
    parser.add_argument("--base", type=float, default=1.6,
                    help="The logarithmic base to use, defaults to e")
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    outfile = open(args.outfile, 'w+')
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols is not None:
        sample0 = 1
        barlett_samples = []
        for sample in args.sample_cols.split(';'):
            barlett_samples.append(list(map(int, sample.split(','))))
    if args.sample_one_cols is not None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(',')
    if args.sample_two_cols is not None:
        sample_two_cols = args.sample_two_cols.split(',')
        sample2 = 1
    for line in open( infile ):
        sample_one = []
        sample_two = []
        cols = line.strip().split('\t')
        if sample0 == 1:
            b_samples = columns_to_values( barlett_samples,line )
        if sample1 == 1:
            for index in sample_one_cols:
                sample_one.append( cols[ int(index) -1 ] )
        if sample2 == 1:
            for index in sample_two_cols:
                sample_two.append( cols[ int(index) -1 ] )
        if test_id.strip() == 'describe':
            size, min_max,mean,uv,bs,bk = stats.describe( map(float, sample_one) )
            cols.append( size )
            cols.append( min_max )
            cols.append( mean )
            cols.append( uv )
            cols.append( bs )
            cols.append( bk )
        elif test_id.strip() == 'mode':
            vals, counts = stats.mode( map(float, sample_one) )
            cols.append( vals )
            cols.append( counts )
        elif test_id.strip() == 'nanmean':
            m = stats.nanmean( map(float, sample_one))
            cols.append( m )
        elif test_id.strip() == 'nanmedian':
            m = stats.nanmedian( map(float, sample_one))
            cols.append( m )
        elif test_id.strip() == 'kurtosistest':
            z_value, p_value = stats.kurtosistest( map(float, sample_one) )
            cols.append( z_value )
            cols.append( p_value )
        elif test_id.strip() == 'variation':
            ra = stats.variation( map(float, sample_one))
            cols.append( ra )
        elif test_id.strip() == 'itemfreq':
            freq = stats.itemfreq( map(float, sample_one))
            for list in freq:
                elements = ','.join( map(str, list) )
                cols.append( elements )
        elif test_id.strip() == 'boxcox_llf':
            IIf = stats.boxcox_llf( imbda,map(float, sample_one) )
            cols.append( IIf )
        elif test_id.strip() == 'tiecorrect':
            fa = stats.tiecorrect( map(float, sample_one) )
            cols.append( fa )
        elif test_id.strip() == 'rankdata':
            r = stats.rankdata( map(float, sample_one),method=args.md )
            cols.append( r )
        elif test_id.strip() == 'nanstd':
            s = stats.nanstd( map(float, sample_one),bias=args.bias )
            cols.append( s )
        elif test_id.strip() == 'anderson':
            A2, critical, sig = stats.anderson( map(float, sample_one), dist=args.dist )
            cols.append( A2 )
            for list in critical:
                cols.append( list )
            cols.append( ',' )
            for list in sig:
                cols.append( list )
        elif test_id.strip() == 'binom_test':
            p_value = stats.binom_test( map(float, sample_one), n=args.n, p=args.p )
            cols.append( p_value )
        elif test_id.strip() == 'gmean':
            gm = stats.gmean( map(float, sample_one), dtype=args.dtype )
            cols.append( gm )
        elif test_id.strip() == 'hmean':
            hm = stats.hmean( map(float, sample_one), dtype=args.dtype )
            cols.append( hm )
        elif test_id.strip() == 'kurtosis':
            k = stats.kurtosis( map(float, sample_one),axis=args.axis, fisher=args.fisher, bias=args.bias )
            cols.append( k )
        elif test_id.strip() == 'moment':
            n_moment = stats.moment( map(float, sample_one),n=args.n )
            cols.append( n_moment )
        elif test_id.strip() == 'normaltest':
            k2, p_value = stats.normaltest( map(float, sample_one) )
            cols.append( k2 )
            cols.append( p_value )
        elif test_id.strip() == 'skew':
            skewness = stats.skew( map(float, sample_one),bias=args.bias )
            cols.append( skewness )
        elif test_id.strip() == 'skewtest':
            z_value, p_value = stats.skewtest( map(float, sample_one))
            cols.append( z_value )
            cols.append( p_value )
        elif test_id.strip() == 'sem':
            s = stats.sem( map(float, sample_one),ddof=args.ddof )
            cols.append( s )
        elif test_id.strip() == 'zscore':
            z = stats.zscore( map(float, sample_one),ddof=args.ddof )
            for list in z:
                cols.append( list )
        elif test_id.strip() == 'signaltonoise':
            s2n = stats.signaltonoise( map(float, sample_one),ddof=args.ddof )
            cols.append( s2n )
        elif test_id.strip() == 'percentileofscore':
            p = stats.percentileofscore( map(float, sample_one),score=args.score,kind=args.kind )
            cols.append( p )
        elif test_id.strip() == 'bayes_mvs':
            c_mean, c_var,c_std = stats.bayes_mvs( map(float, sample_one),alpha=args.alpha )
            cols.append( c_mean )
            cols.append( c_var )
            cols.append( c_std )
        elif test_id.strip() == 'sigmaclip':
            c, c_low,c_up = stats.sigmaclip( map(float, sample_one),low=args.m,high=args.n )
            cols.append( c )
            cols.append( c_low )
            cols.append( c_up )
        elif test_id.strip() == 'kstest':
            d, p_value = stats.kstest(map(float, sample_one), cdf=args.cdf , N=args.N,alternative=args.alternative,mode=args.mode )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == 'chi2_contingency':
            chi2, p, dof, ex = stats.chi2_contingency( map(float, sample_one), correction=args.correction ,lambda_=args.lambda_)
            cols.append( chi2 )
            cols.append( p )
            cols.append( dof )
            cols.append( ex )
        elif test_id.strip() == 'tmean':
            if nf == 0 and mf == 0:
                mean = stats.tmean( map(float, sample_one))
            else:
                mean = stats.tmean( map(float, sample_one),( mf, nf ),( inclusive1, inclusive2 ))
            cols.append( mean )
        elif test_id.strip() == 'tmin':
            if mf == 0:
                min = stats.tmin( map(float, sample_one))
            else:
                min = stats.tmin( map(float, sample_one),lowerlimit=mf,inclusive=args.inclusive)
            cols.append( min )
        elif test_id.strip() == 'tmax':
            if nf == 0:
                max = stats.tmax( map(float, sample_one))
            else:
                max = stats.tmax( map(float, sample_one),upperlimit=nf,inclusive=args.inclusive)
            cols.append( max )
        elif test_id.strip() == 'tvar':
            if nf == 0 and mf == 0:
                var = stats.tvar( map(float, sample_one))
            else:
                var = stats.tvar( map(float, sample_one),( mf, nf ),( inclusive1, inclusive2 ))
            cols.append( var )
        elif test_id.strip() == 'tstd':
            if nf == 0 and mf == 0:
                std = stats.tstd( map(float, sample_one))
            else:
                std = stats.tstd( map(float, sample_one),( mf, nf ),( inclusive1, inclusive2 ))
            cols.append( std )
        elif test_id.strip() == 'tsem':
            if nf == 0 and mf == 0:
                s = stats.tsem( map(float, sample_one))
            else:
                s = stats.tsem( map(float, sample_one),( mf, nf ),( inclusive1, inclusive2 ))
            cols.append( s )
        elif test_id.strip() == 'scoreatpercentile':
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile( map(float, sample_one),map(float, sample_two),interpolation_method=args.interpolation )
            else:
                s = stats.scoreatpercentile( map(float, sample_one),map(float, sample_two),( mf, nf ),interpolation_method=args.interpolation )
            for list in s:
                cols.append( list )
        elif test_id.strip() == 'relfreq':
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq( map(float, sample_one),args.b)
            else:
                rel, low_range, binsize, ex = stats.relfreq( map(float, sample_one),args.b,( mf, nf ))
            for list in rel:
                cols.append( list )
            cols.append( low_range )
            cols.append( binsize )
            cols.append( ex )
        elif test_id.strip() == 'binned_statistic':
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic( map(float, sample_one),map(float, sample_two),statistic=args.statistic,bins=args.b )
            else:
                st, b_edge, b_n = stats.binned_statistic( map(float, sample_one),map(float, sample_two),statistic=args.statistic,bins=args.b,range=( mf, nf ) )
            cols.append( st )
            cols.append( b_edge )
            cols.append( b_n )
        elif test_id.strip() == 'threshold':
            if nf == 0 and mf == 0:
                o = stats.threshold( map(float, sample_one),newval=args.new )
            else:
                o = stats.threshold( map(float, sample_one),mf,nf,newval=args.new )
            for list in o:
                cols.append( list )
        elif test_id.strip() == 'trimboth':
            o = stats.trimboth( map(float, sample_one),proportiontocut=args.proportiontocut )
            for list in o:
                cols.append( list )
        elif test_id.strip() == 'trim1':
            t1 = stats.trim1( map(float, sample_one),proportiontocut=args.proportiontocut,tail=args.tail )
            for list in t1:
                cols.append( list )
        elif test_id.strip() == 'histogram':
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram( map(float, sample_one),args.b)
            else:
                hi, low_range, binsize, ex = stats.histogram( map(float, sample_one),args.b,( mf, nf ))
            cols.append( hi )
            cols.append( low_range )
            cols.append( binsize )
            cols.append( ex )
        elif test_id.strip() == 'cumfreq':
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq( map(float, sample_one),args.b)
            else:
                cum, low_range, binsize, ex = stats.cumfreq( map(float, sample_one),args.b,( mf, nf ))
            cols.append( cum )
            cols.append( low_range )
            cols.append( binsize )
            cols.append( ex )
        elif test_id.strip() == 'boxcox_normmax':
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax( map(float, sample_one))
            else:
                ma = stats.boxcox_normmax( map(float, sample_one),( mf, nf ),method=args.method)
            cols.append( ma )
        elif test_id.strip() == 'boxcox':
            if imbda == 0:
                box, ma, ci = stats.boxcox( map(float, sample_one),alpha=args.alpha )
                cols.append( box )
                cols.append( ma )
                cols.append( ci )
            else:
                box = stats.boxcox( map(float, sample_one),imbda,alpha=args.alpha )
                cols.append( box )
        elif test_id.strip() == 'histogram2':
            h2 = stats.histogram2( map(float, sample_one), map(float, sample_two) )
            for list in h2:
                cols.append( list )
        elif test_id.strip() == 'ranksums':
            z_statistic, p_value = stats.ranksums( map(float, sample_one), map(float, sample_two) )
            cols.append(z_statistic)
            cols.append(p_value)
        elif test_id.strip() == 'ttest_1samp':
            t, prob = stats.ttest_1samp( map(float, sample_one), map(float, sample_two) )
            for list in t:
                cols.append( list )
            for list in prob:
                cols.append( list )
        elif test_id.strip() == 'ansari':
            AB, p_value = stats.ansari( map(float, sample_one), map(float, sample_two) )
            cols.append(AB)
            cols.append(p_value)
        elif test_id.strip() == 'linregress':
            slope, intercept, r_value, p_value, stderr = stats.linregress( map(float, sample_one), map(float, sample_two) )
            cols.append(slope)
            cols.append(intercept)
            cols.append(r_value)
            cols.append(p_value)
            cols.append(stderr)
        elif test_id.strip() == 'pearsonr':
            cor, p_value = stats.pearsonr( map(float, sample_one), map(float, sample_two) )
            cols.append(cor)
            cols.append(p_value)
        elif test_id.strip() == 'pointbiserialr':
            r, p_value = stats.pointbiserialr( map(float, sample_one), map(float, sample_two) )
            cols.append(r)
            cols.append(p_value)
        elif test_id.strip() == 'ks_2samp':
            d, p_value = stats.ks_2samp( map(float, sample_one), map(float, sample_two) )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == 'mannwhitneyu':
            mw_stats_u, p_value = stats.mannwhitneyu( map(float, sample_one), map(float, sample_two), use_continuity=args.mwu_use_continuity )
            cols.append( mw_stats_u )
            cols.append( p_value )
        elif test_id.strip() == 'zmap':
            z = stats.zmap( map(float, sample_one),map(float, sample_two),ddof=args.ddof )
            for list in z:
                cols.append( list )
        elif test_id.strip() == 'ttest_ind':
            mw_stats_u, p_value = stats.ttest_ind( map(float, sample_one), map(float, sample_two), equal_var=args.equal_var )
            cols.append( mw_stats_u )
            cols.append( p_value )
        elif test_id.strip() == 'ttest_rel':
            t, prob = stats.ttest_rel( map(float, sample_one), map(float, sample_two), axis=args.axis )
            cols.append( t )
            cols.append( prob )
        elif test_id.strip() == 'mood':
            z, p_value = stats.mood( map(float, sample_one), map(float, sample_two), axis=args.axis )
            cols.append( z )
            cols.append( p_value )
        elif test_id.strip() == 'shapiro':
            W, p_value, a = stats.shapiro( map(float, sample_one), map(float, sample_two), args.reta )
            cols.append( W )
            cols.append( p_value )
            for list in a:
                cols.append( list )
        elif test_id.strip() == 'kendalltau':
            k, p_value = stats.kendalltau( map(float, sample_one), map(float, sample_two), initial_lexsort=args.initial_lexsort )
            cols.append(k)
            cols.append(p_value)
        elif test_id.strip() == 'entropy':
            s = stats.entropy( map(float, sample_one), map(float, sample_two), base=args.base )
            cols.append( s )
        elif test_id.strip() == 'spearmanr':
            if sample2 == 1 :
                rho, p_value = stats.spearmanr( map(float, sample_one), map(float, sample_two) )
            else:
                rho, p_value = stats.spearmanr( map(float, sample_one))
            cols.append( rho )
            cols.append( p_value )
        elif test_id.strip() == 'wilcoxon':
            if sample2 == 1 :
                T, p_value = stats.wilcoxon( map(float, sample_one), map(float, sample_two),zero_method=args.zero_method,correction=args.correction )
            else:
                T, p_value = stats.wilcoxon( map(float, sample_one),zero_method=args.zero_method,correction=args.correction )
            cols.append( T )
            cols.append( p_value )
        elif test_id.strip() == 'chisquare':
            if sample2 == 1 :
                rho, p_value = stats.chisquare( map(float, sample_one), map(float, sample_two),ddof=args.ddof )
            else:
                rho, p_value = stats.chisquare( map(float, sample_one),ddof=args.ddof)
            cols.append( rho )
            cols.append( p_value )
        elif test_id.strip() == 'power_divergence':
            if sample2 == 1 :
                stat, p_value = stats.power_divergence( map(float, sample_one), map(float, sample_two),ddof=args.ddof,lambda_=args.lambda_ )
            else:
                stat, p_value = stats.power_divergence( map(float, sample_one),ddof=args.ddof,lambda_=args.lambda_)
            cols.append( stat )
            cols.append( p_value )
        elif test_id.strip() == 'theilslopes':
            if sample2 == 1 :
                mpe, met, lo, up = stats.theilslopes( map(float, sample_one), map(float, sample_two),alpha=args.alpha )
            else:
                mpe, met, lo, up = stats.theilslopes( map(float, sample_one),alpha=args.alpha)
            cols.append( mpe )
            cols.append( met )
            cols.append( lo )
            cols.append( up )
        elif test_id.strip() == 'combine_pvalues':
            if sample2 == 1 :
                stat, p_value = stats.combine_pvalues( map(float, sample_one),method=args.med,weights=map(float, sample_two) )
            else:
                stat, p_value = stats.combine_pvalues( map(float, sample_one),method=args.med)
            cols.append( stat )
            cols.append( p_value )
        elif test_id.strip() == 'obrientransform':
            ob = stats.obrientransform( *b_samples )
            for list in ob:
                elements = ','.join( map(str, list) )
                cols.append( elements )
        elif test_id.strip() == 'f_oneway':
            f_value, p_value = stats.f_oneway( *b_samples )
            cols.append(f_value)
            cols.append(p_value)
        elif test_id.strip() == 'kruskal':
            h, p_value = stats.kruskal( *b_samples )
            cols.append(h)
            cols.append(p_value)
        elif test_id.strip() == 'friedmanchisquare':
            fr, p_value = stats.friedmanchisquare( *b_samples )
            cols.append(fr)
            cols.append(p_value)
        elif test_id.strip() == 'fligner':
            xsq, p_value = stats.fligner( center=args.center,proportiontocut=args.proportiontocut,*b_samples )
            cols.append(xsq)
            cols.append(p_value)
        elif test_id.strip() == 'bartlett':
            T, p_value = stats.bartlett( *b_samples )
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == 'levene':
            w, p_value = stats.levene( center=args.center,proportiontocut=args.proportiontocut,*b_samples )
            cols.append(w)
            cols.append(p_value)
        elif test_id.strip() == 'median_test':
            stat, p_value, m, table = stats.median_test( ties=args.ties,correction=args.correction ,lambda_=args.lambda_,*b_samples )
            cols.append(stat)
            cols.append(p_value)
            cols.append(m)
            cols.append(table)
            for list in table:
                elements = ','.join( map(str, list) )
                cols.append( elements )
        outfile.write( '%s\n' % '\t'.join( map(str, cols) ) )
    outfile.close()
Example #10
def wilcoxon_holm(alpha=0.05, df_perf=None):
    """
    Applies the wilcoxon signed rank test between each pair of algorithm and then use Holm
    to reject the null's hypothesis
    """
    print(pd.unique(df_perf['classifier_name']))
    # count the number of tested datasets per classifier
    df_counts = pd.DataFrame({
        'count':
        df_perf.groupby(['classifier_name']).size()
    }).reset_index()
    # get the maximum number of tested datasets
    max_nb_datasets = df_counts['count'].max()
    # get the list of classifiers who have been tested on nb_max_datasets
    classifiers = list(df_counts.loc[df_counts['count'] == max_nb_datasets]
                       ['classifier_name'])
    # test the null hypothesis using friedman before doing a post-hoc analysis
    friedman_p_value = friedmanchisquare(
        *(np.array(df_perf.loc[df_perf['classifier_name'] == c]['accuracy'])
          for c in classifiers))[1]
    if friedman_p_value >= alpha:
        # then the null hypothesis over the entire classifiers cannot be rejected
        print(
            'the null hypothesis over the entire classifiers cannot be rejected'
        )
        exit()
    # get the number of classifiers
    m = len(classifiers)
    # init array that contains the p-values calculated by the Wilcoxon signed rank test
    p_values = []
    # loop through the algorithms to compare pairwise
    for i in range(m - 1):
        # get the name of classifier one
        classifier_1 = classifiers[i]
        # get the performance of classifier one
        perf_1 = np.array(df_perf.loc[df_perf['classifier_name'] ==
                                      classifier_1]['accuracy'],
                          dtype=np.float64)
        for j in range(i + 1, m):
            # get the name of the second classifier
            classifier_2 = classifiers[j]
            # get the performance of the second classifier
            perf_2 = np.array(df_perf.loc[df_perf['classifier_name'] ==
                                          classifier_2]['accuracy'],
                              dtype=np.float64)
            # calculate the p_value
            p_value = wilcoxon(perf_1, perf_2, zero_method='pratt')[1]
            # append to the list
            p_values.append((classifier_1, classifier_2, p_value, False))
    # get the number of hypothesis
    k = len(p_values)
    # sort the list in ascending order of p-value
    p_values.sort(key=operator.itemgetter(2))

    # loop through the hypothesis
    for i in range(k):
        # correct alpha with holm
        new_alpha = float(alpha / (k - i))
        # test if significant after holm's correction of alpha
        if p_values[i][2] <= new_alpha:
            p_values[i] = (p_values[i][0], p_values[i][1], p_values[i][2],
                           True)
        else:
            # stop
            break
    # compute the average ranks to be returned (useful for drawing the cd diagram)
    # sort the dataframe of performances
    sorted_df_perf = df_perf.loc[df_perf['classifier_name'].isin(classifiers)]. \
        sort_values(['classifier_name', 'dataset_name'])
    # get the rank data
    rank_data = np.array(sorted_df_perf['accuracy']).reshape(
        m, max_nb_datasets)

    # create the data frame containing the accuracies
    df_ranks = pd.DataFrame(data=rank_data,
                            index=np.sort(classifiers),
                            columns=np.unique(sorted_df_perf['dataset_name']))

    # number of wins
    dfff = df_ranks.rank(ascending=False)
    print(dfff[dfff == 1.0].sum(axis=1))

    # average the ranks
    average_ranks = df_ranks.rank(ascending=False).mean(axis=1).sort_values(
        ascending=False)
    # return the p-values and the average ranks
    return p_values, average_ranks, max_nb_datasets
Example #11
Pairwise1 = np.stack((np.array(TableMCC["LR"]), np.array(TableMCC["C-SVM"])))

PairwiseTotal = np.stack(
    (np.array(TableMCC["LR"]), np.array(TableMCC["C-SVMRBF"]),
     np.array(TableMCC["C-SVM"]), np.array(TableMCC["RF"]),
     np.array(TableMCC["ANN"]), np.array(TableMCC["KNN"]),
     np.array(TableMCC["Naive"])))

nr, nc = PairwiseTotal.shape
BR = ro.r.matrix(PairwiseTotal, nrow=nr, ncol=nc)
BR = BR.transpose()
print(BR)

friedmanchisquare(TableMCC["LR"], TableMCC["C-SVMRBF"], TableMCC["C-SVM"],
                  TableMCC["RF"], TableMCC["ANN"], TableMCC["KNN"],
                  TableMCC["Naive"])

print(PMCMRplus.friedmanTest(BR))
print(PMCMRplus.friedmanTest(BR).rx2("statistic"))
print(PMCMRplus.friedmanTest(BR).rx2("p.value"))
print(PMCMRplus.friedmanTest(BR, dist="FDist"))
print(PMCMRplus.friedmanTest(BR, dist="FDist").rx2("statistic"))
print(PMCMRplus.friedmanTest(BR, dist="FDist").rx2("p.value"))

BRNum = ro.r.matrix(ro.r.c(BR),
                    nrow=12,
                    ncol=7,
                    dimnames=ro.r.list(
                        ro.r.c("Adult", "Alzheimer", "Bank", "Breast", "Cell",
                               "Contra", "Aus", "Ger", "Iris", "Sonar", "Wine",
Example #12
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 28 22:06:49 2018

@author: Delgado
"""

import numpy as np
import scipy as sp
import scipy.stats as sps
sol = sps.friedmanchisquare([79.52, 92.06, 79.59], [43.38, 54.54, 46.82],
                            [79.43, 88.60, 79.57])
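# Each positional argument is one related sample (three paired measurements per group);
# sol is a FriedmanchisquareResult(statistic, pvalue).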
Example #13
    'dataset': dataset_names
}  # will contain one row per dataset and models over columns
groups_by_model = df_results.groupby('Algorithm Name')
for model_name in model_names:
    df_model = groups_by_model.get_group(model_name)
    groups_by_dataset = df_model.groupby('Dataset Name')
    model_mean = []
    for dataset_name in dataset_names:
        model_mean.append(
            groups_by_dataset.get_group(dataset_name)
            [metric].mean())  # average over folds
    average_results[model_name] = model_mean
df_results = pd.DataFrame(average_results)
df_results.to_csv('results/average_results.csv', index=False)

# save ranks of algorithms (1 is best, |models| is worst)
df = df_results.drop(columns='dataset')
ranks = rankdata(df.to_numpy(), method='dense', axis=1)
df = pd.DataFrame(ranks, columns=df.columns)
df['dataset'] = df_results['dataset']
df.to_csv('results/ranks.csv')

# friedman and post hoc tests
t_stat, p_val = friedmanchisquare(*[df_results[i] for i in model_names])
print('\nfriedman test p-val = %s' % p_val)
post_hoc_p_vals = posthoc_nemenyi_friedman(
    df_results.drop(columns='dataset').to_numpy())
post_hoc_p_vals.columns = model_names
print('\npost hoc p-vals:\n%s' % post_hoc_p_vals)
post_hoc_p_vals.to_csv('results/post_hoc.csv', index=False)
Example #14
    -118.46336159, -118.48487121, -118.44997418, -118.45399572, -118.47371284
]

#Rastrigin Function
cost_hill = [
    3757.74744873, 3136.04744873, 3544.18744873, 3470.84744873, 3560.48744873,
    3318.34744873, 3208.80744873, 2954.96744873, 3324.58744873, 2725.18744873
]
cost_simulated = [
    3160.80744873, 2728.52744873, 3899.78744873, 3540.60744873, 3322.78744873,
    3687.14744873, 2733.22744873, 3421.12744873, 3619.34744873, 3037.52744873
]
cost_genetico = [
    1518.74744873, 1458.26744873, 1573.86744873, 1680.00744873, 1253.10744873,
    1675.40744873, 1542.90744873, 1516.92744873, 1409.98744873, 1603.82744873
]
cost_pso = [
    2635.83, 2423.98, 2598.48, 2635.53, 2533.04, 2627.55, 2590.66, 2634.54,
    2578.27, 2550.38
]

print(friedmanchisquare(cost_hill, cost_simulated, cost_genetico, cost_pso))
print(kruskal(cost_hill, cost_simulated, cost_genetico, cost_pso))
print(wilcoxon(cost_genetico, cost_pso))

# Creating plot
data = [cost_hill, cost_simulated, cost_genetico, cost_pso]
plt.boxplot(data)

# show plot
plt.show()
Example #15
def precision_plot_table():
    '''
    Precision bar plots for all folds, together with the Friedman test.
    Prints precision information to be used in R.
    '''
    tables = []
    for d in DIMS:
        pBetter=[]
        pVote=[]
        pWeigh=[]
        pFvote=[]

        for fold in range(NFOLDS):
            out = loadfile(RESULTSFOLDER+'u-100k-fold-'+str(d)+'-d'+str(fold)+'-top%d-results.out'%TOPK)
            #order: majority, weighted, best
            #print out[-1]
            pBetter.append(out[E_ID['best']][2])
            pWeigh.append(out[E_ID['weighted']][1])
            pVote.append(out[E_ID['vote']][1])
            pFvote.append(out[E_ID['filtered']][1])

        print 'Friedman', friedmanchisquare(np.array(pBetter),
                                 np.array(pWeigh),
                                 np.array(pVote),
                                 np.array(pFvote))

        print 'Precisions d=%d'%d
        Rprint = lambda alist :  ' = c(' + \
                               ", ".join([str(it) for it in alist]) + ')'
        folds = [i for i in range(NFOLDS)]*3
        precisions = pBetter + pWeigh + pVote + pFvote
        ids = ["'best'"]*NFOLDS + ["'weight'"]*NFOLDS + \
              ["'vote'"]*NFOLDS + ["'filtered'"]*NFOLDS
        print 'datafold', Rprint(folds)
        print 'precision', Rprint(precisions)
        print 'algo', Rprint(ids)

        tables.append(np.vstack((np.array(pBetter),
                                 np.array(pWeigh),
                                 np.array(pVote),
                                 np.array(pFvote) )).T)
        x=np.arange(1,NFOLDS+1,1)

        y = [4, 9, 2,5,6]
        z=[1,2,3,5,7]
        k=[11,12,13,5,9]
        plt.figure()
        ax = plt.subplot(111)
        w = 0.2
        ax.bar(x-2*w, pVote,width=w,color=COLORS[0],align='center', label='Vote')
        ax.bar(x-w, pFvote,width=w,color=COLORS[1],align='center', label='Filtered')
        ax.bar(x, pWeigh,width=w,color=COLORS[2],align='center', label='Weighted')
        ax.bar(x+w, pBetter,width=w,color=COLORS[3],align='center', label='Best')
        ax.autoscale(tight=True)
        ax.legend(loc=4)
        plt.ylim((0.7,1.0))
        #plt.ylim((0,0.9))
        plt.xticks(range(1,NFOLDS+1),['Fold %d'%i for i in range(1,NFOLDS+1)])
        plt.title('Precision (d=%d)'%d)
        plt.savefig(RESULTSFOLDER+'precisionat%d_bars_d%d.png'%(TOPK, d))

    table = np.hstack((tables[0],tables[1]))
    table = np.vstack((table,table.mean(axis=0), table.std(axis=0)))

    np.savetxt(RESULTSFOLDER+'table_precisions.txt', table, fmt='%.4f', delimiter=' & ', newline='\\\\ \hline \n')
Example #16
from scipy.stats import friedmanchisquare, f
import pandas as pd

df = pd.read_csv('decisiontree.csv', header=0, index_col=0)
treeclassifier = df.to_numpy().reshape((10, ))

df1 = pd.read_csv('multiclass.csv', header=0, index_col=0)
multiclass = df1.to_numpy()

ovo = multiclass[:, 0]
ovr = multiclass[:, 1]
ecoc = multiclass[:, 2]

friedman_stat, pvalue = friedmanchisquare(treeclassifier, ovo, ovr, ecoc)

datasets = 10
algorithms = 4

davenport_stat = (
    (datasets - 1) * friedman_stat) / (datasets *
                                       (algorithms - 1) - friedman_stat)
print(davenport_stat)
print(
    f.ppf(q=1 - 0.05,
          dfn=algorithms - 1,
          dfd=(algorithms - 1) * (datasets - 1)))

if davenport_stat > f.ppf(
        q=1 - 0.05, dfn=algorithms - 1, dfd=(algorithms - 1) * (datasets - 1)):
    print("Hay diferencias entre los algoritmos")
else:
    print("No hay diferencias entre los algoritmos")
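Example #17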
# coding=utf-8
from scipy.stats import friedmanchisquare
import scikit_posthocs as sp  #pip3 install scikit-posthocs
import pandas as pd

df = pd.read_csv('csv/comparativo_tecnicas.csv', encoding='utf-8')

print('\nFriedman')

stat, p = friedmanchisquare(df['dt'].tolist(), df['nb'].tolist(),
                            df['mlp'].tolist())
print('p=%.3f' % (p))
if p > 0.05:
    print('Não há diferença significativa')

else:
    print('Há diferença significativa')

    print('\n Posthoc')
    posthoc = sp.posthoc_nemenyi(
        [df['dt'].tolist(), df['nb'].tolist(), df['mlp'].tolist()])
    print(posthoc)
Example #18
    # create a sample vector to store the results (represents one sample)
    muesta = []
    # open the file
    with open("Muestras/" + str(nombreArchivo) + ".out") as archivo:
        leido = archivo.readlines()
        leido = [x.strip() for x in leido]
        for x in leido:
            muesta.append(float(x))
        print("\tSolucion: " + str(muesta))
        # for each line, read the solution value and append it to the sample vector
    # the with-block already closes the file, so no explicit close() is needed
    print("Muestra: " + str(muesta))
    # add the sample vector to the set of samples
    conjuntoMuestras.append(muesta)

print()
print("Conjunto de Muestas: " + str(conjuntoMuestras))
conjuntoMuestras = np.asarray(conjuntoMuestras)
resultado = stats.friedmanchisquare(
    *(conjuntoMuestras[i, :] for i in range(conjuntoMuestras.shape[0])))
# conjuntoMuestras[0],conjuntoMuestras[1],conjuntoMuestras[2])
print(resultado)

# run the Friedman test on the set of samples

# conjuntoMuestras = np.array([[2, 2, 3], [1, 1, 1] ,[4, 5, 6]])
# resultado=stats.friedmanchisquare(*(conjuntoMuestras[i, :] for i in range(conjuntoMuestras.shape[0])))
# print resultado
Example #19
    filename_, file_extension = os.path.splitext(filename)
    if file_extension == ".xlsx":
        data = pd.read_excel(dirname+'\\'+filename,usecols=['dataset_name', 'algorithm_name', 'roc_auc'])
    else:
        data = pd.read_csv(dirname+'\\'+filename, usecols=['dataset_name', 'algorithm_name', 'roc_auc'])
    # average auc for each dataset
    avg_auc = data.groupby(['dataset_name', 'algorithm_name'], as_index=False).mean()
    res_df = res_df.append(avg_auc)  # note: DataFrame.append was removed in pandas 2.0; use pd.concat there
res_df.reset_index(inplace=True, drop=True)
# get all datasets names with results from all four algorithms
dataset_names = res_df.groupby('dataset_name', as_index=False).count()
dataset_names = list(dataset_names[dataset_names['algorithm_name'] == 4]['dataset_name'])
# filter result to contain only datasets that are in dataset_names
res_df = res_df[res_df['dataset_name'].apply(lambda x: x in dataset_names)]
alog_names = list(res_df['algorithm_name'].unique())

# Friedman test
stat, p = stats.friedmanchisquare(res_df[res_df['algorithm_name'] == alog_names[0]].sort_values(by='dataset_name')['roc_auc'],
                              res_df[res_df['algorithm_name'] == alog_names[1]].sort_values(by='dataset_name')['roc_auc'],
                              res_df[res_df['algorithm_name'] == alog_names[2]].sort_values(by='dataset_name')['roc_auc'],
                              res_df[res_df['algorithm_name'] == alog_names[3]].sort_values(by='dataset_name')['roc_auc'])

# interpret results
alpha = 0.05
print('Statistics=%.3f, p=%.3f' % (stat, p))
if p < alpha:
    print('null hypothesis rejected')
    # perform post-hoc test
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        print(sp.posthoc_nemenyi_friedman(res_df, y_col='roc_auc',block_col='dataset_name', group_col='algorithm_name',melted=True))
Example #20
        math.sqrt(total_obtain_answers * 8 * 2) for i in range(7)
    ]

    print("Task1 Accuracy:\n", task1_acc_avg)
    print("Task1 Accuracy 95\% CI:\n", task1_acc_ci)
    print("Task1 Completion Time:\n", task1_time_avg)
    print("Task1 Completion Time 95\% CI:\n", task1_time_ci)

    task1_acc_detail = [[
        np.average(task1_acc[:, i, k, j]) for i in range(8) for j in range(2)
    ] for k in range(7)]
    task1_time_detail = [[
        np.average(task1_time[:, i, k, j]) for i in range(8) for j in range(2)
    ] for k in range(7)]

    acc_fm_res = stats.friedmanchisquare(*task1_acc_detail)
    time_fm_res = stats.friedmanchisquare(*task1_time_detail)

    print(acc_fm_res)
    print(
        sp.posthoc_conover_friedman(np.array(task1_acc_detail).T,
                                    p_adjust=None))
    print(time_fm_res)
    print(sp.posthoc_conover_friedman(np.array(task1_time_detail).T))

    print()
    print("E2")
    # Task 2 - density_2
    task2_acc = np.zeros((total_obtain_answers, 8, 7, 2)).astype(int)  # np.int was removed in NumPy 1.24
    task2_time = np.zeros((total_obtain_answers, 8, 7, 2)).astype(float)
    learning_effect_acc = np.zeros(
Example #21
    def non_parametric_test(self,
                            x: str,
                            y: str,
                            meth: str = 'kruskal-wallis',
                            continuity_correction: bool = True,
                            alternative: str = 'two-sided',
                            zero_meth: str = 'pratt',
                            *args):
        """
        :param x: Name of the first feature (column) of the data set
        :param y: Name of the second feature (column) of the data set
        :param meth: String defining the hypothesis test method for non-parametric tests
                        -> kruskal-wallis: Kruskal-Wallis H test to test whether the distributions of two or more
                                           independent samples are equal or not
                        -> mann-whitney: Mann-Whitney U test to test whether the distributions of two independent
                                         samples are equal or not
                        -> wilcoxon: Wilcoxon Signed-Rank test to test whether the distributions of two paired samples
                                     are equal or not
                        -> friedman: Friedman test to test whether the distributions of two or more paired samples
                                     are equal or not
        :param continuity_correction: Whether to apply a continuity correction (Mann-Whitney / Wilcoxon)
        :param alternative: String defining the type of hypothesis test
                            -> two-sided: Two-sided test
                            -> less: One-sided test (less than)
                            -> greater: One-sided test (greater than)
        :param zero_meth: String defining the method to handle zero differences in the ranking process (Wilcoxon test)
                            -> pratt: Pratt treatment that includes zero-differences (more conservative)
                            -> wilcox: Wilcox treatment that discards all zero-differences
                            -> zsplit: Zero rank split, just like Pratt, but splitting the zero rank between positive
                                       and negative ones
        :param args: Samples to compare (Kruskal-Wallis / Friedman)
        :return: Dict containing the test statistic, p-value and rejection decision
        """
        if meth == 'kruskal-wallis':
            _non_parametric_test = kruskal(*args, nan_policy=self.nan_policy)
        elif meth == 'mann-whitney':
            _non_parametric_test = mannwhitneyu(
                x=self.data_set[x],
                y=self.data_set[y],
                use_continuity=continuity_correction,
                alternative=alternative)
        elif meth == 'wilcoxon':
            _non_parametric_test = wilcoxon(x=self.data_set[x],
                                            y=self.data_set[y],
                                            zero_method=zero_meth,
                                            correction=continuity_correction)
        elif meth == 'friedman':
            _non_parametric_test = friedmanchisquare(*args)
        else:
            raise HappyLearningUtilsException('No non-parametric test found !')
        if _non_parametric_test[1] <= self.p:
            # p-value at or below the significance level: reject the null hypothesis
            _reject = True
        else:
            _reject = False
        return {
            'feature': ''.join(self.data_set.keys()),
            'cases': len(self.data_set.values),
            'test_statistic': _non_parametric_test[0],
            'p_value': _non_parametric_test[1],
            'reject': _reject
        }
Example #22
    def get_test(self):
        """
        :param model_type: for which we want to extract
        :return:
        """

        print(f"Calculating sign test for subset: {self.subset_type}")

        # Load all models predictions for each phase
        for phase in self.phase_list:
            phase_all_models_ndcg_list = []  # ndcg list
            model_type_list = []  # Name of the model

            for model_type in self.models_list:
                ndcg_path = self._get_ndcg_path(self.model_preds_root,
                                                model_type,
                                                phase=phase,
                                                subset_type=self.subset_type)
                ndcg_list = self.read_file_as_list(ndcg_path)
                print(f"Total samples {model_type}: {len(ndcg_list)}")
                phase_all_models_ndcg_list.append(ndcg_list)
                model_type_list.append(model_type)

            # We form combinations of all indices
            index_models_list = list(range(len(model_type_list)))
            # pairwise
            combination_set = combinations(index_models_list, 2)

            for combination_indices in combination_set:
                model1_preds = phase_all_models_ndcg_list[
                    combination_indices[0]]
                model2_preds = phase_all_models_ndcg_list[
                    combination_indices[1]]
                model1_name = model_type_list[combination_indices[0]]
                model2_name = model_type_list[combination_indices[1]]

                stat, p = mannwhitneyu(model1_preds, model2_preds)
                print(
                    f'Mannwhitneyu - For phase: {phase} - models: {model1_name} vs'
                    f' {model2_name} : stat={stat:.4f}, p={p:.4f}')

                stat, p = wilcoxon(model1_preds, model2_preds)
                print(
                    f'Wilcoxon - For phase: {phase} - models: {model1_name} vs'
                    f' {model2_name} : stat={stat:.4f}, p={p:.4f}')

            # Checking for equivalence of *args
            # stat, p = f_oneway(phase_all_models_ndcg_list[0],
            # phase_all_models_ndcg_list[1], phase_all_models_ndcg_list[2],
            # phase_all_models_ndcg_list[3])
            # stat, p = f_oneway(*phase_all_models_ndcg_list)
            # stat, p = mannwhitneyu(*phase_all_models_ndcg_list)
            # stat, p = wilcoxon(*phase_all_models_ndcg_list)
            stat, p = kruskal(*phase_all_models_ndcg_list)
            print(f'Kruskal - For phase: {phase}: stat={stat:.4f}, p={p:.4f}')

            bonferroni_correction = multipletests(p, method='bonferroni')
            # print(bonferroni_correction)
            # (reject, pvals_corrected, alphacSidak, alphacBonf)
            action = str(bonferroni_correction[0][0])  # np array
            new_p_value = bonferroni_correction[1][0]
            print(
                f'Kruskal - bonferroni - For phase: {phase}: p={new_p_value:.4f}, '
                f'action: {str(action)}')

            stat, p = friedmanchisquare(*phase_all_models_ndcg_list)
            print(
                f'Friedmanchisquare - For phase: {phase}: stat={stat:.4f}, p={p:.4f}'
            )
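# A minimal sketch (not from the class above) of the more common multipletests
# usage: gather ALL pairwise p-values first, then correct them together so the
# family-wise error rate is controlled across every comparison.
from itertools import combinations
from scipy.stats import wilcoxon
from statsmodels.stats.multitest import multipletests

model_scores = {'a': [0.30, 0.52, 0.71, 0.64],   # hypothetical NDCG lists
                'b': [0.41, 0.60, 0.80, 0.72],
                'c': [0.22, 0.43, 0.63, 0.55]}
pairs = list(combinations(model_scores, 2))
raw_p = [wilcoxon(model_scores[m1], model_scores[m2])[1] for m1, m2 in pairs]
reject, corrected_p, _, _ = multipletests(raw_p, method='bonferroni')
for pair, p_c, rej in zip(pairs, corrected_p, reject):
    print(pair, f'corrected p={p_c:.4f}', 'reject H0' if rej else 'fail to reject')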
Example #23
0
from math import sqrt

from scipy import stats
from sklearn import svm
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

# Reconstructed head of this truncated snippet (an assumption): `net` is an
# MLP classifier, matching the "Error MLP" report below; X and y are assumed
# to be loaded upstream.
net = MLPClassifier(solver='sgd',
                    max_iter=500,
                    momentum=0.9)

SVM = svm.SVC()
GNB = GaussianNB()

cv = StratifiedKFold(n_splits=10)

scores_net = cross_val_score(net, X, y, cv=cv, scoring='accuracy')
scores_SVM = cross_val_score(SVM, X, y, cv=cv, scoring='accuracy')
scores_GNB = cross_val_score(GNB, X, y, cv=cv, scoring='accuracy')

ic_net = stats.norm.interval(0.95,
                             loc=1 - scores_net.mean(),
                             scale=scores_net.std() / sqrt(len(X)))
ic_SVM = stats.norm.interval(0.95,
                             loc=1 - scores_SVM.mean(),
                             scale=scores_SVM.std() / sqrt(len(X)))
ic_GNB = stats.norm.interval(0.95,
                             loc=1 - scores_GNB.mean(),
                             scale=scores_GNB.std() / sqrt(len(X)))

print("Error MLP: %0.2f (+/- %0.2f)" %
      (1 - scores_net.mean(), scores_net.mean() - ic_net[0]))
print("Error SVM: %0.2f (+/- %0.2f)" %
      (1 - scores_SVM.mean(), scores_SVM.mean() - ic_SVM[0]))
print("Error GNB: %0.2f (+/- %0.2f)" %
      (1 - scores_GNB.mean(), scores_GNB.mean() - ic_GNB[0]))

print(stats.friedmanchisquare(scores_net, scores_SVM, scores_GNB))
Example #24
0
def friedman_test(experiment_pivot_df):
    """Returns p-value for Friedman test."""
    pivot_df_as_matrix = experiment_pivot_df.values
    _, p_value = ss.friedmanchisquare(*pivot_df_as_matrix)
    return p_value
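# Hypothetical usage of friedman_test above. The matrix is unpacked row by
# row, so each DataFrame ROW is one group and each column one paired
# condition; `ss` is assumed to be the snippet's scipy.stats alias.
import scipy.stats as ss
import pandas as pd

toy_pivot = pd.DataFrame([[10, 12, 11, 14],
                          [11, 13, 12, 15],
                          [9, 10, 10, 12]],
                         index=['group_a', 'group_b', 'group_c'],
                         columns=['c1', 'c2', 'c3', 'c4'])
print(friedman_test(toy_pivot))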
Example #25
0
cds["Attrition"].replace(to_replace=("Yes","No"),value=(1,0),inplace=True)
print(cds.head())

#Performing a Wilcoxon test to check whether particular variables relate to attrition
#H0 = There is no significant attrition in the company caused by the age of the employees and the distance from home.
#H1 = There is significant attrition in the company caused by the age of the employees and the distance from home.

from scipy.stats import wilcoxon
print("\n")
print(wilcoxon(cds.Age,cds.DistanceFromHome))
#Output: WilcoxonResult(statistic=2892.0, pvalue=0.0)

#Friedman test
from scipy.stats import friedmanchisquare
print("\n")
print(friedmanchisquare(cds.Age,cds.DistanceFromHome,cds.JobLevel))
#output: FriedmanchisquareResult(statistic=7478.114178094286, pvalue=0.0)

#Mann Whitney test
from scipy.stats import mannwhitneyu
print("\n")
print(mannwhitneyu(cds.JobLevel,cds.NumCompaniesWorked))

# Moving on to the Kruskal-Wallis test
from scipy.stats import kruskal
print("\n")
print(kruskal(cds.TotalWorkingYears,cds.TrainingTimesLastYear,cds.YearsAtCompany,cds.YearsSinceLastPromotion))

#ChiSquare Test
from scipy.stats import chi2_contingency
print("\n")
            network_dict[n]['entropy'] = np.array(network_dict[n]['entropy'])
            amsd_results[n].append(np.mean(network_dict[n]['dist']))
            t_dist[n].append(network_dict[n]['dist'])
            t_entropy[n].append(network_dict[n]['entropy'])
            print('AMSD :', amsd_results[n][-1], '+-',
                  np.std(network_dict[n]['dist']))
            if n not in ame_results:
                ame_results[n] = []
            ame_results[n].append(np.mean(network_dict[n]['entropy']))
            print('AMUD :', ame_results[n][-1], '+-',
                  np.std(network_dict[n]['entropy']))
            print('')

    for n in ['1', '3']:
        print('Nd:', n)
        _, p = friedmanchisquare(t_dist[n][0], t_dist[n][1], t_dist[n][2])
        data = np.array(t_dist[n])
        # data = np.argsort(data, axis=0)
        _, pk = f_oneway(data[0], data[2])
        print('ANOVA AMSD :', pk)
        _, p_12 = wilcoxon(t_dist[n][0], t_dist[n][3], zero_method="pratt")
        _, p_13 = wilcoxon(t_dist[n][1], t_dist[n][4], zero_method="pratt")
        _, p_23 = wilcoxon(t_dist[n][2], t_dist[n][5], zero_method="pratt")
        print('FRIEDMAN AMSD :', p, p_12, p_13, p_23)
        print('MEAN AMSD : ', np.mean(data, axis=1), np.std(data, axis=1))
        print('MEAN AMSD : ', np.argsort(data[:2], axis=0).mean(axis=1))
        print('MEAN AMSD : ', np.argsort(data[1:], axis=0).mean(axis=1))
        _, p = friedmanchisquare(t_entropy[n][0], t_entropy[n][1],
                                 t_entropy[n][2])
        data = np.array(t_entropy[n])
        # data = np.argsort(data, axis=0)
# ### Statistical analysis to select the best K-IBL algorithm

# In[10]:

import json

with open('accuracies_sick.json') as json_file:
    accuracies_json = json.load(json_file)

accs = [accuracies_json[k] for k in accuracies_json]
len(accs)

from scipy.stats import friedmanchisquare

friedmanchisquare(*accs)

get_ipython().system(' pip install scikit-posthocs')

from scikit_posthocs import posthoc_dunn

p_values = posthoc_dunn(a=accs, p_adjust='holm', sort=True)
print('Post hocs dunn p-values: ')
p_values.head()

# In[11]:

pvalues = p_values.values
classifiers = accs.copy()
best = 0
best_acc = np.mean(accs[0])
def friedman_test(df: pd.DataFrame):
    cols = [x for x in df.columns if x != 'dataset']
    data = [df[col] for col in cols]
    res = friedmanchisquare(*data)
    return res
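# Hypothetical usage of the df-based friedman_test above (the opposite
# orientation to Example #24): one row per dataset, one accuracy column per
# method, with the 'dataset' column excluded; pandas is assumed imported as
# pd, as the type hint suggests.
toy = pd.DataFrame({'dataset': ['d1', 'd2', 'd3', 'd4'],
                    'knn': [0.81, 0.74, 0.90, 0.66],
                    'svm': [0.84, 0.77, 0.92, 0.70],
                    'tree': [0.79, 0.70, 0.88, 0.64]})
print(friedman_test(toy))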
Example #29
0
#https://machinelearningmastery.com/statistical-hypothesis-tests-in-python-cheat-sheet/
"Friedman Test ------------------------------------------------------------------------------------###"
""" Tests if the distribuitions of two or more paired samples are equal or not """
# Example of the Friedman Test
from scipy.stats import friedmanchisquare

# All samples must contain the same number of observations!
data1 = [
    0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869
]
data2 = [
    1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169
]
data3 = [
    -0.208, 0.696, 0.928, -1.148, -0.213, 0.229, 0.137, 0.269, -0.870, -1.204
]

stat, p = friedmanchisquare(data1, data2, data3)
print('stat=%.3f, p=%.3f' % (stat, p))
alpha = 0.05
if p > alpha:
    print('Probably the same distribution')
else:
    print('Probably different distributions')
Example #30
0
for group, title in zip(data, titles):
    endog.extend(group)
    groups.extend([title] * len(group))

m_comp = pairwise_tukeyhsd(np.asarray(endog), np.asarray(groups), 0.05)
print(m_comp)

data_seg = [
    results.rf, results.lf, results.rt, results.lt, results.rs, results.ls
]
data_act = [
    results.walking, results.running, results.standing, results.sitting,
    results.bicycling
]
f_comp = stats.friedmanchisquare(*data_seg)
print(f_comp)

#data = [proposed_dist.rice, proposed_dist.spline, proposed_dist.fifth, proposed_dist.fourth, proposed_dist.third, proposed_dist.second, proposed_dist.linear, proposed_dist.diff]
data = [
    proposed_dist.fifth, proposed_dist.fourth, proposed_dist.third,
    proposed_dist.second, proposed_dist.spline, proposed_dist.linear,
    proposed_dist.diff
]
n_comp = sp.posthoc_nemenyi(data)
print(n_comp)
#n_comp = sp.posthoc_nemenyi_friedman(data)
#print(n_comp)

num_cat = len(data)
Example #31
0
        ],
        [
            3131.5099999999993, 3147.58, 3147.7299999999996, 3133.35,
            3148.0299999999997, 3147.2799999999997, 3145.2099999999996,
            3126.78, 3145.2099999999996, 3141.46
        ]]

# df = pd.DataFrame(rastringin).T
# df.to_excel(excel_writer = "C:/Users/Tulio/Desktop/Mestrado/Busca_e_Otimizacao/search_and_optmization/src/utils/test.xlsx")

data = a280

# print(statistics.pstdev(data[2]))

# Group-level Friedman test
print(stats.friedmanchisquare(*data))

# Group-level Kruskal-Wallis test
print(stats.kruskal(*data))

#Conover post-hoc test based on Kruskal-Wallis
pc = sp.posthoc_conover(data)

#In case you need to change the DataFrame index and columns
pc.columns = ['GRASP 2-opt', 'GRASP mBUC', 'HC mBUC']
pc.index = ['GRASP 2-opt', 'GRASP mBUC', 'HC mBUC']

print(pc)

#Heatmap of the Conover test
cmap = ['1', '#fb6a4a', '#08306b', '#4292c6', '#c6dbef']
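#A plausible completion of the truncated heatmap step, assuming scikit-posthocs'
#sign_plot API (the five-color cmap above matches its expected format):
sp.sign_plot(pc, cmap=cmap)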
Example #32
0
    table.to_csv(os.path.join(results_path, 'main_results.csv'))
    table.to_latex(os.path.join(results_path, 'main_results.tex'))

    remove_list = [[], ['isotonic'], ['beta2'], ['beta05'], ['beta', 'beta05'],
                   ['beta2', 'beta05'], [None, 'None', 'isotonic', 'sigmoid']]
    for rem in remove_list:
        df_rem = df_all[np.logical_not(np.in1d(df_all.method, rem))]
        methods_rem = [method for method in methods if method not in rem]
        print(methods_rem)
        print("-#-#-#-#-#-#-#-#-#-#-#-#-ACC-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-")
        table = df_rem.pivot_table(index=['dataset'], columns=['method'],
                                   values=['acc'], aggfunc=[np.mean, np.std])
        table_to_latex(dataset_names, methods_rem, table, max_is_better=True)
        accs = table.to_numpy()[:, :len(methods_rem)]
        print(friedmanchisquare(*[accs[:, x] for x in np.arange(accs.shape[1])]))
        table.to_csv(os.path.join(results_path, 'main_acc' + str(methods_rem) + '.csv'))

        print("-#-#-#-#-#-#-#-#-#-#-#-LOSS-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-")
        table = df_rem.pivot_table(index=['dataset'], columns=['method'],
                                   values=['loss'], aggfunc=[np.mean, np.std])
        table_to_latex(dataset_names, methods_rem, table, max_is_better=False)
        losses = table.to_numpy()[:, :len(methods_rem)]
        print(friedmanchisquare(*[losses[:, x] for x in np.arange(losses.shape[1])]))
        table.to_csv(os.path.join(results_path, 'main_loss' + str(methods_rem) + '.csv'))

        print("-#-#-#-#-#-#-#-#-#-#-#-BRIER-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-")
        table = df_rem.pivot_table(index=['dataset'], columns=['method'],
                                   values=['brier'], aggfunc=[np.mean, np.std])
        table_to_latex(dataset_names, methods_rem, table, max_is_better=False)
        briers = table.to_numpy()[:, :len(methods_rem)]
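        # Reconstructed tail of the truncated BRIER block, mirroring the ACC
        # and LOSS blocks above (an assumption):
        print(friedmanchisquare(*[briers[:, x] for x in np.arange(briers.shape[1])]))
        table.to_csv(os.path.join(results_path, 'main_brier' + str(methods_rem) + '.csv'))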
Example #33
0
#    except Exception as e:
#        print('En ',ds,'_',tip,'_',l,'_',det)
#        print (e.__doc__)
#        print (e.message)

######################## FRIEDMAN TESTS ########################

from scipy.stats import friedmanchisquare

alpha = 0.05

#FOR PREQ.ACC
stat_preqacc, p_preqacc = friedmanchisquare(friedman_DDM_preqacc,
                                            friedman_EDDM_preqacc,
                                            friedman_ADWIN_preqacc,
                                            friedman_PH_preqacc,
                                            friedman_CURIE_preqacc)
print('---- PREQ. ACC ----')
print('Statistics=%.3f, p=%.3f' % (stat_preqacc, p_preqacc))
if p_preqacc > alpha:
    print('Same distributions (fail to reject H0)')
else:
    print('Different distributions (reject H0)')
print('')

#FOR RAM-HOURS
stat_ramhours, p_ramhours = friedmanchisquare(friedman_DDM_ramhours,
                                              friedman_EDDM_ramhours,
                                              friedman_ADWIN_ramhours,
                                              friedman_PH_ramhours,
                                              friedman_CURIE_ramhours)
Example #34
0
 def time_friedmanchisquare(self):
     stats.friedmanchisquare(self.a, self.b, self.c)
Example #35
0
from scipy import stats

# Generate the data
nd = stats.norm(5,3)
data = nd.rvs((100,3))

# Check if the three are from the same distribution
alpha = 0.05
_,p = stats.friedmanchisquare(data[:,0], data[:,1], data[:,2])
if p > alpha:
    print('The original datasets come from the same distribution')

# Modify one of the data
data[:,1] += 3
_,p = stats.friedmanchisquare(data[:,0], data[:,1], data[:,2])
if p < alpha:
    print('After the modification, one of the datasets is different')

# %%
"""
## Positive affect
"""

# %%
"""
### Friedman test
"""

# %%
# Compare groups
_, p = stats.friedmanchisquare(smile_pos, neutral_pos, sad_pos)
print('p={:.5f}'.format(p))

if p > ALPHA:
    print('Same distributions')
    exit()
else:
    print('Different distributions. You can do a post-hoc test.')

# %%
"""
### Wilcoxon test (as a post-hoc test)
"""

# %%
# Smiley face vs Neutral face
Example #37
0
def run(file_name):

	results = util.from_file(file_name)

	# Calculate accuracies for each classifier based on the true and
	# predicted labels
	accuracies = {}
	for classifier in results.keys():
		accuracies[classifier] = []
		for info in results[classifier]:
			y_true = info['true']
			y_pred = info['pred']
			accuracies[classifier].append(evaluate.get_accuracy_score(y_true, y_pred))

	# Create plot of scores
	if not os.path.isdir('figs'):
		os.mkdir('figs')
	df = create_df(accuracies)
	sns.barplot(x='classifiers', y='scores', data=df)
	plt.savefig('figs/scores.png', transparent=True)
	create_plot(accuracies)

	# Print mean accuracies and standard deviations
	REPORT.info()
	REPORT.info('Accuracies:')
	for classifier in accuracies.keys():
		REPORT.info('{}: {} +/- {}'.format(classifier, np.mean(accuracies[classifier]), np.std(accuracies[classifier])))
	REPORT.info()
	
	# Run pairwise independent-samples, equal-variance t-test
	REPORT.info('Pairwise t-test:')
	p_value_min = 1.0
	classifiers_min = []
	for classifier1 in accuracies.keys():
		for classifier2 in accuracies.keys():
			if classifier1 == classifier2:
				continue
			scores1 = accuracies[classifier1]
			scores2 = accuracies[classifier2]
			p_value = ttest_ind(scores1, scores2, equal_var=True)[1]
			if p_value < p_value_min:
				p_value_min = p_value
				classifiers_min = [classifier1, classifier2]
			REPORT.info('{} <-> {}: {}'.format(classifier1, classifier2, p_value))
	REPORT.info('Min. p-value: {} between {} and {}'.format(p_value_min, classifiers_min[0], classifiers_min[1]))
	REPORT.info()

	# Run pairwise Wilcoxon ranked-sign test
	REPORT.info('Pairwise Wilcoxon ranked-sign test:')
	p_value_min = 1.0
	classifiers_min = []
	p_values = []
	classifiers = []
	for classifier1 in accuracies.keys():
		for classifier2 in accuracies.keys():
			if classifier1 == classifier2:
				continue
			scores1 = accuracies[classifier1]
			scores2 = accuracies[classifier2]
			p_value = wilcoxon(scores1, scores2, zero_method='wilcox')[1]
			if p_value < p_value_min:
				p_value_min = p_value
				classifiers_min = [classifier1, classifier2]
			if p_value < 0.05:
				p_values.append(p_value)
				classifiers.append([classifier1, classifier2])
			REPORT.info('{} <-> {}: wilcoxon p-value: {}'.format(classifier1, classifier2, p_value))
	REPORT.info('Min. p-value: {} between {} and {}'.format(p_value_min, classifiers_min[0], classifiers_min[1]))
	REPORT.info('Below 0.05:')
	for i in range(len(p_values)):
		REPORT.info('  p-value: {} between {} and {}'.format(p_values[i], classifiers[i][0], classifiers[i][1]))
	REPORT.info()

	# Run Friedman test
	REPORT.info('Friedman\'s Chi-Square test:')
	data = []
	for classifier in accuracies.keys():
		row = []
		for item in chunk(accuracies[classifier], 10):
			row.append(np.mean(item))
		data.append(row)
	data = pd.DataFrame(
		np.array(data).T,
		columns=accuracies.keys(),
		index=['D' + str(i) for i in range(10)])

	p_value = friedmanchisquare(
		data['svm-rbf'],
		data['svm-poly'],
		data['svm-sigmoid'],
		data['random-forest'],
		data['gp'])[1]
	REPORT.info()
	REPORT.info('p_value: {}'.format(p_value))

	# The p-value tells us IF there's a significant difference between
	# performance scores. The Q statistic allows us to determine which
	# classifiers are best.
	ranks = []
	for row in data.index:
		# A double argsort turns scores into ranks (rank 1 = highest
		# accuracy); a single reversed argsort would give sorting indices,
		# not ranks.
		scores_row = np.array(data.loc[row].tolist())
		ranked = (-scores_row).argsort().argsort() + 1
		ranks.append(ranked)
	ranks = pd.DataFrame(ranks, columns=data.columns, index=data.index)

	n = ranks.shape[0]
	k = ranks.shape[1]

	mean_ranks = []
	for column in ranks.columns:
		mean_ranks.append(np.mean(ranks[column]))
	mean_ranks = pd.DataFrame([mean_ranks], columns=ranks.columns, index=['mean_ranks'])
	print(mean_ranks)

	# Define q threshold at alpha = 0.05 and df = (n-1)*(k-1)
	qq_threshold = 3.85 / np.sqrt(2)
	REPORT.info('Q-threshold: {}'.format(qq_threshold))
	tuples = []
	for column1 in ranks.columns:
		for column2 in ranks.columns:
			if column1 == column2:
				continue
			if (column1, column2) not in tuples:
				tuples.append((column1, column2))
			if (column2, column1) in tuples:
				continue
			r1 = mean_ranks[column1][0]
			r2 = mean_ranks[column2][0]
			qq = abs(r1 - r2) / np.sqrt((k * (k + 1.0)) / (6.0 * n))
			REPORT.info('{} <-> {}: q = {}'.format(column1, column2, qq))
			if qq > qq_threshold:
				REPORT.info('  q-statistic significant: {} > {}'.format(qq, qq_threshold))

	# Close the report file
	REPORT.append_file(__file__)
	REPORT.close()
X, y = ClassesB.ReadCongress()
acc4 = ClassesB.KfoldAcc(X, y)

a = np.zeros([4, 4])
classifiers = ['naive', 'neigh', 'rule', 'tree']

for index, classifier in enumerate(classifiers):
    a[0, index] = np.mean(acc1[classifier])
    a[1, index] = np.mean(acc2[classifier])
    a[2, index] = np.mean(acc3[classifier])
    a[3, index] = np.mean(acc4[classifier])

from scipy.stats import friedmanchisquare, wilcoxon

statistic, p_value = friedmanchisquare(a[:, 0], a[:, 1], a[:, 2], a[:, 3])

# friedmanchisquare requires at least three samples, so the pairwise
# follow-up uses the Wilcoxon signed-rank test (the paired two-sample
# analogue) and stores its p-values in a dedicated matrix.
pairwise_p = np.zeros((len(classifiers), 6))
for index, classifier in enumerate(classifiers):
    pairwise_p[index, 0] = wilcoxon(acc1[classifier], acc2[classifier])[1]
    pairwise_p[index, 1] = wilcoxon(acc1[classifier], acc3[classifier])[1]
    pairwise_p[index, 2] = wilcoxon(acc1[classifier], acc4[classifier])[1]
    pairwise_p[index, 3] = wilcoxon(acc2[classifier], acc3[classifier])[1]
    pairwise_p[index, 4] = wilcoxon(acc2[classifier], acc4[classifier])[1]
    pairwise_p[index, 5] = wilcoxon(acc3[classifier], acc4[classifier])[1]
"""
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.")
    parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;")
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help="Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values."
    )
    parser.add_argument("--fisher", action="store_true", default=False, help="if true then Fisher definition is used")
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help="if false,then the calculations are corrected for statistical bias",
    )
    parser.add_argument("--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored")
    parser.add_argument(
        "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored"
    )
    parser.add_argument("--inclusive", action="store_true", default=False, help="if false,limit will be ignored")
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help="If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        default="False",
        help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument("--correction", action="store_true", default=False, help="continuity correction ")
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help="the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram")
    parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction")
    parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help="The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds")
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help="lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e")
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    outfile = open(args.outfile, "w+")
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols is not None:
        sample0 = 1
        bartlett_samples = []
        for sample in args.sample_cols.split(";"):
            bartlett_samples.append(map(int, sample.split(",")))
    if args.sample_one_cols is not None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(",")
    if args.sample_two_cols is not None:
        sample_two_cols = args.sample_two_cols.split(",")
        sample2 = 1
    for line in open(infile):
        sample_one = []
        sample_two = []
        cols = line.strip().split("\t")
        if sample0 == 1:
            b_samples = columns_to_values(bartlett_samples, line)
        if sample1 == 1:
            for index in sample_one_cols:
                sample_one.append(cols[int(index) - 1])
        if sample2 == 1:
            for index in sample_two_cols:
                sample_two.append(cols[int(index) - 1])
        if test_id.strip() == "describe":
            size, min_max, mean, uv, bs, bk = stats.describe(map(float, sample_one))
            cols.append(size)
            cols.append(min_max)
            cols.append(mean)
            cols.append(uv)
            cols.append(bs)
            cols.append(bk)
        elif test_id.strip() == "mode":
            vals, counts = stats.mode(map(float, sample_one))
            cols.append(vals)
            cols.append(counts)
        elif test_id.strip() == "nanmean":
            m = stats.nanmean(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "kurtosistest":
            z_value, p_value = stats.kurtosistest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "itemfreq":
            freq = stats.itemfreq(map(float, sample_one))
            for list in freq:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "boxcox_llf":
            IIf = stats.boxcox_llf(imbda, map(float, sample_one))
            cols.append(IIf)
        elif test_id.strip() == "tiecorrect":
            fa = stats.tiecorrect(map(float, sample_one))
            cols.append(fa)
        elif test_id.strip() == "rankdata":
            r = stats.rankdata(map(float, sample_one), method=args.md)
            cols.append(r)
        elif test_id.strip() == "nanstd":
            s = stats.nanstd(map(float, sample_one), bias=args.bias)
            cols.append(s)
        elif test_id.strip() == "anderson":
            A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist)
            cols.append(A2)
            for list in critical:
                cols.append(list)
            cols.append(",")
            for list in sig:
                cols.append(list)
        elif test_id.strip() == "binom_test":
            p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p)
            cols.append(p_value)
        elif test_id.strip() == "gmean":
            gm = stats.gmean(map(float, sample_one), dtype=args.dtype)
            cols.append(gm)
        elif test_id.strip() == "hmean":
            hm = stats.hmean(map(float, sample_one), dtype=args.dtype)
            cols.append(hm)
        elif test_id.strip() == "kurtosis":
            k = stats.kurtosis(map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias)
            cols.append(k)
        elif test_id.strip() == "moment":
            n_moment = stats.moment(map(float, sample_one), n=args.n)
            cols.append(n_moment)
        elif test_id.strip() == "normaltest":
            k2, p_value = stats.normaltest(map(float, sample_one))
            cols.append(k2)
            cols.append(p_value)
        elif test_id.strip() == "skew":
            skewness = stats.skew(map(float, sample_one), bias=args.bias)
            cols.append(skewness)
        elif test_id.strip() == "skewtest":
            z_value, p_value = stats.skewtest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "sem":
            s = stats.sem(map(float, sample_one), ddof=args.ddof)
            cols.append(s)
        elif test_id.strip() == "zscore":
            z = stats.zscore(map(float, sample_one), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "signaltonoise":
            s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof)
            cols.append(s2n)
        elif test_id.strip() == "percentileofscore":
            p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind)
            cols.append(p)
        elif test_id.strip() == "bayes_mvs":
            c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha)
            cols.append(c_mean)
            cols.append(c_var)
            cols.append(c_std)
        elif test_id.strip() == "sigmaclip":
            c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n)
            cols.append(c)
            cols.append(c_low)
            cols.append(c_up)
        elif test_id.strip() == "kstest":
            d, p_value = stats.kstest(
                map(float, sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode
            )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "chi2_contingency":
            chi2, p, dof, ex = stats.chi2_contingency(
                map(float, sample_one), correction=args.correction, lambda_=args.lambda_
            )
            cols.append(chi2)
            cols.append(p)
            cols.append(dof)
            cols.append(ex)
        elif test_id.strip() == "tmean":
            if nf == 0 and mf == 0:
                mean = stats.tmean(map(float, sample_one))
            else:
                mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(mean)
        elif test_id.strip() == "tmin":
            if mf == 0:
                min = stats.tmin(map(float, sample_one))
            else:
                min = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive)
            cols.append(min)
        elif test_id.strip() == "tmax":
            if nf == 0:
                max = stats.tmax(map(float, sample_one))
            else:
                max = stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive)
            cols.append(max)
        elif test_id.strip() == "tvar":
            if nf == 0 and mf == 0:
                var = stats.tvar(map(float, sample_one))
            else:
                var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(var)
        elif test_id.strip() == "tstd":
            if nf == 0 and mf == 0:
                std = stats.tstd(map(float, sample_one))
            else:
                std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(std)
        elif test_id.strip() == "tsem":
            if nf == 0 and mf == 0:
                s = stats.tsem(map(float, sample_one))
            else:
                s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(s)
        elif test_id.strip() == "scoreatpercentile":
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), interpolation_method=args.interpolation
                )
            else:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), (mf, nf), interpolation_method=args.interpolation
                )
            for list in s:
                cols.append(list)
        elif test_id.strip() == "relfreq":
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b)
            else:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b, (mf, nf))
            for list in rel:
                cols.append(list)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "binned_statistic":
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b
                )
            else:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                    range=(mf, nf),
                )
            cols.append(st)
            cols.append(b_edge)
            cols.append(b_n)
        elif test_id.strip() == "threshold":
            if nf == 0 and mf == 0:
                o = stats.threshold(map(float, sample_one), newval=args.new)
            else:
                o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trimboth":
            o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trim1":
            t1 = stats.trim1(map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail)
            for list in t1:
                cols.append(list)
        elif test_id.strip() == "histogram":
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b)
            else:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b, (mf, nf))
            cols.append(hi)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "cumfreq":
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b)
            else:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b, (mf, nf))
            cols.append(cum)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "boxcox_normmax":
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax(map(float, sample_one))
            else:
                ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method)
            cols.append(ma)
        elif test_id.strip() == "boxcox":
            if imbda == 0:
                box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha)
                cols.append(box)
                cols.append(ma)
                cols.append(ci)
            else:
                box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha)
                cols.append(box)
        elif test_id.strip() == "histogram2":
            h2 = stats.histogram2(map(float, sample_one), map(float, sample_two))
            for list in h2:
                cols.append(list)
        elif test_id.strip() == "ranksums":
            z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two))
            cols.append(z_statistic)
            cols.append(p_value)
        elif test_id.strip() == "ttest_1samp":
            t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two))
            for list in t:
                cols.append(list)
            for list in prob:
                cols.append(list)
        elif test_id.strip() == "ansari":
            AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two))
            cols.append(AB)
            cols.append(p_value)
        elif test_id.strip() == "linregress":
            slope, intercept, r_value, p_value, stderr = stats.linregress(
                map(float, sample_one), map(float, sample_two)
            )
            cols.append(slope)
            cols.append(intercept)
            cols.append(r_value)
            cols.append(p_value)
            cols.append(stderr)
        elif test_id.strip() == "pearsonr":
            cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two))
            cols.append(cor)
            cols.append(p_value)
        elif test_id.strip() == "pointbiserialr":
            r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two))
            cols.append(r)
            cols.append(p_value)
        elif test_id.strip() == "ks_2samp":
            d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two))
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "mannwhitneyu":
            mw_stats_u, p_value = stats.mannwhitneyu(
                map(float, sample_one), map(float, sample_two), use_continuity=args.mwu_use_continuity
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "zmap":
            z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "ttest_ind":
            mw_stats_u, p_value = stats.ttest_ind(
                map(float, sample_one), map(float, sample_two), equal_var=args.equal_var
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "ttest_rel":
            t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(t)
            cols.append(prob)
        elif test_id.strip() == "mood":
            z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(z)
            cols.append(p_value)
        elif test_id.strip() == "shapiro":
            W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta)
            cols.append(W)
            cols.append(p_value)
            for list in a:
                cols.append(list)
        elif test_id.strip() == "kendalltau":
            k, p_value = stats.kendalltau(
                map(float, sample_one), map(float, sample_two), initial_lexsort=args.initial_lexsort
            )
            cols.append(k)
            cols.append(p_value)
        elif test_id.strip() == "entropy":
            s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base)
            cols.append(s)
        elif test_id.strip() == "spearmanr":
            if sample2 == 1:
                rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two))
            else:
                rho, p_value = stats.spearmanr(map(float, sample_one))
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "wilcoxon":
            if sample2 == 1:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    map(float, sample_two),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            else:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one), zero_method=args.zero_method, correction=args.correction
                )
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "chisquare":
            if sample2 == 1:
                rho, p_value = stats.chisquare(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            else:
                rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof)
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "power_divergence":
            if sample2 == 1:
                stat, p_value = stats.power_divergence(
                    map(float, sample_one), map(float, sample_two), ddof=args.ddof, lambda_=args.lambda_
                )
            else:
                stat, p_value = stats.power_divergence(map(float, sample_one), ddof=args.ddof, lambda_=args.lambda_)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "theilslopes":
            if sample2 == 1:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two), alpha=args.alpha)
            else:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha)
            cols.append(mpe)
            cols.append(met)
            cols.append(lo)
            cols.append(up)
        elif test_id.strip() == "combine_pvalues":
            if sample2 == 1:
                stat, p_value = stats.combine_pvalues(
                    map(float, sample_one), method=args.med, weights=map(float, sample_two)
                )
            else:
                stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "obrientransform":
            ob = stats.obrientransform(*b_samples)
            for list in ob:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "f_oneway":
            f_value, p_value = stats.f_oneway(*b_samples)
            cols.append(f_value)
            cols.append(p_value)
        elif test_id.strip() == "kruskal":
            h, p_value = stats.kruskal(*b_samples)
            cols.append(h)
            cols.append(p_value)
        elif test_id.strip() == "friedmanchisquare":
            fr, p_value = stats.friedmanchisquare(*b_samples)
            cols.append(fr)
            cols.append(p_value)
        elif test_id.strip() == "fligner":
            xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(xsq)
            cols.append(p_value)
        elif test_id.strip() == "bartlett":
            T, p_value = stats.bartlett(*b_samples)
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "levene":
            w, p_value = stats.levene(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(w)
            cols.append(p_value)
        elif test_id.strip() == "median_test":
            stat, p_value, m, table = stats.median_test(
                ties=args.ties, correction=args.correction, lambda_=args.lambda_, *b_samples
            )
            cols.append(stat)
            cols.append(p_value)
            cols.append(m)
            cols.append(table)
            for list in table:
                elements = ",".join(map(str, list))
                cols.append(elements)
        outfile.write("%s\n" % "\t".join(map(str, cols)))
    outfile.close()

def friction_stats(df, effect_variable='mu'):
    """
    Builds a statistical report for a data frame
    """
    pass


if __name__ == '__main__':
    # par_dir = fh.get_dir()
    par_dir = '/home/michael/Dropbox/Eyelid_edge_dev/Test_Data/BiomomentumData/TestData/MS1604'

    df = analyze_multi(parent_dir=par_dir, save=False)
    print(df)
    # repeated measures variables

    # Figure out n and get descriptive statistics

    # Friedman chi square test: friedmanchisquare expects one sequence of
    # repeated measures per lens, so `measurements` must be filled first
    measurements = []
    num_lenses = None
    fried_stats, fried_ps = stats.friedmanchisquare(*measurements)
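    # A minimal, self-contained sketch (column names 'lens_id' and 'mu' are
    # hypothetical) of the grouping the comments above describe: one
    # repeated-measures series per lens, unpacked into the Friedman test.
    import pandas as pd
    toy = pd.DataFrame({'lens_id': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c'],
                        'mu': [0.21, 0.24, 0.22, 0.25, 0.27, 0.26, 0.19, 0.20, 0.18]})
    grouped = [grp['mu'].tolist() for _, grp in toy.groupby('lens_id')]
    print(stats.friedmanchisquare(*grouped))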