Esempio n. 1
0
def test_tost_asym():
    """Exercise the TOST helpers with asymmetric equivalence bounds.

    The single-endpoint and the vectorized multi-endpoint ``ttost_ind``
    p-values are compared with the stored reference objects
    ``tost_clinic_1_asym`` and ``tost_clinic_all_no_multi``.  A "k on 1"
    call is cross-checked against the matching "1 on 1" call.  Every other
    call is a smoke test whose return value is discarded.
    """
    upper = 0.6
    unequal = {'usevar': 'unequal'}

    def lower_k():
        # A new list per call, mirroring the inline literals of each call.
        return [-1.0, -1.0, -1.5, -1.5, -1.5]

    def lower_mixed():
        return [-1.0, -0.5, -0.7, -1.5, -1.5]

    head_col, tail_col = clinic[:15, 2], clinic[15:, 2]
    # Note: the two groups are reversed by definition in multeq.dif
    assert_almost_equal(tail_col.mean() - head_col.mean(),
                        tost_clinic_1_asym.estimate, 13)
    single = smws.ttost_ind(tail_col, head_col, -1.5, upper, **unequal)
    assert_almost_equal(single[0], tost_clinic_1_asym.p_value, 13)

    # multi-endpoints, asymmetric bounds, vectorized
    vectorized = smws.ttost_ind(clinic[15:, 2:7], clinic[:15, 2:7],
                                lower_k(), upper, **unequal)
    assert_almost_equal(vectorized[0], tost_clinic_all_no_multi.p_value, 13)

    # SMOKE tests: for multi-endpoint vectorized, k on k
    smws.ttost_ind(clinic[15:, 2:7], clinic[:15, 2:7], np.exp(lower_k()),
                   upper, transform=np.log, **unequal)
    smws.ttost_ind(clinic[15:, 2:7], clinic[:15, 2:7], lower_k(),
                   upper, transform=np.exp, **unequal)

    smws.ttost_paired(clinic[15:, 2:7], clinic[:15, 2:7], lower_k(),
                      upper, transform=np.log)
    smws.ttost_paired(clinic[15:, 2:7], clinic[:15, 2:7], lower_k(),
                      upper, transform=np.exp)

    smws.ttest_ind(clinic[15:, 2:7], clinic[:15, 2:7], value=lower_k())

    # k on 1: compare all with reference
    k_on_one = smws.ttost_ind(clinic[15:, 2:7], clinic[:15, 2:3],
                              lower_k(), upper, **unequal)
    one_on_one = smws.ttost_ind(clinic[15:, 3:4], clinic[:15, 2:3],
                                lower_k(), upper, **unequal)
    assert_almost_equal(k_on_one[0][1], one_on_one[0][1], decimal=13)

    # 1 on 1 with several lower bounds; scalar, then array upper bound
    smws.ttost_ind(clinic[15:, 2], clinic[:15, 2], lower_mixed(),
                   upper, **unequal)
    smws.ttost_ind(clinic[15:, 2], clinic[:15, 2], lower_mixed(),
                   np.repeat(0.6, 5), **unequal)
Esempio n. 2
0
def test_tost_log():
    """Compare log-transformed TOST p-values with the stored references.

    Both the paired and the independent-sample (``usevar='unequal'``)
    variants use the bounds 0.8 and 1.25 together with ``transform=np.log``.
    """
    head_col, tail_col = clinic[:15, 2], clinic[15:, 2]
    ratio_bounds = (0.8, 1.25)

    paired = smws.ttost_paired(head_col, tail_col, *ratio_bounds,
                               transform=np.log)
    assert_almost_equal(paired[0], tost_clinic_1_paired.p_value, 13)

    independent = smws.ttost_ind(head_col, tail_col, *ratio_bounds,
                                 transform=np.log, usevar='unequal')
    assert_almost_equal(independent[0], tost_clinic_1_indep.p_value, 13)
Esempio n. 3
0
def two_sample_equivalence_test(data_a, data_b):
    """Run a two one-sided t-test (TOST) on two samples and summarise it.

    Args:
        data_a: First sample; must support ``len()``.
        data_b: Second sample; must support ``len()``.

    Returns:
        A dict with the keys ``mean_a``, ``std_a``, ``mean_b``, ``std_b``,
        ``delta``, ``p_val`` and ``error``.  If either sample is empty,
        every numeric entry is ``0`` and ``error`` is ``True``; otherwise
        ``error`` is ``False`` and ``p_val`` is the first element of the
        ``ttost_ind`` result for the bounds ``(-delta, +delta)``.
    """
    # Guard clause: without data in both samples there is nothing to test.
    if not (len(data_a) > 0 and len(data_b) > 0):
        return {
            'mean_a': 0,
            'std_a': 0,
            'mean_b': 0,
            'std_b': 0,
            'delta': 0,
            'p_val': 0,
            'error': True
        }

    # The configuration decides which delta calculation method is used.
    if read_id_from_config("FaultSight", "useDeltaFormulaForTost"):
        delta = calculate_delta_by_formula(data_a, data_b)
    else:
        delta = calculate_delta_by_value(data_a, data_b)

    # usevar is 'pooled' (samples assumed to share one standard deviation)
    # or 'unequal' (Welch t-test with Satterthwaite degrees of freedom).
    variance_mode = 'unequal'

    # Run the TOST test; the import is deferred until it is actually needed.
    import statsmodels.stats.weightstats as ssws
    tost_result = ssws.ttost_ind(data_a, data_b, -1 * delta, delta,
                                 usevar=variance_mode)
    p_value = tost_result[0]

    mean_a, std_a = find_mean_and_std(data_a)
    mean_b, std_b = find_mean_and_std(data_b)

    report_lines = (
        ("Mean a: ", mean_a),
        ("Std a: ", std_a),
        ("Mean b: ", mean_b),
        ("Std b: ", std_b),
        ("Delta: ", delta),
        ("TOST P-val: ", p_value),
    )
    for label, value in report_lines:
        print(label, value)

    return {
        'mean_a': mean_a,
        'std_a': std_a,
        'mean_b': mean_b,
        'std_b': std_b,
        'delta': delta,
        'p_val': p_value,
        'error': False
    }
Esempio n. 4
0
def test_tost_log():
    """Check paired and independent TOST p-values under a log transform.

    The bounds 0.8 and 1.25 are passed with ``transform=np.log``; the first
    element of each result is compared to the stored reference p-value to
    13 decimals.
    """
    shared = {'transform': np.log}
    lower, upper = 0.8, 1.25
    first_half, second_half = clinic[:15, 2], clinic[15:, 2]

    # paired samples
    paired_pvalue = smws.ttost_paired(first_half, second_half,
                                      lower, upper, **shared)[0]
    assert_almost_equal(paired_pvalue, tost_clinic_1_paired.p_value, 13)

    # independent samples, unequal variances
    indep_pvalue = smws.ttost_ind(first_half, second_half, lower, upper,
                                  usevar='unequal', **shared)[0]
    assert_almost_equal(indep_pvalue, tost_clinic_1_indep.p_value, 13)
Esempio n. 5
0
def test_tost_asym():
    """Check asymmetric-bound TOST results and smoke-test vectorized calls.

    Reference comparisons use ``tost_clinic_1_asym`` (single endpoint) and
    ``tost_clinic_all_no_multi`` (five endpoints at once).  The "k on 1"
    result is checked against a "1 on 1" call for the same column; the
    remaining calls only need to run without raising.
    """
    def k_lower():
        # Fresh list for every call, like the literals it stands in for.
        return [-1.0, -1.0, -1.5, -1.5, -1.5]

    first_half = clinic[:15, 2]
    second_half = clinic[15:, 2]
    # Note: the two groups are reversed by definition in multeq.dif
    observed_diff = second_half.mean() - first_half.mean()
    assert_almost_equal(observed_diff, tost_clinic_1_asym.estimate, 13)
    one_endpoint = smws.ttost_ind(second_half, first_half, -1.5, 0.6,
                                  usevar='unequal')
    assert_almost_equal(one_endpoint[0], tost_clinic_1_asym.p_value, 13)

    # multi-endpoints, asymmetric bounds, vectorized
    all_endpoints = smws.ttost_ind(clinic[15:, 2:7], clinic[:15, 2:7],
                                   k_lower(), 0.6, usevar='unequal')
    assert_almost_equal(all_endpoints[0],
                        tost_clinic_all_no_multi.p_value, 13)

    # SMOKE tests: for multi-endpoint vectorized, k on k
    smws.ttost_ind(clinic[15:, 2:7], clinic[:15, 2:7],
                   np.exp(k_lower()), 0.6,
                   usevar='unequal', transform=np.log)
    smws.ttost_ind(clinic[15:, 2:7], clinic[:15, 2:7],
                   k_lower(), 0.6,
                   usevar='unequal', transform=np.exp)

    # paired variant: log first, then exp
    for func in (np.log, np.exp):
        smws.ttost_paired(clinic[15:, 2:7], clinic[:15, 2:7],
                          k_lower(), 0.6, transform=func)

    smws.ttest_ind(clinic[15:, 2:7], clinic[:15, 2:7], value=k_lower())

    # k on 1: compare all with reference
    k_on_one = smws.ttost_ind(clinic[15:, 2:7], clinic[:15, 2:3],
                              k_lower(), 0.6, usevar='unequal')
    one_on_one = smws.ttost_ind(clinic[15:, 3:4], clinic[:15, 2:3],
                                k_lower(), 0.6, usevar='unequal')
    assert_almost_equal(k_on_one[0][1], one_on_one[0][1], decimal=13)

    # single column against several lower bounds
    smws.ttost_ind(clinic[15:, 2], clinic[:15, 2],
                   [-1.0, -0.5, -0.7, -1.5, -1.5], 0.6, usevar='unequal')
    smws.ttost_ind(clinic[15:, 2], clinic[:15, 2],
                   [-1.0, -0.5, -0.7, -1.5, -1.5],
                   np.repeat(0.6, 5), usevar='unequal')
Esempio n. 6
0
 def setup_class(cls):
     """Set the reference result and compute the matching TOST p-value.

     ``cls.res2`` is ``tost_clinic_indep_2_pooled``; ``cls.res1`` is a
     ``Holder`` whose ``pvalue`` comes from ``ttost_ind`` on column index 3
     with bounds -0.6 and 0.6 and ``usevar='pooled'``.
     """
     cls.res2 = tost_clinic_indep_2_pooled
     first_rows, last_rows = clinic[:15, 3], clinic[15:, 3]
     cls.res1 = Holder()
     cls.res1.pvalue = smws.ttost_ind(first_rows, last_rows, -0.6, 0.6,
                                      usevar='pooled')[0]
Esempio n. 7
0
 def setup_class(cls):
     """Set the reference result and compute the matching TOST p-value.

     ``cls.res2`` is ``tost_clinic_indep_1``; ``cls.res1`` is a ``Holder``
     whose ``pvalue`` comes from ``ttost_ind`` on column index 2 with
     bounds -0.6 and 0.6 and ``usevar='unequal'``.
     """
     cls.res2 = tost_clinic_indep_1
     first_rows, last_rows = clinic[:15, 2], clinic[15:, 2]
     cls.res1 = Holder()
     cls.res1.pvalue = smws.ttost_ind(first_rows, last_rows, -0.6, 0.6,
                                      usevar='unequal')[0]
Esempio n. 8
0
# Descriptive metadata attached to the stored reference result object.
# NOTE(review): the strings look like they were transcribed from R output
# ('clinic$var1[...]') - confirm against the generating script.
ttest_clinic_indep_1_two_mu_pooled.method = ' Two Sample t-test'
ttest_clinic_indep_1_two_mu_pooled.data_name = 'clinic$var1[1:15] and clinic$var1[16:30]'

# Module-level TOST calls with the symmetric bounds (-0.6, 0.6):
# paired on column indices 2 and 3, then independent samples with
# usevar='unequal' on column index 3.
# NOTE(review): no later use of res1/res2/res is visible in this excerpt.
res1 = smws.ttost_paired(clinic[:15, 2],
                         clinic[15:, 2],
                         -0.6,
                         0.6,
                         transform=None)
res2 = smws.ttost_paired(clinic[:15, 3],
                         clinic[15:, 3],
                         -0.6,
                         0.6,
                         transform=None)
res = smws.ttost_ind(clinic[:15, 3],
                     clinic[15:, 3],
                     -0.6,
                     0.6,
                     usevar='unequal')


class CheckTostMixin:
    """Mixin providing the p-value comparison shared by the TOST test classes.

    The using class must supply ``res1`` (with a ``pvalue`` attribute) and
    ``res2`` (with a ``p_value`` attribute).
    """

    def test_pval(self):
        """Assert that ``res1.pvalue`` and ``res2.p_value`` agree to 13 decimals."""
        computed = self.res1.pvalue
        reference = self.res2.p_value
        assert_almost_equal(computed, reference, decimal=13)
        # The degrees-of-freedom comparison is left disabled:
        #assert_almost_equal(self.res1.df, self.res2.df, decimal=13)


class TestTostp1(CheckTostMixin):
    """TOST test case for paired var1; inherits ``test_pval`` from the mixin."""
    #paired var1
    @classmethod
    def setup_class(cls):
        # Stored reference result that test_pval compares against.
        cls.res2 = tost_clinic_paired_1
        # NOTE(review): test_pval also reads cls.res1.pvalue, which is not
        # assigned in the lines visible here - confirm it is set elsewhere
        # or that this excerpt is truncated.
Esempio n. 9
0
 def setup_class(cls):
     """Prepare the pooled-variance independent TOST fixture.

     Stores ``tost_clinic_indep_2_pooled`` as ``cls.res2`` and a ``Holder``
     as ``cls.res1`` whose ``pvalue`` is the first element returned by
     ``ttost_ind`` for column index 3 and the bounds (-0.6, 0.6).
     """
     margin = 0.6
     cls.res2 = tost_clinic_indep_2_pooled
     sample_a, sample_b = clinic[:15, 3], clinic[15:, 3]
     cls.res1 = Holder()
     tost_result = smws.ttost_ind(sample_a, sample_b, -margin, margin,
                                  usevar='pooled')
     cls.res1.pvalue = tost_result[0]
Esempio n. 10
0
 def setup_class(cls):
     """Prepare the unequal-variance independent TOST fixture.

     Stores ``tost_clinic_indep_1`` as ``cls.res2`` and a ``Holder`` as
     ``cls.res1`` whose ``pvalue`` is the first element returned by
     ``ttost_ind`` for column index 2 and the bounds (-0.6, 0.6).
     """
     margin = 0.6
     cls.res2 = tost_clinic_indep_1
     sample_a, sample_b = clinic[:15, 2], clinic[15:, 2]
     cls.res1 = Holder()
     tost_result = smws.ttost_ind(sample_a, sample_b, -margin, margin,
                                  usevar='unequal')
     cls.res1.pvalue = tost_result[0]
Esempio n. 11
0
# Stored reference values for a two-sample t-test, set as attributes on an
# object created earlier in the file (its creation is not visible here).
# NOTE(review): attribute names and strings ('two.sided', 'clinic$var1[...]')
# suggest the values were transcribed from R's t.test output - confirm.
ttest_clinic_indep_1_two_mu_pooled.statistic = -3.299592184135305
ttest_clinic_indep_1_two_mu_pooled.parameter = 28
ttest_clinic_indep_1_two_mu_pooled.p_value = 0.002643203760742494
ttest_clinic_indep_1_two_mu_pooled.conf_int = (-0.35391340938235, 0.6832467427156834)
ttest_clinic_indep_1_two_mu_pooled.estimate = (3.498, 3.333333333333333)
ttest_clinic_indep_1_two_mu_pooled.null_value = 1
ttest_clinic_indep_1_two_mu_pooled.alternative = 'two.sided'
ttest_clinic_indep_1_two_mu_pooled.method = ' Two Sample t-test'
ttest_clinic_indep_1_two_mu_pooled.data_name = 'clinic$var1[1:15] and clinic$var1[16:30]'




# Module-level TOST calls with the symmetric bounds (-0.6, 0.6): paired on
# column indices 2 and 3, then independent with usevar='unequal' on index 3.
# NOTE(review): no later use of res1/res2/res is visible in this excerpt.
res1 = smws.ttost_paired(clinic[:15, 2], clinic[15:, 2], -0.6, 0.6, transform=None)
res2 = smws.ttost_paired(clinic[:15, 3], clinic[15:, 3], -0.6, 0.6, transform=None)
res = smws.ttost_ind(clinic[:15, 3], clinic[15:, 3], -0.6, 0.6, usevar='unequal')


class CheckTostMixin(object):
    """Shared p-value check for the TOST test classes.

    Classes mixing this in are expected to set ``res1`` (exposing
    ``pvalue``) and ``res2`` (exposing ``p_value``).
    """

    def test_pval(self):
        """Compare the computed p-value with the reference to 13 decimals."""
        ours = self.res1.pvalue
        theirs = self.res2.p_value
        assert_almost_equal(ours, theirs, decimal=13)
        # The degrees-of-freedom comparison is left disabled:
        #assert_almost_equal(self.res1.df, self.res2.df, decimal=13)

class TestTostp1(CheckTostMixin):
    """TOST test case for paired var1; inherits ``test_pval`` from the mixin."""
    #paired var1
    @classmethod
    def setup_class(cls):
        # Stored reference result that test_pval compares against.
        cls.res2 = tost_clinic_paired_1
        # First 15 rows and remaining rows of column index 2.
        x1, x2 = clinic[:15, 2], clinic[15:, 2]
        # Container for the value computed by this test.
        # NOTE(review): res1.pvalue (read by test_pval) is not assigned in
        # the lines visible here - the excerpt is presumably truncated.
        cls.res1 = Holder()
Esempio n. 12
0
 def __init__(self):
     """Store the reference result and the computed TOST p-value.

     ``self.res2`` is ``tost_clinic_indep_2_pooled``; ``self.res1`` is a
     ``Holder`` whose ``pvalue`` comes from ``ttost_ind`` on column index 3
     with bounds -0.6 and 0.6 and ``usevar='pooled'``.
     """
     self.res2 = tost_clinic_indep_2_pooled
     first_rows, last_rows = clinic[:15, 3], clinic[15:, 3]
     self.res1 = Holder()
     self.res1.pvalue = smws.ttost_ind(first_rows, last_rows, -0.6, 0.6,
                                       usevar='pooled')[0]
Esempio n. 13
0
 def __init__(self):
     """Store the reference result and the computed TOST p-value.

     ``self.res2`` is ``tost_clinic_indep_1``; ``self.res1`` is a ``Holder``
     whose ``pvalue`` comes from ``ttost_ind`` on column index 2 with
     bounds -0.6 and 0.6 and ``usevar='unequal'``.
     """
     self.res2 = tost_clinic_indep_1
     first_rows, last_rows = clinic[:15, 2], clinic[15:, 2]
     self.res1 = Holder()
     self.res1.pvalue = smws.ttost_ind(first_rows, last_rows, -0.6, 0.6,
                                       usevar='unequal')[0]
Esempio n. 14
0
 def __init__(self):
     """Prepare the pooled-variance independent TOST fixture.

     Keeps ``tost_clinic_indep_2_pooled`` as ``self.res2`` and a ``Holder``
     as ``self.res1`` whose ``pvalue`` is the first element returned by
     ``ttost_ind`` for column index 3 and the bounds (-0.6, 0.6).
     """
     margin = 0.6
     self.res2 = tost_clinic_indep_2_pooled
     sample_a, sample_b = clinic[:15, 3], clinic[15:, 3]
     self.res1 = Holder()
     tost_result = smws.ttost_ind(sample_a, sample_b, -margin, margin,
                                  usevar='pooled')
     self.res1.pvalue = tost_result[0]
Esempio n. 15
0
 def __init__(self):
     """Prepare the unequal-variance independent TOST fixture.

     Keeps ``tost_clinic_indep_1`` as ``self.res2`` and a ``Holder`` as
     ``self.res1`` whose ``pvalue`` is the first element returned by
     ``ttost_ind`` for column index 2 and the bounds (-0.6, 0.6).
     """
     margin = 0.6
     self.res2 = tost_clinic_indep_1
     sample_a, sample_b = clinic[:15, 2], clinic[15:, 2]
     self.res1 = Holder()
     tost_result = smws.ttost_ind(sample_a, sample_b, -margin, margin,
                                  usevar='unequal')
     self.res1.pvalue = tost_result[0]
Esempio n. 16
0
def docalc(args, data, len_data, sims, len_sims, error):
    """
	# Fitness Calculation Template:
	if set(args.error).issuperset(set(['the-acronysm'])):
		1. func = 0

		2. func = an algebraic expression combining the data average (data_avrg), data standard deviation (data_stdv), simulation average (sims_avrg),
		simulation standard deviation (sims_stdv), single experimental files (data.loc[i]), and/or simulation files (sims.loc[j])
		Note1: Perform two for-loops if using data.loc[i] and sims.loc[j].
		Note2: Please consider that these variables are DataFrames, meaning that multiplication and division are methods (e.g. df1.divide(df2))

		3. Drop NaN values (from experimental time points without simulated values, or simulated values without experimental data)
		with dropna(axis = 0, how = 'all').dropna(axis = 1, how = 'all'). Also transform Inf values with replace([numpy.inf, -numpy.inf], numpy.nan)

		4. Sum the two dimensions, and return a 6 float points scientific notation number (0 float points for statistical tests):
		error['the-acronysm'] = '{:.6e}'.format(func.dropna(axis = 0, how = 'all').dropna(axis = 1, how = 'all').sum().sum())
	"""

    if args.do_all:
        args.error = [
            'SDA', 'ADA', 'SSQ', 'CHISQ', 'MNSE', 'PWSD', 'APWSD', 'NPWSD',
            'ANPWSD', 'MWUT', 'WMWET', 'TOST', 'DUT'
        ]
        """
		SDA    : Squared Difference of Averages
		ADA    : Absolute Difference of Averages
		SSQ    : Sum of SQuares
		CHISQ  : Chi-Square (Differences divided by data standard deviation)
		MNSE   : Mean Normalized Square Error (Differences divided by data average)
		PWSD   : Pair-Wise Square Deviation
		APWSD  : Absolute Pair-Wise Deviation
		NPWSD  : Normalized Pair-Wise Square Deviation
		ANPWSD : Absolute Normalized Pair-Wise Deviation
		MWUT   : Mann-Whitney U-test (Mann and Whitney, 1947, DOI 10.1214/aoms/1177730491)
		WMWET  : Wellek's Mann-Whitney Equivalence Test (Wellek 1996, DOI 10.1002/bimj.4710380608)
		TOST   : Two one-sided t-tests (Dunnet and Gent, 1977, DOI 10.2307/2529457, as well other authors)
		DUT    : Double Mann-Whitney U-tests (Reviewed in Cornell, 1990, DOI 10.1080/03610929008830433)

		More information in https://pleione.readthedocs.io/en/latest/ObjectiveFunctions.html
		"""

        data_avrg = doavrg(data, len_data)
        data_stdv = dostdv(data, len_data)

        sims_avrg = doavrg(sims, len_sims)
        sims_stdv = dostdv(sims, len_sims)

    # former mean square error, now square difference of means
    if set(args.error).issuperset(set(['SDA'])) or set(args.error).issuperset(
            set(['MSE'])):
        func = 0

        if not args.do_all:
            data_avrg = doavrg(data, len_data)
            sims_avrg = doavrg(sims, len_sims)

        func = (data_avrg - sims_avrg)**2

        error['SDA'] = '{:.6e}'.format(
            func.dropna(axis=0, how='all').dropna(axis=1,
                                                  how='all').sum().sum())

    # former mean absolute error, now absolute value of the difference of means
    if set(args.error).issuperset(set(['ADA'])) or set(args.error).issuperset(
            set(['MAE'])):
        func = 0

        if not args.do_all:
            data_avrg = doavrg(data, len_data)
            sims_avrg = doavrg(sims, len_sims)

        func = abs(data_avrg - sims_avrg)

        error['ADA'] = '{:.6e}'.format(
            func.dropna(axis=0, how='all').dropna(axis=1,
                                                  how='all').sum().sum())

    # sum of squares (from BioNetFit paper)
    if set(args.error).issuperset(set(['SSQ'])):
        func = 0

        for i in range(len_data):
            for j in range(len_sims):
                func += (data.loc[i] - sims.loc[j])**2

        error['SSQ'] = '{:.6e}'.format(
            func.dropna(axis=0, how='all').dropna(axis=1,
                                                  how='all').sum().sum())

    # chi-square (from BioNetFit paper)
    if set(args.error).issuperset(set(['CHISQ'])):
        func = 0

        if not args.do_all:
            data_stdv = dostdv(data, len_data)

        for i in range(len_data):
            for j in range(len_sims):
                func += ((data.loc[i] - sims.loc[j]).divide(data_stdv))**2

        error['CHISQ'] = '{:.6e}'.format(
            func.dropna(axis=0, how='all').dropna(axis=1,
                                                  how='all').sum().sum())

    # mean normalized square error (from BioNetFit paper)
    if set(args.error).issuperset(set(['MNSE'])):
        func = 0

        if not args.do_all:
            data_avrg = doavrg(data, len_data)

        for i in range(len_data):
            for j in range(len_sims):
                func += ((data.loc[i] - sims.loc[j]).divide(data_avrg))**2

        error['MNSE'] = '{:.6e}'.format(
            func.replace([numpy.inf, -numpy.inf], numpy.nan).dropna(
                axis=0, how='all').dropna(axis=1, how='all').sum().sum())

    # pair-wise square deviation
    if set(args.error).issuperset(set(['PWSD'])):
        func = 0

        for i in range(len_data):
            for j in range(len_sims):
                func += ((data.loc[i] - sims.loc[j])**2).divide(len_data *
                                                                len_sims)

        error['PWSD'] = '{:.6e}'.format(
            func.dropna(axis=0, how='all').dropna(axis=1,
                                                  how='all').sum().sum())

    # pair-wise absolute deviation
    if set(args.error).issuperset(set(['APWSD'])):
        func = 0

        for i in range(len_data):
            for j in range(len_sims):
                func += (abs(data.loc[i] - sims.loc[j])).divide(len_data *
                                                                len_sims)

        error['APWSD'] = '{:.6e}'.format(
            func.dropna(axis=0, how='all').dropna(axis=1,
                                                  how='all').sum().sum())

    # normalized pair-wise square deviation (also implemented in BioNetFit as equation 3, but not normalized by the number of data * sims)
    if set(args.error).issuperset(set(['NPWSD'])):
        func = 0

        for i in range(len_data):
            for j in range(len_sims):
                func += (((data.loc[i] - sims.loc[j]).divide(
                    data.loc[i]))**2).divide(len_data * len_sims)

        error['NPWSD'] = '{:.6e}'.format(
            func.replace([numpy.inf, -numpy.inf], numpy.nan).dropna(
                axis=0, how='all').dropna(axis=1, how='all').sum().sum())

    # normalized pair-wise absolute deviation
    if set(args.error).issuperset(set(['ANPWSD'])):
        func = 0

        for i in range(len_data):
            for j in range(len_sims):
                func += (abs((data.loc[i] - sims.loc[j]).divide(
                    data.loc[i]))).divide(len_data * len_sims)

        error['ANPWSD'] = '{:.6e}'.format(
            func.replace([numpy.inf, -numpy.inf], numpy.nan).dropna(
                axis=0, how='all').dropna(axis=1, how='all').sum().sum())
    """
	Wellek's Mann-Whitney Equivalence Test.
	Based on mawi.R script from the EQUIVNONINF package
	modifications done to perform the test "vectorized"
	(it compares two matrices; the first has all exp data, the second all the simulations)
	"""
    if set(args.error).issuperset(set(['WMWET'])):
        from scipy.stats import ncx2
        # useful variables (namespace identical to mawi.R script)
        m = len_data  # x = data
        n = len_sims  # y = sims
        eps1_ = .3129  # Wellek's paper
        eps2_ = .2661  # Wellek's paper
        eqctr = 0.5 + (eps2_ - eps1_) / 2
        eqleng = eps1_ + eps2_

        # estimators needed for calculations
        wxy = pandas.DataFrame(index=sims.loc[0].index,
                               columns=sims.loc[0].columns).fillna(0)
        pihxxy = pandas.DataFrame(index=sims.loc[0].index,
                                  columns=sims.loc[0].columns).fillna(0)
        pihxyy = pandas.DataFrame(index=sims.loc[0].index,
                                  columns=sims.loc[0].columns).fillna(0)
        sigmah = pandas.DataFrame(index=sims.loc[0].index,
                                  columns=sims.loc[0].columns).fillna(0)

        # ŷ estimator (wxy in mawi.R)
        # equation 1.2 from Wellek 1996 paper
        # for (i in 1:m) for (j in 1:n) wxy <- wxy + trunc(0.5 * (sign(x[i] - y[j]) + 1))
        for i in range(m):
            for j in range(n):
                diff = (data.loc[i] - sims.loc[j])
                diff = diff.dropna(axis=0, how='all').dropna(axis=1, how='all')
                diff = diff.apply(numpy.sign)
                diff = diff + 1
                diff = diff.multiply(0.5)
                diff = diff.apply(numpy.trunc)
                # add to ŷ (wxy in mawi.R)
                wxy += diff

        # yFFG estimator (pihxxy in mawi.R)
        # equation 2.5a from Wellek 1996 paper
        #for (i1 in 1:(m - 1)) for (i2 in (i1 + 1):m) for (j in 1:n) pihxxy <- pihxxy + trunc(0.5 * (sign(min(x[i1], x[i2]) - y[j]) + 1))
        for xi1 in range(m - 1):
            for xi2 in range(xi1 + 1, m):
                for xj in range(n):
                    diff = data.loc[xi1].where(data.loc[xi1] < data.loc[xi2],
                                               data.loc[xi2]) - sims.loc[xj]
                    diff = diff.dropna(axis=0, how='all').dropna(axis=1,
                                                                 how='all')
                    diff = diff.apply(numpy.sign)
                    diff = diff + 1
                    diff = diff.multiply(0.5)
                    diff = diff.apply(numpy.trunc)
                    # add to yFFG (pihxxy in mawi.R)
                    pihxxy += diff

        # yFGG estimator (pihxyy in mawi.R)
        # equation 2.5b from Wellek 1996 paper
        # for (i in 1:m) for (j1 in 1:(n - 1)) for (j2 in (j1 + 1):n) pihxyy <- pihxyy + trunc(0.5 * (sign(x[i] - max(y[j1], y[j2])) + 1))
        for xi in range(m):
            for xj1 in range(n - 1):
                for xj2 in range(xj1 + 1, n):
                    diff = (data.loc[xi] - sims.loc[xj1].where(
                        sims.loc[xj1] > sims.loc[xj2], sims.loc[xj2]))
                    diff = diff.dropna(axis=0, how='all').dropna(axis=1,
                                                                 how='all')
                    diff = diff.apply(numpy.sign)
                    diff = diff + 1
                    diff = diff.multiply(0.5)
                    diff = diff.apply(numpy.trunc)
                    # add to yFGG (pihxyy in mawi.R)
                    pihxyy += diff

        # in equation 1.2
        wxy = wxy.divide(m * n)
        # in equation 2.5a, inverse of (m choose 2 = 0.5 * (m-1) * m), then divided by n
        pihxxy = pihxxy.multiply(2).divide(m * (m - 1) * n)
        # in equation 2.5b, inverse of (n choose 2 = 0.5 * (n-1) * n), then divided by m
        pihxyy = pihxyy.multiply(2).divide(n * (n - 1) * m)

        # variance estimator sigmah (same name as in mawi.R)
        # equation 2.6 from Wellek 1996 paper
        # sigmah <- sqrt((wxy - (m + n - 1) * wxy^2 + (m - 1) * pihxxy + (n - 1) * pihxyy)/(m * n))
        sigmah = wxy - (wxy**2).multiply(m + n - 1) + pihxxy.multiply(
            m - 1) + pihxyy.multiply(n - 1)
        sigmah = sigmah.divide(m * n)
        sigmah = sigmah**0.5

        # critical value
        # right hand of inequality 2.8 from Wellek 1996 paper
        phi = ((eqleng / 2) / sigmah)**2
        # crit <- sqrt(qchisq(alpha, 1, (eqleng/2/sigmah)^2))
        # Ca(phi) is the square root of the alpha-th quantile of the chi2-distribution with a single degree of freedom and non-centrality parameter phi square
        crit = pandas.DataFrame(data=ncx2.ppf(0.05, 1, phi),
                                index=sims.loc[0].index,
                                columns=sims.loc[0].columns)**.5

        # compare with Z
        # left hand side of the inequality 2.8 from Wellek 1996 paper
        Z = abs((wxy - eqctr).divide(sigmah))
        z = Z.copy(deep=True)
        """
		we want to maximize the amount of true alternative hypotheses, so
		we purposely changed the values to use the Wellek's test as an objective function to minimize
		"""
        # test the inequality 2.8 from Wellek 1996 paper
        # the test cannot reject null hypothesis: P[X-Y] < .5 - e1 or P[X-Y] > .5 + e2
        Z[z >= crit] = +1.0
        # the null hypothesis is rejected, therefore .5 - e1 < P[X-Y] < .5 + e2
        Z[z < crit] = +0.0

        if args.report:
            print('wxy estimator:\n', wxy, '\n')
            print('pihxxy estimator:\n', pihxxy, '\n')
            print('pihxyy estimator:\n', pihxyy, '\n')
            print('sigmah estimator:\n', sigmah, '\n')
            print('phi matrix:\n', phi, '\n')
            print('critical values:\n', crit, '\n')
            print('Z estimator: \n', Z, '\n')
            print(
                'Wellek\'s test matrix: a zero means data and simulations are equivalents within the threshold\n',
                Z)

        error['WMWET'] = '{:.0f}'.format(Z.sum().sum())

    # the same as WMWET, but as identical as the Wellek's paper (look for the heaviside function)
    if set(args.error).issuperset(set(['WMWET_paper'])):
        from scipy.stats import ncx2

        eps1_ = .3129  # Wellek's paper
        eps2_ = .2661  # Wellek's paper
        eqctr = 0.5 + (eps2_ - eps1_) / 2
        eqleng = eps1_ + eps2_

        # estimators needed for calculations
        wxy = pandas.DataFrame(index=y.loc[0].index,
                               columns=y.loc[0].columns).fillna(0)
        pihxxy = pandas.DataFrame(index=y.loc[0].index,
                                  columns=y.loc[0].columns).fillna(0)
        pihxyy = pandas.DataFrame(index=y.loc[0].index,
                                  columns=y.loc[0].columns).fillna(0)
        sigmah = pandas.DataFrame(index=y.loc[0].index,
                                  columns=y.loc[0].columns).fillna(0)

        # ŷ estimator (wxy in mawi.R)
        # for (i in 1:m) for (j in 1:n) wxy <- wxy + trunc(0.5 * (sign(x[i] - y[j]) + 1))
        for i in range(m):
            for j in range(n):
                diff = (x.loc[i] - y.loc[j]).dropna(axis=0, how='all').dropna(
                    axis=1, how='all')
                wxy += numpy.heaviside(diff, 0)

        # yFFG estimator (pihxxy in mawi.R)
        #for (i1 in 1:(m - 1)) for (i2 in (i1 + 1):m) for (j in 1:n) pihxxy <- pihxxy + trunc(0.5 * (sign(min(x[i1], x[i2]) - y[j]) + 1))
        for xi1 in range(m - 1):
            for xi2 in range(xi1 + 1, m):
                for xj in range(n):
                    diff1 = (x.loc[xi1] - y.loc[xj]).dropna(
                        axis=0, how='all').dropna(axis=1, how='all')
                    diff2 = (x.loc[xi2] - y.loc[xj]).dropna(
                        axis=0, how='all').dropna(axis=1, how='all')
                    pihxxy += numpy.heaviside(diff1, 0) * numpy.heaviside(
                        diff2, 0)

        # yFGG estimator (pihxyy in mawi.R)
        # for (i in 1:m) for (j1 in 1:(n - 1)) for (j2 in (j1 + 1):n) pihxyy <- pihxyy + trunc(0.5 * (sign(x[i] - max(y[j1], y[j2])) + 1))
        for xi in range(m):
            for xj1 in range(n - 1):
                for xj2 in range(xj1 + 1, n):
                    diff1 = (x.loc[xi] - y.loc[xj1]).dropna(
                        axis=0, how='all').dropna(axis=1, how='all')
                    diff2 = (x.loc[xi] - y.loc[xj2]).dropna(
                        axis=0, how='all').dropna(axis=1, how='all')
                    pihxyy += numpy.heaviside(diff1, 0) * numpy.heaviside(
                        diff2, 0)

        #
        wxy = wxy.divide(m * n)
        pihxxy = pihxxy.multiply(2).divide(m * (m - 1) * n)
        pihxyy = pihxyy.multiply(2).divide(n * (n - 1) * m)

        # variance estimator sigmah (same name as in mawi.R)
        # sigmah <- sqrt((wxy - (m + n - 1) * wxy^2 + (m - 1) * pihxxy + (n - 1) * pihxyy)/(m * n))
        sigmah = wxy - (wxy**2).multiply(m + n - 1) + pihxxy.multiply(
            m - 1) + pihxyy.multiply(n - 1)
        sigmah = sigmah.divide(m * n)
        sigmah = sigmah**0.5

        # critical value
        # crit <- sqrt(qchisq(alpha, 1, (eqleng/2/sigmah)^2))
        phi = (eqleng / 2 / sigmah)**2
        crit = pandas.DataFrame(data=ncx2.ppf(0.05, 1, phi),
                                index=y.loc[0].index,
                                columns=y.loc[0].columns)**.5

        # compare with Z
        Z = abs((wxy - eqctr).divide(sigmah))
        z = Z.copy(deep=True)
        Z[z <
          crit] = +0.0  # the null hypothesis is rejected, therefore .5 - e1 < P[X-Y] < .5 + e2
        Z[z >=
          crit] = +1.0  # the test cannot reject the null hypothesis: P[X-Y] < .5 - e1 or P[X-Y] > .5 + e2

        if args.report:
            print('wxy estimator:\n', wxy, '\n')
            print('pihxxy estimator:\n', pihxxy, '\n')
            print('pihxyy estimator:\n', pihxyy, '\n')
            print('sigmah estimator:\n', sigmah, '\n')
            print('phi matrix:\n', phi, '\n')
            print('critical values:\n', crit, '\n')
            print('Z estimator: \n', Z, '\n')
            print(
                'Wellek\'s test matrix: a zero means data and simulations are equivalents within the threshold\n',
                Z)

        error['WMWET_paper'] = '{:.0f}'.format(Z.sum().sum())

    if set(args.error).issuperset(set(['TOST'])):
        print(
            "WARNING: data and/or simulations not necessarily are normal distributions."
        )
        print(
            "As a test-bed, we consider data and simulations have unequal standard deviations"
        )
        print(
            "See https://www.statsmodels.org/devel/generated/statsmodels.stats.weightstats.ttost_ind.html for more information"
        )
        from statsmodels.stats.weightstats import ttost_ind

        if not args.do_all:
            data_stdv = dostdv(data, len_data)

        # reshape data and sims to allow calculate the test in a for-loop
        tost_sims = numpy.dstack([sims.loc[x] for x in range(len_sims)])
        # since we operate numpy arrays without labels, we must ensure sims and data indexes and columns have the same order
        index = data.loc[0].index
        columns = data.loc[0].columns
        tost_data = numpy.dstack([
            data.loc[x].reindex(columns=columns, index=index)
            for x in range(len_data)
        ])

        p = numpy.zeros((len(data_stdv.index), len(data_stdv.columns)))
        row = 0
        for x, y, lim in zip(tost_sims, tost_data, data_stdv.values):
            for col, _ in enumerate(data_stdv.columns):
                p[row, col] = ttost_ind(x[col], y[col], -lim[col],
                                        +lim[col])[0]
            row += 1

        # transform matrix of p-values into a non-rejection DataFrame (if p-value less than 5% -> rejects, but set to zero)
        p = pandas.DataFrame(index=index, columns=columns, data=p)
        P = p.copy(deep=True)
        P[p >= .05] = +1.0
        P[p < .05] = +0.0

        if args.report:
            print(
                'Two one-sided t-tests matrix: a zero means data and simulations are equivalents within one standard deviation threshold\n',
                P)

        error['TOST'] = '{:.0f}'.format(P.sum().sum())

    # Mann-Whitney U-test
    def mwut(data, sims, alternative):
        """Run a cell-wise Mann-Whitney U-test of data against simulations.

        Every data replicate is compared with every simulation replicate.
        For each cell two counters are accumulated: ``udata`` (times data is
        smaller than sims) and ``usims`` (times data is greater than sims);
        a tie adds 0.5 to both. The statistic ``len_data * len_sims - bigU``
        is then compared with a critical value read from the table at
        ``args.crit`` (row ``len_sims``, column ``str(len_data)``).

        ``args``, ``len_data`` and ``len_sims`` are read from the enclosing
        scope.

        Parameters
        ----------
        data, sims : pandas.DataFrame
            Replicates addressed by an outer integer index, so that
            ``data.loc[i]`` and ``sims.loc[j]`` each return one replicate.
            The labels of ``sims.loc[0]`` define the shape of the counters.
        alternative : {'two-sided', 'less', 'greater'}
            ``'two-sided'`` uses the cell-wise maximum of both counters as
            ``bigU``, ``'less'`` uses ``udata`` and ``'greater'`` uses
            ``usims``.

        Returns
        -------
        tuple
            ``(count, U)`` where ``U`` holds 1.0 in cells whose statistic is
            less than or equal to the critical value and 0.0 otherwise, and
            ``count`` is ``U.sum().sum()`` formatted with ``'{:.0f}'``.

        Raises
        ------
        ValueError
            If ``alternative`` is not one of the three supported values.
        """
        report_headers = {
            'two-sided':
            'U-test matrix: A one means data and sims are differents\n',
            'less':
            'U-test matrix: A one means data is smaller than sims (shifted to the right)\n',
            'greater':
            'U-test matrix: A one means data is greater than sims (shifted to the left)\n',
        }
        # Fail early and explicitly instead of hitting an unbound `bigU` later.
        if alternative not in report_headers:
            raise ValueError(
                "alternative must be 'two-sided', 'less' or 'greater', "
                'got {!r}'.format(alternative))

        ucrit = pandas.read_csv(args.crit,
                                sep=None,
                                engine='python',
                                header=0,
                                index_col=0)

        # Float zero counters, built directly rather than by filling an empty
        # object-dtype frame.
        template = sims.loc[0]
        udata = pandas.DataFrame(0.0,
                                 index=template.index,
                                 columns=template.columns)
        usims = pandas.DataFrame(0.0,
                                 index=template.index,
                                 columns=template.columns)

        for i in range(len_data):
            data_rep = data.loc[i]
            for j in range(len_sims):
                # Rows and columns that are entirely missing are dropped
                # before counting.
                diff = (data_rep - sims.loc[j]).dropna(
                    axis=0, how='all').dropna(axis=1, how='all')
                # Comparisons against NaN are False, so a missing cell adds
                # nothing to either counter.
                ties = (diff == 0) * 0.5
                udata += (diff < 0) + ties
                usims += (diff > 0) + ties

        if alternative == 'two-sided':
            # bigU is max(udata, usims), where udata and usims are DataFrames
            bigU = udata.where(udata >= usims).fillna(
                usims.where(usims >= udata))
        elif alternative == 'less':
            bigU = udata
        else:
            bigU = usims

        # U is significant if it is less than or equal to the critical value;
        # the masks are taken from the untouched statistic `u`.
        critical = ucrit.loc[len_sims, str(len_data)]
        u = len_data * len_sims - bigU
        U = u.copy(deep=True)
        U[u <= critical] = +1.0
        U[u > critical] = +0.0

        if args.report:
            print('U-estimator for data\n', udata, '\n')
            print('U-estimator for sims\n', usims, '\n')
            print(report_headers[alternative], U, '\n')

        return '{:.0f}'.format(U.sum().sum()), U

    # MWUT is only evaluated with at least three replicates on both sides;
    # otherwise the error is reported as the string form of NaN.
    if {'MWUT'} <= set(args.error):
        if min(len_data, len_sims) < 3:
            error['MWUT'] = str(numpy.nan)
        else:
            error['MWUT'] = mwut(data, sims, 'two-sided')[0]

    # Double U-test (DUT): two one-tailed Mann-Whitney U-tests against
    # simulations shifted by the lower and the upper equivalence limits.
    if set(args.error).issuperset(set(['DUT'])):
        if (len_data >= 3 and len_sims >= 3):
            # A single user-supplied bound is mirrored to the other side,
            # giving a symmetric equivalence interval.
            if args.lower is not None and args.upper is None:
                args.upper = args.lower
            if args.lower is None and args.upper is not None:
                args.lower = args.upper

            # Without user-supplied bounds, the standard deviation of the
            # selected set (sims or data) is used for both limits.
            if args.lower is None and args.upper is None:
                if not args.do_all:
                    if args.stdv == 'sims':
                        lower = upper = dostdv(sims, len_sims)
                    else:
                        lower = upper = dostdv(data, len_data)
                else:
                    if args.stdv == 'sims':
                        lower = upper = sims_stdv
                    else:
                        lower = upper = data_stdv
            # NOTE(review): when args.lower/args.upper are supplied, this block
            # does not bind `lower`/`upper`; presumably they are set earlier in
            # the function -- confirm, otherwise the division below fails.

            # divide by factor
            lower = lower / float(args.factor)
            upper = upper / float(args.factor)

            # The shifted simulations get their own names, so the enclosing
            # `sims` is left untouched for any code that runs after this block.
            sims_minus_lower = pandas.concat(
                [sims.loc[i] - lower for i in range(len_sims)],
                keys=range(len_sims))
            # test data > sims - lower with one-tail U-test
            LB = mwut(data, sims_minus_lower, 'greater')[1]

            sims_plus_upper = pandas.concat(
                [sims.loc[i] + upper for i in range(len_sims)],
                keys=range(len_sims))
            # test data < sims + upper with one-tail U-test
            UB = mwut(data, sims_plus_upper, 'less')[1]

            # rejection DataFrame (U-test report with ones true alternative hypotheses)
            # both one-sided tests should reject the null hypotheses
            U = LB * UB
            # However, we minimize the number of non-rejected null hypotheses:
            # XOR with 1 flips the flags into a non-rejection DataFrame.
            U = numpy.logical_xor(U.values, 1).astype(int)
            U = pandas.DataFrame(index=LB.index, columns=LB.columns, data=U)

            if args.report:
                print(
                    'Double U-test matrix: 1.0 means data and sims are not equivalents if sims are shifted:\n',
                    U, '\n')

            error['DUT'] = '{:.0f}'.format(U.sum().sum())

        else:
            # Fewer than three replicates on either side: report NaN.
            error['DUT'] = str(numpy.nan)