def wincor(x, y, tr=.2):

    """
    Compute the winsorized correlation between `x` and `y`.
    This function also returns the winsorized covariance.


    :param x: Pandas Series
    Data for group one

    :param y: Pandas Series
    Data for group two

    :param tr: float
    Proportion to winsorize (default is .2)

    :return:
    Dictionary of results

    cor: float
    Winsorized correlation

    nval: int
    Number of observations

    sig: float
    p-value

    wcov: float
    Winsorized covariance
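
    Example (an illustrative sketch, not part of the original docs; the import
    path follows this module's own reference to `hypothesize.measuring_associations`):

    >>> import pandas as pd
    >>> from hypothesize.measuring_associations import wincor
    >>> x = pd.Series([1.1, 2.3, 3.0, 4.2, 5.5, 6.1, 7.4, 8.0])
    >>> y = pd.Series([2.0, 2.7, 3.8, 4.1, 5.9, 6.6, 7.0, 8.3])
    >>> res = wincor(x, y, tr=.2)
    >>> cor, p = res['cor'], res['sig']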
    """

    if not isinstance(x, np.ndarray):
        x, y = pandas_to_arrays([x, y])

    m1 = np.c_[x, y] # cbind
    m1 = m1[~np.isnan(m1).any(axis=1)]
    nval = m1.shape[0]
    x = m1[:, 0]
    y = m1[:, 1]
    g = np.floor(tr * len(x))  # number of winsorized observations in each tail (used for df)
    xvec = winsorize(x, limits=(tr,tr))
    yvec = winsorize(y, limits=(tr,tr))
    wcor = np.corrcoef(xvec, yvec)[0,1]
    wcov = np.cov(xvec, yvec)[0,1]
    test = wcor * np.sqrt((len(x) - 2) / (1. - wcor ** 2))
    sig = 2 * (1 - t.cdf(abs(test), len(x) - 2 * g - 2))

    res = {'cor': wcor, 'wcov': wcov, 'sig': sig, 'nval': nval}

    return res
def corb(corfun, x, y, alpha, nboot, *args, seed=False):

    """
    Compute a 1-alpha confidence interval for a
    correlation using the percentile bootstrap method.
    The function `corfun` is any function that returns a
    correlation coefficient. The functions `pbcor` and
    `wincor` follow this convention. When using
    Pearson's correlation with n < 250, use
    lsfitci instead (not yet implemented).

    Note that arguments up to and including `args` are positional arguments

    :param corfun: function
    corfun is any function that returns a correlation coefficient

    :param x: Pandas Series
    Data for group one

    :param y: Pandas Series
    Data for group two

    :param alpha: float
    Alpha level (default is .05)

    :param nboot: int
    Number of bootstrap samples

    :param args: list/value
    List of arguments to corfun (e.g., .2)

    :param seed: bool
    Random seed for reproducible results. Default is `False`.

    :return:
    Dictionary of results

    ci: list
    Confidence interval

    cor: float
    Correlation estimate

    p_value: float
    p-value
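
    Example (an illustrative sketch, not part of the original docs; assumes
    `corb` and `wincor` are importable from `hypothesize.measuring_associations`):

    >>> import pandas as pd
    >>> from hypothesize.measuring_associations import corb, wincor
    >>> x = pd.Series([1.1, 2.3, 3.0, 4.2, 5.5, 6.1, 7.4, 8.0])
    >>> y = pd.Series([2.0, 2.7, 3.8, 4.1, 5.9, 6.6, 7.0, 8.3])
    >>> res = corb(wincor, x, y, .05, 599, .2, seed=True)
    >>> ci, p = res['ci'], res['p_value']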

    """

    x, y = pandas_to_arrays([x, y])


    m1 = np.c_[x, y] # cbind
    m1 = m1[~np.isnan(m1).any(axis=1)]
    nval = m1.shape[0]
    x = m1[:, 0]
    y = m1[:, 1]
    est = corfun(x, y, *args)['cor']

    if seed:
        np.random.seed(seed)

    # each row of `data_inds` holds indices (sampled with replacement) for one bootstrap sample
    data_inds = np.random.choice(len(x), size=(nboot, len(x)))
    bvec = np.array([corbsub(row_inds, x, y, corfun, *args) for row_inds in data_inds])

    # R's corresponding indices are bsort[ilow + 1] and bsort[ihi]; shift both to 0-based indexing
    ihi = int(np.floor((1 - alpha / 2) * nboot + .5))
    ilow = int(np.floor((alpha / 2) * nboot + .5))
    bsort = sorted(bvec)
    corci = [bsort[ilow], bsort[ihi - 1]]
    phat = sum(bvec < 0) / nboot
    sig = 2 * min(phat, 1 - phat)

    return {'ci': corci, 'p_value': sig, 'cor': est}
def pbcor(x, y, beta=.2):

    """
    Compute the percentage bend
    correlation between `x` and `y`


    :param x: Pandas Series
    Data for group one

    :param y: Pandas Series
    Data for group two

    :param beta: float
    `0 < beta < .5`. Beta is analogous to trimming in
    other functions and related to the measure of
    dispersion used in the percentage bend
    calculation.

    :return:
    Dictionary of results

    cor: float
    Correlation

    nval: int
    Number of observations

    p_value: float
    p-value

    test: float
    Test statistic
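
    Example (an illustrative sketch, not part of the original docs; assumes
    `pbcor` is importable from `hypothesize.measuring_associations`):

    >>> import pandas as pd
    >>> from hypothesize.measuring_associations import pbcor
    >>> x = pd.Series([1.1, 2.3, 3.0, 4.2, 5.5, 6.1, 7.4, 8.0])
    >>> y = pd.Series([2.0, 2.7, 3.8, 4.1, 5.9, 6.6, 7.0, 8.3])
    >>> res = pbcor(x, y, beta=.2)
    >>> cor, p = res['cor'], res['p_value']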

    """

    if not isinstance(x, np.ndarray):
        x, y = pandas_to_arrays([x, y])

    if len(x) != len(y):
        raise Exception("The arrays do not have equal lengths")

    m1 = np.c_[x, y] # cbind
    m1 = m1[~np.isnan(m1).any(axis=1)]
    nval = m1.shape[0]
    x = m1[:, 0]
    y = m1[:, 1]
    # percentage bend measure of dispersion: the (1 - beta) quantile of |x - median(x)|
    temp = np.sort(abs(x - np.median(x)))
    omhatx = temp[int(np.floor((1 - beta) * len(x))) - 1]
    temp = np.sort(abs(y - np.median(y)))
    omhaty = temp[int(np.floor((1 - beta) * len(y))) - 1]

    a = (x - pbos(x, beta)) / omhatx
    b = (y - pbos(y, beta)) / omhaty

    # bend (truncate) the standardized scores at +/- 1
    a = np.clip(a, -1, 1)
    b = np.clip(b, -1, 1)

    pbcor_result = sum(a * b) / np.sqrt(sum(a ** 2) * sum(b ** 2))
    test = pbcor_result * np.sqrt((len(x) - 2) / (1 - pbcor_result ** 2))
    sig = 2 * (1 - t.cdf(abs(test), len(x) - 2))

    res = {'cor': pbcor_result, 'test': test, 'p_value': sig, 'nval': nval}
    return res
def lindepbt(x, tr=.2, con=None, alpha=.05, nboot=599, dif=True, seed=False):
    """
    Multiple comparisons on trimmed means using a bootstrap-t method,
    with FWE controlled via Rom's method.

    :param x: Pandas DataFrame
    Each column in the data represents a different group

    :param tr: float
    Proportion to trim (default is .2)

    :param con: array
    `con` is a J (number of groups) by d (number of contrasts)
    matrix containing the contrast coefficients of interest.
    All linear contrasts can be created automatically by using the function [con1way](J)
    (the result of which can be used for `con`). The default is `None` and in this
    case all linear contrasts are created automatically.

    :param alpha: float
    Alpha level. Default is .05.

    :param nboot: int
    Number of bootstrap samples (default is 599)

    :param dif: bool
    When `True`, use difference scores, otherwise use marginal distributions

    :param seed: bool
    Random seed for reproducible results (default is `False`)

    :return:
    Dictionary of results

    con: array
    Contrast matrix

    num_sig: int
    Number of statistically significant results

    psihat: DataFrame
    Difference score and CI for each contrast

    test: DataFrame
    Test statistic, p-value, critical value, and standard error
    for each contrast
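
    Example (an illustrative sketch, not part of the original docs; the import
    path `hypothesize.compare_groups_with_single_factor` is an assumption about
    where this function lives):

    >>> import numpy as np, pandas as pd
    >>> from hypothesize.compare_groups_with_single_factor import lindepbt
    >>> df = pd.DataFrame(np.random.rand(30, 3), columns=['g1', 'g2', 'g3'])
    >>> res = lindepbt(df, tr=.2, seed=True)
    >>> psihat, test = res['psihat'], res['test']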
    """

    called_directly = False
    if type(x) is pd.DataFrame:
        x = pandas_to_arrays(x)
        x = remove_nans_based_on_design(x,
                                        design_values=len(x),
                                        design_type='dependent_groups')
        x = np.r_[x].T
        called_directly = True

    from hypothesize.measuring_associations import wincor

    if seed:
        np.random.seed(seed)

    if con is None:
        con = con2way(1, x.shape[1])[1]  # all pairwise
        ncon = con.shape[1]

    else:
        ncon = con.shape[1]

    x = x[~np.isnan(x).any(axis=1)]
    n = x.shape[0]
    J = x.shape[1]
    nval = x.shape[0]
    h1 = nval - 2 * np.floor(tr * nval)  # effective sample size after trimming
    xbar = trim_mean(x, tr)

    if alpha == .05:

        dvec = [
            .05, .025, .0169, .0127, .0102, .00851, .0073, .00639, .00568,
            .00511
        ]

        if ncon > 10:
            avec = .05 / np.arange(11, ncon + 1)
            dvec = np.append(dvec, avec)

    elif alpha == .01:

        dvec = [
            .01, .005, .00334, .00251, .00201, .00167, .00143, .00126, .00112,
            .00101
        ]

        if ncon > 10:
            avec = .01 / np.arange(11, ncon + 1)
            dvec = np.append(dvec, avec)

    else:
        dvec = alpha / np.arange(1, ncon + 1)

    psihat = np.zeros([ncon, 4])
    test = np.zeros([ncon, 5])
    temp1 = np.array([])

    for d in range(ncon):
        psihat[d, 0] = d

        if not dif:
            psihat[d, 1] = np.sum(con[:, d] * xbar)
            sejk = 0

            for j in range(J):
                for k in range(J):
                    djk = (nval - 1) * wincor(x[:, j], x[:, k],
                                              tr)['wcov'] / (h1 * (h1 - 1))
                    sejk = sejk + con[j, d] * con[k, d] * djk

            sejk = np.sqrt(sejk)
            test[d, 0] = d
            test[d, 1] = np.sum(con[:, d] * xbar) / sejk
            test[d, 4] = sejk

            data = np.random.randint(n, size=(nboot, n))
            xcen = np.full([x.shape[0], x.shape[1]], np.nan)
            for j in range(J):
                xcen[:, j] = x[:, j] - trim_mean(x[:, j], tr)

            bvec = [
                lindep_sub(data_row, xcen, con[:, d], tr=tr)
                for data_row in data
            ]

            bsort = np.sort(np.abs(bvec))
            ic = round((1 - alpha) * nboot) - 1  # - 1 converts R's 1-based index to 0-based
            psihat[d, 2] = psihat[d, 1] - bsort[ic] * test[d, 4]
            psihat[d, 3] = psihat[d, 1] + bsort[ic] * test[d, 4]
            p_value = np.mean(np.abs(test[d, 1]) <= np.abs(bvec))
            temp1 = np.append(temp1, p_value)

        elif dif:

            for j in range(J):
                if j == 0:
                    dval = con[j, d] * x[:, j]

                elif j > 0:
                    dval = dval + con[j, d] * x[:, j]

            temp = trimcibt(dval, tr=tr, alpha=alpha, nboot=nboot, seed=seed)
            temp1 = np.append(temp1, temp['p_value'])
            test[d, 0] = d
            test[d, 1] = temp['test_stat']
            test[d, 4] = trimse(dval, tr=tr)
            psihat[d, 1] = trim_mean(dval, tr)
            psihat[d, 2] = temp['ci'][0]
            psihat[d, 3] = temp['ci'][1]

    test[:, 2] = temp1
    temp2 = (-temp1).argsort()
    zvec = dvec[:ncon]
    test[temp2, 3] = zvec

    num_sig = np.sum(test[:, 2] <= test[:, 3])

    if called_directly:

        test = pd.DataFrame(
            test, columns=["con_num", "test", "p_value", "p_crit", "se"])
        psihat = pd.DataFrame(
            psihat, columns=["con_num", "psihat", "ci_lower", "ci_upper"])

    return {'test': test, 'psihat': psihat, 'con': con, 'num_sig': num_sig}
def pb2gen(x, y, est, *args, alpha=.05, nboot=2000, seed=False):
    """
    Compute a bootstrap confidence interval for the
    difference between any two parameters corresponding to two
    independent groups.

    Note that arguments up to and including `args` are positional arguments

    :param x: Pandas Series
    Data for group one

    :param y: Pandas Series
    Data for group two

    :param est: function
    Measure of location (currently only `trim_mean` is supported)

    :param args: list/value
    Parameter(s) for measure of location (e.g., .2)

    :param alpha: float
    Alpha level (default is .05)

    :param nboot: int
    Number of bootstrap samples (default is 2000)

    :param seed: bool
    Random seed for reproducible results (default is `False`)

    :return:
    Dictionary of results

    ci: list
    Confidence interval

    est_1: float
    Estimated value (based on `est`) for group one

    est_2: float
    Estimated value (based on `est`) for group two

    est_dif: float
    Estimated difference between group one and two

    n1: int
    Number of observations in group one

    n2: int
    Number of observations in group two

    p_value: float
    p-value

    variance: float
    Variance of the bootstrapped differences between groups
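
    Example (an illustrative sketch, not part of the original docs; `trim_mean`
    is `scipy.stats.trim_mean`, and the import path for `pb2gen` is an assumption):

    >>> import pandas as pd
    >>> from scipy.stats import trim_mean
    >>> from hypothesize.compare_groups_with_single_factor import pb2gen
    >>> x = pd.Series([1.1, 2.3, 3.0, 4.2, 5.5, 6.1, 7.4, 8.0])
    >>> y = pd.Series([2.0, 2.7, 3.8, 4.1, 5.9, 6.6, 7.0, 8.3])
    >>> res = pb2gen(x, y, trim_mean, .2, seed=True)
    >>> ci, p = res['ci'], res['p_value']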
    """

    x, y = pandas_to_arrays([x, y])

    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    if seed:
        np.random.seed(seed)

    datax = np.random.choice(x, size=(nboot, len(x)))
    datay = np.random.choice(y, size=(nboot, len(y)))

    bvecx = est(datax, *args, axis=1)
    bvecy = est(datay, *args, axis=1)

    bvec = np.sort(bvecx - bvecy)
    # R computes low <- round((alpha/2)*nboot) + 1; shifted here for 0-based indexing
    low = round((alpha / 2) * nboot)
    up = nboot - low - 2
    temp = np.sum(bvec < 0) / nboot + np.sum(bvec == 0) / (2 * nboot)
    sig_level = 2 * (min(temp, 1 - temp))
    se = np.var(bvec)  # variance of the bootstrap differences

    results = {
        'est_1': est(x, *args),
        'est_2': est(y, *args),
        'est_dif': est(x, *args) - est(y, *args),
        'ci': [bvec[low], bvec[up]],
        'p_value': sig_level,
        'variance': se,
        'n1': len(x),
        'n2': len(y)
    }

    return results
def l2drmci(x,
            y,
            est,
            *args,
            pairwise_drop_na=True,
            alpha=.05,
            nboot=2000,
            seed=False):
    """
    Compute a bootstrap confidence interval for a
    measure of location associated with the distribution of x-y.
    That is, compare x and y by looking at all possible difference scores
    in random samples of `x` and `y`. `x` and `y` are possibly dependent.

    Note that arguments up to and including `args` are positional arguments

    :param x: Pandas Series
    Data for group one

    :param y: Pandas Series
    Data for group two

    :param est: function
    Measure of location (currently only `trim_mean` is supported)

    :param args: list/value
    Parameter(s) for measure of location (e.g., .2)

    :param pairwise_drop_na: bool
    If True, treat data as dependent and remove any row with missing data. If False,
    remove missing data from each group separately (unequal sample sizes are not supported)

    :param alpha: float
    Alpha level (default is .05)

    :param nboot: int
    Number of bootstrap samples (default is 2000)

    :param seed: bool
    Random seed for reproducible results (default is `False`)

    :return:
    Dictionary of results

    ci: list
    Confidence interval

    p_value: float
    p-value
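
    Example (an illustrative sketch, not part of the original docs; the import
    path for `l2drmci` is an assumption):

    >>> import pandas as pd
    >>> from scipy.stats import trim_mean
    >>> from hypothesize.compare_groups_with_single_factor import l2drmci
    >>> x = pd.Series([1.1, 2.3, 3.0, 4.2, 5.5, 6.1, 7.4, 8.0])
    >>> y = pd.Series([2.0, 2.7, 3.8, 4.1, 5.9, 6.6, 7.0, 8.3])
    >>> res = l2drmci(x, y, trim_mean, .2, seed=True)
    >>> ci, p = res['ci'], res['p_value']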
    """

    x, y = pandas_to_arrays([x, y])

    if pairwise_drop_na:
        m1 = np.c_[x, y]  # cbind
        x = m1[~np.isnan(m1).any(axis=1)]

    else:
        x = x[~np.isnan(x)]
        y = y[~np.isnan(y)]

        if len(x) != len(y):
            raise Exception(
                "With unequal sample sizes, you might consider wmwpb "
                "(currently not implemented)")

        else:
            x = np.c_[x, y]  # cbind

    if seed:
        np.random.seed(seed)

    data = np.random.choice(x.shape[0], size=(nboot, len(x)))

    bvec = np.full(nboot, np.nan)
    for i in range(nboot):
        bvec[i] = loc2dif(x[data[i, :], 0], x[data[i, :], 1], est, *args,
                          drop_na=pairwise_drop_na)

    bvec = np.sort(bvec)
    low = int(np.round((alpha / 2) * nboot) + 1) - 1
    up = nboot - low - 2
    temp = np.sum(bvec < 0) / nboot + np.sum(bvec == 0) / (2 * nboot)
    sig_level = 2 * (np.min([temp, 1 - temp]))
    ci = [bvec[low], bvec[up]]

    results = dict(zip(['ci', 'p_value'], [ci, sig_level]))

    return results
def yuenbt(x, y, tr=.2, alpha=.05, nboot=599, seed=False):
    """
    Compute a 1-alpha confidence interval for the difference between
    the trimmed means corresponding to two independent groups.
    The bootstrap-t method is used. During the bootstrapping,
    the absolute value of the test statistic is used (the "two-sided method").


    :param x: Pandas Series
    Data for group one

    :param y: Pandas Series
    Data for group two

    :param tr: float
    Proportion to trim (default is .2)

    :param alpha: float
    Alpha level (default is .05)

    :param nboot: int
    Number of bootstrap samples (default is 599)

    :param seed: bool
    Random seed for reproducible results. Default is `False`.

    :return:
    Dictionary of results

    ci: list
    Confidence interval

    est_dif: float
    Estimated difference between group one and two

    est_x: float
    Trimmed mean for group one

    est_y: float
    Trimmed mean for group two

    p_value: float
    p-value

    test_stat: float
    Test statistic
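
    Example (an illustrative sketch, not part of the original docs; the import
    path for `yuenbt` is an assumption):

    >>> import pandas as pd
    >>> from hypothesize.compare_groups_with_single_factor import yuenbt
    >>> x = pd.Series([1.1, 2.3, 3.0, 4.2, 5.5, 6.1, 7.4, 8.0])
    >>> y = pd.Series([2.0, 2.7, 3.8, 4.1, 5.9, 6.6, 7.0, 8.3])
    >>> res = yuenbt(x, y, tr=.2, seed=True)
    >>> ci, p = res['ci'], res['p_value']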

    """

    x, y = pandas_to_arrays([x, y])

    if seed:
        np.random.seed(seed)

    ci = []
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    xcen = x - trim_mean(x, tr)
    ycen = y - trim_mean(y, tr)

    test_stat = (trim_mean(x, tr) - trim_mean(y, tr)) / \
           np.sqrt(trimse(x, tr = tr) ** 2 + trimse(y, tr = tr) ** 2)

    datax = np.random.choice(xcen, size=(nboot, len(x)))
    datay = np.random.choice(ycen, size=(nboot, len(y)))

    top = trim_mean(datax, tr, axis=1) - trim_mean(datay, tr, axis=1)

    botx = np.array([trimse(row, tr) for row in datax])
    boty = np.array([trimse(row, tr) for row in datay])
    tval = top / np.sqrt(botx**2 + boty**2)
    tval = abs(tval)
    tval = sorted(tval)
    icrit = int(np.floor((1 - alpha) * nboot + .5))
    se = np.sqrt(trimse(x, tr) ** 2 + trimse(y, tr) ** 2)
    ci.append(trim_mean(x, tr) - trim_mean(y, tr) - tval[icrit] * se)
    ci.append(trim_mean(x, tr) - trim_mean(y, tr) + tval[icrit] * se)
    p_value = sum(np.abs(test_stat) <= np.abs(tval)) / nboot
    est_x = trim_mean(x, tr)
    est_y = trim_mean(y, tr)
    est_dif = est_x - est_y

    results = {
        'ci': ci,
        'test_stat': test_stat,
        'p_value': p_value,
        'est_x': est_x,
        'est_y': est_y,
        'est_dif': est_dif
    }

    return results
def tmcppb(x,
           est,
           *args,
           con=None,
           bhop=False,
           alpha=.05,
           nboot=None,
           seed=False):
    """
    Multiple comparisons for J independent groups using trimmed means and
    the percentile bootstrap method. Rom’s method is used to control the
    probability of one or more type I errors. For C > 10 hypotheses,
    or when the goal is to test at some level other than .05 and .01,
    Hochberg’s method is used. Setting the argument `bhop` to `True` uses the
    Benjamini–Hochberg method instead.

    Note that arguments up to and including `args` are positional arguments

    :param x: Pandas DataFrame
    Each column represents a group of data

    :param est: function
    Measure of location (currently only `trim_mean` is supported)

    :param args: list/value
    Parameter(s) for measure of location (e.g., .2)

    :param con: array
    `con` is a J (number of columns) by d (number of contrasts)
    matrix containing the contrast coefficients of interest.
    All linear contrasts can be created automatically by using the function [con1way](J)
    (the result of which can be used for `con`). The default is `None` and in this
    case all linear contrasts are created automatically.

    :param bhop: bool
    If `True`, the Benjamini–Hochberg method is used to control FWE

    :param alpha: float
    Alpha level. Default is .05.

    :param nboot: int
    Number of bootstrap samples. If `None` (default), `nboot` is set
    to 2000, 4000, or 5000 depending on the number of groups

    :param seed: bool
    Random seed for reproducible results. Default is `False`.

    :return:
    Dictionary of results

    con: array
    Contrast matrix

    num_sig: int
    Number of statistically significant results

    output: DataFrame
    Difference score, p-value, critical value, and CI for each contrast
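
    Example (an illustrative sketch, not part of the original docs; the import
    paths are assumptions):

    >>> import numpy as np, pandas as pd
    >>> from scipy.stats import trim_mean
    >>> from hypothesize.compare_groups_with_single_factor import tmcppb
    >>> df = pd.DataFrame(np.random.rand(30, 3), columns=['g1', 'g2', 'g3'])
    >>> res = tmcppb(df, trim_mean, .2, seed=True)
    >>> output = res['output']  # one row per contrast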
    """

    x = pandas_to_arrays(x)
    x = remove_nans_based_on_design(x, len(x), 'independent_groups')
    J = len(x)

    mvec = [est(i, *args) for i in x]

    if con is None:
        con = con1way(J)

    ncon = con.shape[1]

    if not nboot:
        nboot = 5000
        if J <= 8:
            nboot = 4000
        if J <= 3:
            nboot = 2000

    if not bhop:

        if alpha == .05:
            dvec = [
                .05, .025, .0169, .0127, .0102, .00851, .0073, .00639, .00568,
                .00511
            ]

            if ncon > 10:
                avec = .05 / np.arange(11, ncon + 1)
                dvec = np.append(dvec, avec)

        elif alpha == .01:
            dvec = [
                .01, .005, .00334, .00251, .00201, .00167, .00143, .00126,
                .00112, .00101
            ]

            if ncon > 10:
                avec = .01 / np.arange(11, ncon + 1)
                dvec = np.append(dvec, avec)

        else:
            dvec = alpha / np.arange(1, ncon + 1)

    else:
        dvec = (ncon - np.arange(1, ncon + 1) + 1) * alpha / ncon

    if seed:
        np.random.seed(seed)

    bvec = np.full([J, nboot], np.nan)
    for i, j in enumerate(x):
        data = np.random.choice(j, size=(nboot, len(j)))
        bvec[i, :] = [est(row, *args) for row in data]

    bcon = con.T @ bvec
    tvec = con.T @ mvec
    test = np.full(ncon, np.nan)
    for d in range(ncon):
        tv = np.sum(bcon[d, :] == 0) / nboot
        test[d] = np.sum(bcon[d, :] > 0) / nboot + .5 * tv
        if test[d] > .5:
            test[d] = 1 - test[d]

    output = np.full([ncon, 6], np.nan)
    test = 2 * test
    temp2 = (-test).argsort()
    zvec = dvec[:ncon]
    output[temp2, 3] = zvec
    icl = int(np.round(dvec[-1] * nboot / 2) + 1) - 1
    icu = nboot - icl - 3

    for ic in range(ncon):
        output[ic, 1] = tvec[ic]
        output[ic, 0] = ic
        output[ic, 2] = test[ic]
        temp = np.sort(bcon[ic, :])
        output[ic, 4] = temp[icl]
        output[ic, 5] = temp[icu]

    num_sig = np.sum(output[:, 2] <= output[:, 3])
    cols = ["con_num", "psihat", "p_value", "p_crit", "ci_lower", "ci_upper"]
    output = pd.DataFrame(output, columns=cols)

    results = {'output': output, 'con': con, 'num_sig': num_sig}

    return results
def linconb(x, con, tr=.2, alpha=.05, nboot=599, seed=False):
    """
    Compute a 1-alpha confidence interval for a set of d linear contrasts
    involving trimmed means using the bootstrap-t method.
    Independent groups are assumed. CIs are adjusted to control FWE
    (p-values are not adjusted).


    :param x: DataFrame
    Each column represents a group of data

    :param con: array
    `con` is a J (number of columns) by d (number of contrasts)
    matrix containing the contrast coefficients of interest.
    All linear contrasts can be created automatically by using the function [con1way](J)
    (the result of which can be used for `con`).

    :param tr: float
    Proportion to trim (default is .2)

    :param alpha: float
    Alpha level (default is .05)

    :param nboot: int
    Number of bootstrap samples (default is 599)

    :param seed: bool
    Random seed for reproducible results. Default is `False`.

    :return:
    Dictionary of results

    con: array
    Contrast matrix

    crit: float
    Critical value

    n: list
    Number of observations for each group

    psihat: DataFrame
    Difference score and CI for each contrast

    test: DataFrame
    Test statistic, standard error, and p-value for each contrast
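
    Example (an illustrative sketch, not part of the original docs; the import
    paths, including `con1way` in `hypothesize.utilities`, are assumptions):

    >>> import numpy as np, pandas as pd
    >>> from hypothesize.utilities import con1way
    >>> from hypothesize.compare_groups_with_single_factor import linconb
    >>> df = pd.DataFrame(np.random.rand(30, 3), columns=['g1', 'g2', 'g3'])
    >>> res = linconb(df, con1way(3), tr=.2, seed=True)
    >>> psihat, test = res['psihat'], res['test']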


    """

    x = pandas_to_arrays(x)

    J = len(x)
    # groups may differ in length after removing NaNs, so keep them in a list
    x = [j[~np.isnan(j)] for j in x]

    if con.shape[0] != len(x):
        raise Exception(
            "The number of groups does not match the number of contrast coefficients."
        )

    bvec = np.zeros([nboot, J, 2])

    if seed:
        np.random.seed(seed)

    nsam = [len(xi) for xi in x]
    for j in range(J):

        xcen = x[j] - trim_mean(x[j], tr)
        data = np.random.choice(xcen, size=(nboot, len(x[j])))

        for i, row in enumerate(data):
            bvec[i, j, :] = trimparts(row, tr)

    m1 = bvec[:, :, 0].T
    m2 = bvec[:, :, 1].T
    boot = np.zeros([con.shape[1], nboot])
    for d in range(con.shape[1]):
        top = np.asarray([trimpartt(row, con[:, d]) for row in m1.T])
        consq = con[:, d]**2
        bot = np.asarray([trimpartt(row, consq) for row in m2.T])
        boot[d, :] = np.abs(top) / np.sqrt(bot)

    testb = np.asarray([max(row) for row in boot.T])
    ic = int(np.floor((1 - alpha) * nboot) - 1)  # one less than R
    testb = np.sort(testb)
    psihat = np.zeros([con.shape[1], 4])
    test = np.zeros([con.shape[1], 4])

    for d in range(con.shape[1]):
        test[d, 0] = d
        psihat[d, 0] = d
        testit = lincon(x,
                        np.array([con[:, d]]).T, tr,
                        alpha)  # column slice of contrast matrix
        test[d, 1] = testit['test']['test'][0]
        pval = np.mean(abs(testit['test']['test'][0]) < boot[d, :])
        test[d, 3] = pval
        psihat[d, 1] = testit['psihat']['psihat'][0]
        psihat[d, 2] = psihat[d, 1] - testb[ic] * testit['test']['se'][0]
        psihat[d, 3] = psihat[d, 1] + testb[ic] * testit['test']['se'][0]
        test[d, 2] = testit['test']['se'][0]

    psihat_col_names = ['contrast_index', 'psihat', 'ci_low', 'ci_up']
    test_col_names = ['contrast_index', 'test', 'se', 'p_value']

    psihat = pd.DataFrame(psihat, columns=psihat_col_names)
    test = pd.DataFrame(test, columns=test_col_names)

    return {
        'n': nsam,
        'psihat': psihat,
        'test': test,
        'crit': testb[ic],
        'con': con
    }
def ydbt(x, y, tr=.2, alpha=.05, nboot=599, side=True, seed=False):
    """
    Using the bootstrap-t method,
    compute a 1-alpha confidence interval for the difference between
    the marginal trimmed means of paired data.
    By default, 20% trimming is used with 599 bootstrap samples.


    :param x: Pandas Series
    Data for group one

    :param y: Pandas Series
    Data for group two

    :param tr: float
    Proportion to trim (default is .2)

    :param alpha: float
    Alpha level. Default is .05.

    :param nboot: int
    Number of bootstrap samples (default is 599)

    :param side: bool
    When `True`, the function returns a symmetric CI and a p-value;
    otherwise it returns an equal-tailed CI (no p-value)

    :param seed: bool
    Random seed for reproducible results (default is `False`)

    :return:
    Dictionary of results

    ci: list
    Confidence interval

    dif: float
    Difference between group one and two

    p_value: float
    p-value
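
    Example (an illustrative sketch, not part of the original docs; the import
    path for `ydbt` is an assumption):

    >>> import pandas as pd
    >>> from hypothesize.compare_groups_with_single_factor import ydbt
    >>> x = pd.Series([1.1, 2.3, 3.0, 4.2, 5.5, 6.1, 7.4, 8.0])
    >>> y = pd.Series([2.0, 2.7, 3.8, 4.1, 5.9, 6.6, 7.0, 8.3])
    >>> res = ydbt(x, y, tr=.2, seed=True)
    >>> ci, p = res['ci'], res['p_value']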
    """

    x = pandas_to_arrays([x, y])
    x = remove_nans_based_on_design(x, 2, 'dependent_groups')
    x, y = [x[0], x[1]]

    if seed:
        np.random.seed(seed)

    data = np.random.randint(len(x), size=(nboot, len(x)))

    xcen = x - trim_mean(x, tr)
    ycen = y - trim_mean(y, tr)

    bvec = [tsub(row, xcen, ycen, tr) for row in data]

    dotest = yuend(x, y, tr=tr)

    estse = dotest['se']
    p_value = np.nan
    dif = trim_mean(x, tr) - trim_mean(y, tr)
    ci = []

    if not side:
        print('p_value is only returned when side=True')
        ilow = round((alpha / 2) * nboot) - 1
        ihi = nboot - ilow - 2
        bsort = np.sort(bvec)
        ci.append(dif - bsort[ihi] * estse)
        ci.append(dif - bsort[ilow + 1] * estse)

    else:
        bsort = np.sort(np.abs(bvec))
        ic = round((1 - alpha) * nboot) - 1
        ci.append(dif - bsort[ic] * estse)
        ci.append(dif + bsort[ic] * estse)
        p_value = (np.sum(np.abs(dotest['teststat']) <= np.abs(bvec))) / nboot

    return {'ci': ci, 'dif': dif, 'p_value': p_value}