Exemple #1
0
def MoodTest(X, Y, side='equal', alpha=0.05):
    '''Mood Test compares the dispersion (scale) of two populations

    The statistic M is the sum, over the X sample, of the squared distance
    between each pooled rank and the mean pooled rank (m + n + 1) / 2.
    M is standardised with E(M) = m(N+1)(N-1)/12 and
    Var(M) = mn(N+1)(N+2)(N-2)/180, where N = m + n, and compared with
    the standard normal distribution.

    Parameters
    ----------
    X : array-like
        first sample, numerical values only

    Y : array-like
        second sample, numerical values only

    side : str (default='equal')
        `equal` -> H1: var(X) != var(Y)
        `upper` (alias `larger`) -> H1: var(X) > var(Y)
        `lower` (alias `smaller`) -> H1: var(X) < var(Y)

    alpha : float (default=0.05)
        the level of significant

    Return
    ------
    MoodTestResult : built from (Z, pvalue, decision string)
    '''
    assert side in ('equal', 'upper', 'lower', 'larger', 'smaller')
    assert is_seq(X) and is_seq(
        Y), 'Mood test expects sequence object stored data'
    assert all(map(is_math, X)) and all(map(
        is_math, Y)), 'Mood test expects numerical data'

    # The original validated 'upper'/'lower' but branched on
    # 'larger'/'smaller', so one-sided alternatives were never reported.
    # Both spellings are accepted and folded into a single one.
    side = {'larger': 'upper', 'smaller': 'lower'}.get(side, side)

    # clean data
    m, n = len(X), len(Y)
    combine_col = list(X) + list(Y)
    # value -> rank lookup; equal values share one entry, and the entry
    # kept is the last rank produced for that value
    rank_pair_data = dict(zip(combine_col, get_ranks(combine_col)))

    # moments of M under the null hypothesis
    hypothesis_mean = (m + n + 1) / 2.0
    M = sum((rank_pair_data[x] - hypothesis_mean) ** 2 for x in X)
    EM = m * (m + n + 1) * (m + n - 1) / 12.0
    VM = m * n * (m + n + 1) * (m + n + 2) * (m + n - 2) / 180.0

    # calculate the statistic value
    Z = (M - EM) / sqrt(VM)
    if m + n <= 30:
        # small-sample shift applied by the original implementation
        Z += 1 / (2.0 * sqrt(VM))

    # tail probability on the side Z falls on, doubled when two-sided
    pvalue = Ncdf(Z, 0, 1)
    pvalue = min(pvalue, 1 - pvalue)
    if side == 'equal':
        pvalue *= 2

    if side == 'lower' and Z < 0 and pvalue <= alpha:
        return MoodTestResult(Z, pvalue, 'H1: var(X) < var(Y)')
    elif side == 'upper' and Z > 0 and pvalue <= alpha:
        return MoodTestResult(Z, pvalue, 'H1: var(X) > var(Y)')
    elif side == 'equal' and pvalue <= alpha:
        return MoodTestResult(Z, pvalue, 'H1: var(X) != var(Y)')
    else:
        return MoodTestResult(Z, pvalue, 'H0: var(X) == var(Y)')
Exemple #2
0
def WicoxonTest(series, center, side='both', alpha=0.05):
    '''Rank-sum test of a single sample against `center` (normal approximation)

    Observations are ranked by their absolute distance to `center`; the
    ranks are summed separately for values strictly above `center` and for
    all remaining values, and the smaller sum is standardised.

    Parameters
    ----------
    series : array-like
        numerical observations

    center : float or int
        the value the observations are compared with

    side : str (default='both')
        only `both` is accepted

    alpha : float (default=0.05)
        not used by the computation

    Return
    ------
    WicoxonTestResult : built from (rounded Z, n, center, rounded p-value)
    '''
    assert is_seq(series), 'Sign test expects sequence object stored data'
    assert all(map(is_math, series)), 'Sign test expects numerical data'
    assert is_math(center) is True, 'the value to compare must be a number'
    assert side == 'both', "don't supprot single side test in thie version"

    # 1 flags an observation strictly above the center, 0 everything else
    # (values equal to the center therefore land in the 0 group)
    above_flags = [1 if value > center else 0 for value in series]
    table = SeriesSet({'X': series, 'SYMBOL': above_flags})

    # absolute distance to the center, then the rank of that distance
    table['ABS'] = table.X.apply(lambda x: abs(x - center))
    table['RANK'] = get_ranks(table.ABS)

    # rank sums of the two groups; the smaller one is the statistic
    W_pos = sum(table.select(lambda row: row['SYMBOL'] == 1).RANK)
    W_neg = sum(table.select(lambda row: row['SYMBOL'] == 0).RANK)
    W = min(W_pos, W_neg)

    # standardise W with mean n(n+1)/4 and variance n(n+1)(2n+1)/24
    n = float(table.shape.Ln)
    expected = n * (n + 1) / 4
    C = -0.5 if W < expected else 0.5
    spread = sqrt(n * (n + 1) * (2 * n + 1) / 24)
    Z = (W - expected + C) / spread

    # NOTE(review): this is the lower-tail probability of Z only, although
    # the function accepts side='both' -- confirm whether doubling is intended
    pvalue = Ncdf(Z, 0, 1)
    return WicoxonTestResult(round(Z, 4), n, center, round(pvalue, 4))
Exemple #3
0
def clf_multilabel(seq, groupby=None):
    '''Map each row of `seq` to the label of its largest column

    Parameters
    ----------
    seq : matrix-like
        object whose `argmax(axis=1).T.tolist()[0]` yields one column index
        per row (presumably a 2-D numpy matrix -- confirm against callers)

    groupby : sequence, dict or None (default=None)
        a sequence is turned into {position: str(label)}; a dict is used
        as-is; a falsy value means "no labels"

    Return
    ------
    Series : the label found for each index, or the index itself when
        `groupby` has no entry for it
    '''
    if is_seq(groupby):
        groupby = {position: str(label)
                   for position, label in enumerate(groupby)}
    groupby = groupby or dict()
    assert isinstance(groupby,
                      dict), '`labels` must be a list of str or dict object.'

    # index of the maximum along axis 1, flattened into a plain list
    winners = seq.argmax(axis=1).T.tolist()[0]
    return Series(groupby.get(int(index), index) for index in winners)
Exemple #4
0
def CoxStautTest(series, H0='increase'):
    '''Cox-Stuart trend test, implemented as a sign test on paired differences

    Each observation x[i] in the first part of the series is paired with
    x[i + c] and the differences x[i + c] - x[i] are passed to `SignTest`
    with center 0.

    Parameters
    ----------
    series : array-like
        numerical observations in time order (must support slicing)

    H0 : str (default='increase')
        `increase` -> SignTest(..., 'lower')
        `decrease` -> SignTest(..., 'upper')
        `no-trend` -> SignTest(...) with its default side

    Return
    ------
    CoxStautResult : built from (H0, n of the sign test, its p-value)
    '''
    assert H0 in ('increase', 'decrease', 'no-trend')
    assert is_seq(series), 'Sign test expects sequence object stored data'
    assert all(map(is_math, series)), 'Sign test expects numerical data'

    # Pairing offset c: n / 2 for even n, (n + 1) / 2 for odd n.  With an
    # odd length the middle observation belongs to no pair; the previous
    # `len // 2` offset reused it and shifted every pair by one position.
    offset = (len(series) + 1) // 2
    differences = [later - earlier
                   for earlier, later in zip(series, series[offset:])]

    if H0 == 'increase':
        r = SignTest(differences, 0, 'lower')
    elif H0 == 'decrease':
        r = SignTest(differences, 0, 'upper')
    else:
        r = SignTest(differences, 0)
    return CoxStautResult(H0, r.n, r.pvalue)
Exemple #5
0
def SignTest(series, center, side='both', alpha=0.05):
    '''Sign Test: binomial test on the counts of values above and below `center`

    Parameters
    ----------
    series : array-like
        numerical observations

    center : float or int
        the value the observations are compared with; observations equal
        to it are ignored

    side : str (default='both')
        `both`  -> k = min(#above, #below), p = min(2 * Bcdf(k), 1)
        `upper` -> k = #below, p = 1 - Bcdf(k)
        `lower` -> k = #above, p = 1 - Bcdf(k)

    alpha : float (default=0.05)
        the level of significant (not used by the computation)

    Return
    ------
    SignTestResult : built from (k, n, center, p-value rounded to 4 digits),
        where n is the number of observations different from `center`

    References
    ----------
    Xin Wang & T.J Chu, Non-parametric Statistics (Second Edition),
    Tsinghua Publish, 2014.
    '''
    assert side in ('both', 'upper', 'lower')
    assert is_seq(series), 'Sign test expects sequence object stored data'
    assert all(map(is_math, series)), 'Sign test expects numerical data'
    assert is_math(center) is True, 'the value to compare must be a number'

    # count observations strictly on each side of the center
    above = sum(1 for value in series if value > center)
    below = sum(1 for value in series if value < center)
    n = above + below

    if side == 'both':
        k = min(above, below)
        pvalue = min(Bcdf(k=k, n=n, p=0.5) * 2, 1)
    elif side == 'upper':
        k = below
        pvalue = 1 - Bcdf(k=k, n=n, p=0.5)
    else:
        k = above
        pvalue = 1 - Bcdf(k=k, n=n, p=0.5)
    return SignTestResult(k, n, center, round(pvalue, 4))
Exemple #6
0
def WilcoxonMannWhitneyTest(X, Y, side='equal', alpha=0.05):
    '''Wilcoxon-Mann-Whitney Test compares the ranks of two populations

    Both samples are pooled and ranked, the rank sums are converted into
    Mann-Whitney U statistics and U is standardised with a tie-corrected
    normal approximation.

    Parameters
    ----------
    X : array-like
        first sample, numerical values only

    Y : array-like
        second sample, numerical values only

    side : str (default='equal')
        `equal` -> H1: Mx != My
        `larger` -> H1: Mx > My
        `smaller` -> H1: Mx < My

    alpha : float (default=0.05)
        the level of significant

    Return
    ------
    WilcoxonMannWhitneyResult : built from (Z, pvalue, decision string)

    Notes
    -----
    For one-sided alternatives Z is based on the Y sample, so Z > 0 means
    Y tends to be larger. For `equal` the smaller of the two U statistics
    is used, so Z is never positive.

    References
    ----------
    Xin Wang & T.J Chu, Non-parametric Statistics (Second Edition),
    Tsinghua Publish, 2014.
    '''
    assert side in ('equal', 'larger', 'smaller')
    assert is_seq(X) and is_seq(Y), 'W-M-W test expects sequence object stored data'
    assert all(map(is_math, X)) and all(map(is_math, Y)), 'W-M-W  test expects numerical data'

    # clean data
    combine_col = list(X) + list(Y)
    # sizes of the groups of tied values (groups of one are not ties)
    tie_sizes = [size for size in Counter(combine_col).values() if size != 1]
    # value -> rank lookup; equal values share a single entry
    rank_pair_data = dict(zip(combine_col, get_ranks(combine_col)))
    rank_X = [rank_pair_data[x] for x in X]
    rank_Y = [rank_pair_data[y] for y in Y]

    # Each rank sum must be reduced by the minimum rank sum of ITS OWN
    # sample.  The previous code always subtracted n(n+1)/2, which is wrong
    # whenever the X rank sum was selected and m != n.
    m, n = len(X), len(Y)
    Ux = sum(rank_X) - m * (m + 1) / 2.0
    Uy = sum(rank_Y) - n * (n + 1) / 2.0
    U = min(Ux, Uy) if side == 'equal' else Uy

    # Var(U) = mn/12 * [(N + 1) - sum(t^3 - t) / (N (N - 1))], N = m + n.
    # The previous code divided the tie term by N (N + 1).
    mn, total = float(m * n), float(m + n)
    if tie_sizes:
        tie_term = (sum(t ** 3 - t for t in tie_sizes)
                    / (12.0 * total * (total - 1.0)))
    else:
        tie_term = 0.0
    variance = mn * ((total + 1.0) / 12.0 - tie_term)
    Z = (U - mn / 2.0) / sqrt(variance)

    # tail probability on the side Z falls on, doubled when two-sided
    pvalue = Ncdf(Z, 0, 1)
    pvalue = min(pvalue, 1 - pvalue)
    if side == 'equal':
        pvalue *= 2

    if side == 'smaller' and Z > 0 and pvalue <= alpha:
        return WilcoxonMannWhitneyResult(Z, pvalue, 'H1: Mx < My')
    elif side == 'larger' and Z < 0 and pvalue <= alpha:
        return WilcoxonMannWhitneyResult(Z, pvalue, 'H1: Mx > My')
    elif side == 'equal' and pvalue <= alpha:
        return WilcoxonMannWhitneyResult(Z, pvalue, 'H1: Mx != My')
    else:
        return WilcoxonMannWhitneyResult(Z, pvalue, 'H0: Mx == My')
Exemple #7
0
def BrownMoodTest(X, Y, side='equal', alpha=0.05):
    '''Brown-Mood Test compares the medians of two populations

    Observations are classified as above or below the median of the pooled
    sample (values equal to it are ignored).  The number of X values above
    the pooled median is standardised with its hypergeometric mean and a
    normal approximation.

    Parameters
    ----------
    X : array-like
        first sample, numerical values only

    Y : array-like
        second sample, numerical values only

    side : str (default='equal')
        `equal` -> H1: Mx != My
        `upper` -> H1: Mx > My
        `lower` -> H1: Mx < My

    alpha : float (default=0.05)
        the level of significant

    Return
    ------
    BrownMoodResult : built from (Z, pvalue, decision string)

    Raises
    ------
    ZeroDivisionError
        when no observation differs from the pooled median, or when the
        standard deviation term is zero (e.g. no value lies above the
        pooled median, or one sample has no value different from it)

    Example
    -------
    With X = [10, 8, 12, 16, 5, 9, 7, 11, 6] and
    Y = [12, 15, 20, 18, 13, 14, 9, 16] the pooled median is 12, giving
    m = 8, n = 7, t = 7 and Z of about -2.8356, so `side='lower'` reports
    'H1: Mx < My' at alpha = 0.05.

    References
    ----------
    Xin Wang & T.J Chu, Non-parametric Statistics (Second Edition),
    Tsinghua Publish, 2014.
    '''
    assert side in ('equal', 'upper', 'lower')
    assert is_seq(X) and is_seq(Y), 'Brown-Mood test expects sequence object stored data'
    assert all(map(is_math, X)) and all(map(is_math, Y)), 'Brown-Mood test expects numerical data'

    # classify every observation against the pooled median
    Mxy = median(list(X) + list(Y))
    large_X = len([i for i in X if i > Mxy])
    large_Y = len([i for i in Y if i > Mxy])
    less_X = len([i for i in X if i < Mxy])
    less_Y = len([i for i in Y if i < Mxy])

    # effective sample sizes and total count above the median
    m, n = large_X + less_X, large_Y + less_Y
    t = large_X + large_Y

    # Expected count of large X values is m * t / (m + n).  Force float
    # division: with integer operands Python 2 truncates the quotient and
    # silently biases Z.
    upper = large_X - float(m * t) / (m + n)
    lower = sqrt(float(m * n * t * (m + n - t)) / (m + n) ** 3)
    Z = upper / lower

    # tail probability on the side Z falls on, doubled when two-sided
    pvalue = Ncdf(Z, 0, 1)
    pvalue = min(pvalue, 1 - pvalue)
    if side == 'equal':
        pvalue *= 2

    if side == 'lower' and Z < 0 and pvalue <= alpha:
        return BrownMoodResult(Z, pvalue, 'H1: Mx < My')
    elif side == 'upper' and Z > 0 and pvalue <= alpha:
        return BrownMoodResult(Z, pvalue, 'H1: Mx > My')
    elif side == 'equal' and pvalue <= alpha:
        return BrownMoodResult(Z, pvalue, 'H1: Mx != My')
    else:
        return BrownMoodResult(Z, pvalue, 'H0: Mx == My')