def cochran(data=None, dv=None, within=None, subject=None):
    """Cochran Q test.

    Special case of the Friedman test for a binary dependent variable.

    Parameters
    ----------
    data : pandas DataFrame
        DataFrame, in long format.
    dv : string
        Name of column containing the binary dependent variable.
    within : string
        Name of column containing the within-subject factor.
    subject : string
        Name of column containing the subject identifier.

    Returns
    -------
    stats : DataFrame
        One-row test summary indexed by ``'cochran'`` ::

        'Source' : Name of the within-subject factor
        'dof' : Degrees of freedom
        'Q' : The Cochran Q statistic (rounded to 3 decimals)
        'p-unc' : Uncorrected p-value

    Notes
    -----
    The Cochran Q test is a non-parametric test for ANOVA with repeated
    measures where the dependent variable is binary.

    Data are expected to be in long-format. When ``dv`` contains missing
    values, the data are first passed through ``remove_rm_na``.

    The Q statistic is defined as:

    .. math:: Q = \\frac{(r-1)(r\\sum_j^rx_j^2-N^2)}{rN-\\sum_i^nx_i^2}

    where :math:`N` is the total sum of all observations, :math:`j=1,...,r`
    where :math:`r` is the number of repeated measures, :math:`i=1,...,n`
    where :math:`n` is the number of observations per condition.

    The p-value is then approximated using a chi-square distribution with
    :math:`r-1` degrees of freedom:

    .. math:: Q \\sim \\chi^2(r-1)

    References
    ----------
    .. [1] Cochran, W.G., 1950. The comparison of percentages in matched
       samples. Biometrika 37, 256–266.
       https://doi.org/10.1093/biomet/37.3-4.256

    Examples
    --------
    Compute the Cochran Q test for repeated measurements.

    >>> from pingouin import cochran, read_dataset
    >>> df = read_dataset('cochran')
    >>> cochran(data=df, dv='Energetic', within='Time', subject='Subject')
            Source  dof      Q     p-unc
    cochran   Time    2  6.706  0.034981
    """
    # Validate the arguments
    _check_dataframe(dv=dv, within=within, data=data, subject=subject,
                     effects='within')

    # Missing values are only stripped when there is at least one
    has_missing = data[dv].isnull().any()
    if has_missing:
        trimmed = data[[subject, within, dv]]
        data = remove_rm_na(dv=dv, within=within, subject=subject,
                            data=trimmed)

    # Totals per condition, per subject, and overall
    cond_totals = data.groupby(within)[dv].sum()
    subj_totals = data.groupby(subject)[dv].sum()
    grand_total = cond_totals.sum()

    k = data[within].nunique()
    dof = k - 1

    # Q statistic and its chi-square p-value
    numerator = dof * (k * np.sum(cond_totals**2) - grand_total**2)
    denominator = k * grand_total - np.sum(subj_totals**2)
    q = numerator / denominator
    p_unc = scipy.stats.chi2.sf(q, dof)

    # Assemble the one-row summary
    summary = {
        'Source': within,
        'dof': dof,
        'Q': np.round(q, 3),
        'p-unc': p_unc,
    }
    return pd.DataFrame(summary, index=['cochran'])
def friedman(data=None, dv=None, within=None, subject=None):
    """Friedman test for repeated measurements.

    Parameters
    ----------
    data : pandas DataFrame
        DataFrame, in long format.
    dv : string
        Name of column containing the dependent variable.
    within : string
        Name of column containing the within-subject factor.
    subject : string
        Name of column containing the subject identifier.

    Returns
    -------
    stats : DataFrame
        One-row test summary indexed by ``'Friedman'`` ::

        'Source' : Name of the within-subject factor
        'ddof1' : Degrees of freedom
        'Q' : The Friedman Q statistic, corrected for ties (3 decimals)
        'p-unc' : Uncorrected p-value

        Columns that contain only NaN are dropped from the output.

    Notes
    -----
    The Friedman test is used for one-way repeated measures ANOVA by ranks.

    Data are expected to be in long-format.

    Note that if the dataset contains one or more other within subject
    factors, an automatic collapsing to the mean is applied on the dependent
    variable (same behavior as the ezANOVA R package). As such, results can
    differ from those of JASP. If you can, always double-check the results.

    Due to the assumption that the test statistic has a chi squared
    distribution, the p-value is only reliable for n > 10 and more than 6
    repeated measurements.

    When the collapsed ``dv`` contains missing values, the data are passed
    through ``remove_rm_na`` before ranking.

    Examples
    --------
    Compute the Friedman test for repeated measurements.

    >>> from pingouin import friedman, read_dataset
    >>> df = read_dataset('rm_anova')
    >>> friedman(data=df, dv='DesireToKill', within='Disgustingness',
    ...          subject='Subject')
                      Source  ddof1      Q     p-unc
    Friedman  Disgustingness      1  9.228  0.002384
    """
    # Check data
    _check_dataframe(dv=dv, within=within, data=data, subject=subject,
                     effects='within')

    # Collapse to the mean. Only ``dv`` is averaged: averaging every column
    # fails on non-numeric columns (e.g. another factor stored as strings).
    data = data.groupby([subject, within])[dv].mean().reset_index()

    # Remove NaN
    if data[dv].isnull().any():
        data = remove_rm_na(dv=dv, within=within, subject=subject,
                            data=data[[subject, within, dv]])

    # Build the (subjects x conditions) matrix
    grp = data.groupby(within)[dv]
    rm = list(data[within].unique())
    k = len(rm)
    X = np.array([grp.get_group(r).values for r in rm]).T
    n = X.shape[0]

    # Rank within each subject (row) and sum the squared rank totals
    ranked = scipy.stats.rankdata(X, axis=1)
    ssbn = (ranked.sum(axis=0)**2).sum()

    # Compute the test statistic
    Q = (12 / (n * k * (k + 1))) * ssbn - 3 * n * (k + 1)

    # Correct for ties. Values occurring once have t = 1 and contribute
    # t * (t^2 - 1) = 0, so every unique value can be included in the sum.
    ties = 0
    for row in X:
        _, counts = np.unique(row, return_counts=True)
        ties += int(np.sum(counts * (counts * counts - 1)))
    c = 1 - ties / float(k * (k * k - 1) * n)
    Q /= c

    # Approximate the p-value
    ddof1 = k - 1
    p_unc = scipy.stats.chi2.sf(Q, ddof1)

    # Create output dataframe
    stats = pd.DataFrame(
        {
            'Source': within,
            'ddof1': ddof1,
            'Q': np.round(Q, 3),
            'p-unc': p_unc,
        },
        index=['Friedman'])

    col_order = ['Source', 'ddof1', 'Q', 'p-unc']
    stats = stats.reindex(columns=col_order)
    # Kept from the original behavior: all-NaN columns are removed
    stats = stats.dropna(how='all', axis=1)
    return stats
def kruskal(data=None, dv=None, between=None, detailed=False):
    """Kruskal-Wallis H-test for independent samples.

    Parameters
    ----------
    data : pandas DataFrame
        DataFrame
    dv : string
        Name of column containing the dependent variable.
    between : string
        Name of column containing the between factor.
    detailed : boolean
        Accepted for backward compatibility; this implementation does not
        use it.

    Returns
    -------
    stats : DataFrame
        One-row test summary indexed by ``'Kruskal'`` ::

        'Source' : Name of the between factor
        'ddof1' : Degrees of freedom
        'H' : The Kruskal-Wallis H statistic, corrected for ties
        'p-unc' : Uncorrected p-value

        Columns that contain only NaN are dropped from the output.

    Notes
    -----
    The Kruskal-Wallis H-test tests the null hypothesis that the population
    median of all of the groups are equal. It is a non-parametric version of
    ANOVA. The test works on 2 or more independent samples, which may have
    different sizes.

    Due to the assumption that H has a chi square distribution, the number of
    samples in each group must not be too small. A typical rule is that each
    sample must have at least 5 measurements.

    Rows with a missing value in ``dv`` or ``between`` are removed.

    Examples
    --------
    Compute the Kruskal-Wallis H-test for independent samples.

    >>> from pingouin import kruskal, read_dataset
    >>> df = read_dataset('anova')
    >>> kruskal(data=df, dv='Pain threshold', between='Hair color')
                 Source  ddof1       H     p-unc
    Kruskal  Hair color      3  10.589  0.014172
    """
    # Check data
    _check_dataframe(dv=dv, between=between, data=data, effects='between')

    # Remove NaN values and reset the index (avoid duplicate axis error)
    data = data[[dv, between]].dropna().reset_index(drop=True)

    # Extract number of groups and total sample size
    n_groups = data[between].nunique()
    n = data[dv].size

    # Rank data, dealing with ties appropriately. The ranks live in their
    # own Series so that no column of ``data`` can be overwritten (e.g. a
    # ``between`` column that happens to be called 'rank').
    ranks = scipy.stats.rankdata(data[dv])
    rank_series = pd.Series(ranks, index=data.index)

    # Total rank and size per group. ``observed=True`` skips unused levels
    # of a categorical ``between`` column, which would otherwise create
    # empty groups and turn H into NaN (0 / 0).
    grp = rank_series.groupby(data[between], observed=True)
    sum_rk_grp = grp.sum().values
    n_per_grp = grp.count().values

    # Calculate chi-square statistic (H)
    H = (12 / (n * (n + 1)) * np.sum(sum_rk_grp**2 / n_per_grp)) - 3 * (n + 1)

    # Correct for ties
    H /= scipy.stats.tiecorrect(ranks)

    # Calculate DOF and p-value
    ddof1 = n_groups - 1
    p_unc = scipy.stats.chi2.sf(H, ddof1)

    # Create output dataframe
    stats = pd.DataFrame(
        {
            'Source': between,
            'ddof1': ddof1,
            'H': np.round(H, 3),
            'p-unc': p_unc,
        },
        index=['Kruskal'])

    col_order = ['Source', 'ddof1', 'H', 'p-unc']
    stats = stats.reindex(columns=col_order)
    # Kept from the original behavior: all-NaN columns are removed
    stats = stats.dropna(how='all', axis=1)
    return stats
def friedman(data=None, dv=None, within=None, subject=None, method='chisq'):
    """Friedman test for repeated measurements.

    Parameters
    ----------
    data : :py:class:`pandas.DataFrame`
        DataFrame, in long format. It is not modified by this function.
    dv : string
        Name of column containing the dependent variable.
    within : string
        Name of column containing the within-subject factor.
    subject : string
        Name of column containing the subject identifier.
    method : string
        Statistical test to perform. Must be ``'chisq'`` (chi-square test) or
        ``'f'`` (F test). See notes below for explanation.

    Returns
    -------
    stats : :py:class:`pandas.DataFrame`

        * ``'Source'``: Name of the within-subject factor
        * ``'W'``: Kendall's coefficient of concordance, corrected for ties

        If ``method='chisq'``

        * ``'ddof1'``: degrees of freedom
        * ``'Q'``: The Friedman chi-square statistic, corrected for ties
        * ``'p-unc'``: Uncorrected p-value of the chi squared test

        If ``method='f'``

        * ``'ddof1'``: degrees of freedom of the numerator
        * ``'ddof2'``: degrees of freedom of the denominator
        * ``'F'``: The Friedman F statistic, corrected for ties
        * ``'p-unc'``: Uncorrected p-value of the F test

    Raises
    ------
    ValueError
        If ``method`` is neither ``'chisq'`` nor ``'f'``.

    Notes
    -----
    The Friedman test is used for one-way repeated measures ANOVA by ranks.

    Data are expected to be in long-format.

    Note that if the dataset contains one or more other within subject
    factors, an automatic collapsing to the mean is applied on the dependent
    variable (same behavior as the ezANOVA R package). As such, results can
    differ from those of JASP. If you can, always double-check the results.

    When the collapsed ``dv`` contains missing values, the data are passed
    through ``remove_rm_na`` before ranking.

    The Friedman test is equivalent to the test of significance of Kendall's
    coefficient of concordance (Kendall's W). Most commonly a Q statistic,
    which has asymptotical chi-squared distribution, is computed and used for
    testing. However, in [1]_ they showed the chi-squared test to be overly
    conservative for small numbers of samples and repeated measures. Instead
    they recommend the F test, which has the correct size and behaves like a
    permutation test, but is computationally much easier.

    References
    ----------
    .. [1] Marozzi, M. (2014). Testing for concordance between several
           criteria. Journal of Statistical Computation and Simulation,
           84(9), 1843–1850. https://doi.org/10.1080/00949655.2013.766189

    Examples
    --------
    Compute the Friedman test for repeated measurements.

    >>> from pingouin import friedman, read_dataset
    >>> df = read_dataset('rm_anova')
    >>> friedman(data=df, dv='DesireToKill', within='Disgustingness',
    ...          subject='Subject')
                      Source         W  ddof1         Q     p-unc
    Friedman  Disgustingness  0.099224      1  9.227848  0.002384

    This time we will use the F test method.

    >>> from pingouin import friedman, read_dataset
    >>> df = read_dataset('rm_anova')
    >>> friedman(data=df, dv='DesireToKill', within='Disgustingness',
    ...          subject='Subject', method='f')
                      Source         W     ddof1      ddof2         F     p-unc
    Friedman  Disgustingness  0.099224  0.978495  90.021505  10.13418  0.002138

    We can see, compared to the previous example, that the p-value is slightly
    lower. This is expected, since the F test is more powerful (see Notes).
    """
    # Fail early with a clear message instead of an UnboundLocalError later
    if method not in ('chisq', 'f'):
        raise ValueError("method must be 'chisq' or 'f', got %r." % (method,))

    # Check data
    _check_dataframe(dv=dv, within=within, data=data, subject=subject,
                     effects='within')

    # Work on a private copy of the three relevant columns so that the
    # dtype conversion below never leaks into the caller's DataFrame, and so
    # that unrelated (possibly non-numeric) columns are not averaged.
    data = data[[subject, within, dv]].copy()

    # Convert Categorical columns to string
    # This is important otherwise all the groupby will return different results
    # unless we specify .groupby(..., observed = True).
    for c in (subject, within):
        if data[c].dtype.name == 'category':
            data[c] = data[c].astype(str)

    # Collapse to the mean
    data = data.groupby([subject, within]).mean().reset_index()

    # Remove NaN
    if data[dv].isnull().any():
        data = remove_rm_na(dv=dv, within=within, subject=subject,
                            data=data[[subject, within, dv]])

    # Build the (subjects x conditions) matrix
    grp = data.groupby(within)[dv]
    rm = list(data[within].unique())
    k = len(rm)
    X = np.array([grp.get_group(r).to_numpy() for r in rm]).T
    n = X.shape[0]

    # Rank within each subject (row) and sum the squared rank totals
    ranked = scipy.stats.rankdata(X, axis=1)
    ssbn = (ranked.sum(axis=0)**2).sum()

    # Correction for ties. Values occurring once have t = 1 and contribute
    # t * (t^2 - 1) = 0, so every unique value can be included in the sum.
    ties = 0
    for row in X:
        _, counts = np.unique(row, return_counts=True)
        ties += int(np.sum(counts * (counts * counts - 1)))

    # Compute Kendall's W corrected for ties
    W = (12 * ssbn - 3 * n * n * k * (k + 1) * (k + 1)) / \
        (n * n * k * (k - 1) * (k + 1) - n * ties)

    if method == 'chisq':
        # Compute the Q statistic
        Q = n * (k - 1) * W

        # Approximate the p-value
        ddof1 = k - 1
        p_unc = scipy.stats.chi2.sf(Q, ddof1)

        # Create output dataframe
        stats = pd.DataFrame({'Source': within,
                              'W': W,
                              'ddof1': ddof1,
                              'Q': Q,
                              'p-unc': p_unc,
                              }, index=['Friedman'])
    else:
        # method == 'f': compute the F statistic
        F = W * (n - 1) / (1 - W)

        # Approximate the p-value
        ddof1 = k - 1 - 2 / n
        ddof2 = (n - 1) * ddof1
        p_unc = scipy.stats.f.sf(F, ddof1, ddof2)

        # Create output dataframe
        stats = pd.DataFrame({'Source': within,
                              'W': W,
                              'ddof1': ddof1,
                              'ddof2': ddof2,
                              'F': F,
                              'p-unc': p_unc,
                              }, index=['Friedman'])

    return _postprocess_dataframe(stats)
def cochran(data=None, dv=None, within=None, subject=None):
    """Cochran Q test.

    A special case of the Friedman test for a binary dependent variable.

    Parameters
    ----------
    data : :py:class:`pandas.DataFrame`
        DataFrame. Both wide and long-format dataframe are supported for this
        test.
    dv : string
        Name of column containing the dependent variable (only required if
        ``data`` is in long format).
    within : string
        Name of column containing the within-subject factor (only required if
        ``data`` is in long format). Two or more within-factor are not
        currently supported.
    subject : string
        Name of column containing the subject/rater identifier (only required
        if ``data`` is in long format).

    Returns
    -------
    stats : :py:class:`pandas.DataFrame`

        * ``'Source'``: Name of the within-subject factor
        * ``'dof'``: degrees of freedom
        * ``'Q'``: The Cochran Q statistic
        * ``'p-unc'``: Uncorrected p-value

    Notes
    -----
    The Cochran Q test [1]_ is a non-parametric test for ANOVA with repeated
    measures where the dependent variable is binary.

    The Q statistic is defined as:

    .. math:: Q = \\frac{(r-1)(r\\sum_j^rx_j^2-N^2)}{rN-\\sum_i^nx_i^2}

    where :math:`N` is the total sum of all observations, :math:`j=1,...,r`
    where :math:`r` is the number of repeated measures, :math:`i=1,...,n`
    where :math:`n` is the number of observations per condition.

    The p-value is then approximated using a chi-square distribution with
    :math:`r-1` degrees of freedom:

    .. math:: Q \\sim \\chi^2(r-1)

    Data can be in wide or long format. When ``dv``, ``within`` and
    ``subject`` are all None, ``data`` is treated as wide-format.

    Missing values are automatically removed using a strict listwise approach
    (= complete-case analysis). In other words, any subject with one or more
    missing value(s) is completely removed from the dataframe prior to
    running the test.

    References
    ----------
    .. [1] Cochran, W.G., 1950. The comparison of percentages in matched
       samples. Biometrika 37, 256–266.
       https://doi.org/10.1093/biomet/37.3-4.256

    Examples
    --------
    Compute the Cochran Q test for repeated measurements.

    >>> from pingouin import cochran, read_dataset
    >>> df = read_dataset('cochran')
    >>> cochran(data=df, dv='Energetic', within='Time', subject='Subject')
            Source  dof         Q     p-unc
    cochran   Time    2  6.705882  0.034981

    Same but using a wide-format dataframe

    >>> df_wide = df.pivot_table(index="Subject", columns="Time", values="Energetic")
    >>> cochran(df_wide)
             Source  dof         Q     p-unc
    cochran  Within    2  6.705882  0.034981
    """
    # Wide-format input is signalled by leaving all three names unset
    is_wide = dv is None and within is None and subject is None
    if is_wide:
        assert isinstance(data, pd.DataFrame)
        # Keep numeric columns only; listwise deletion of missing values
        data = data._get_numeric_data().dropna()
        assert data.shape[0] > 2, "Data must have at least 3 non-missing rows."
        assert data.shape[1] > 1, "Data must contain at least two columns."
        # One synthetic subject identifier per remaining row, then go long
        data['Subj'] = np.arange(data.shape[0])
        data = data.melt(id_vars='Subj', var_name='Within', value_name='DV')
        subject, within, dv = 'Subj', 'Within', 'DV'

    # Validate the (now long-format) dataframe
    _check_dataframe(dv=dv, within=within, data=data, subject=subject,
                     effects='within')
    assert not data[within].isnull().any(), "Cannot have missing values in `within`."
    assert not data[subject].isnull().any(), "Cannot have missing values in `subject`."

    # Round trip through a subject x condition table:
    # 1) absent cells become explicit NaN,
    # 2) duplicated (subject, condition) cells are averaged,
    # 3) dropna removes every subject with an incomplete row (listwise
    #    deletion, the same behavior as JASP).
    wide = data.pivot_table(index=subject, columns=within, values=dv,
                            observed=True)
    wide = wide.dropna()
    data = wide.melt(ignore_index=False, value_name=dv).reset_index()

    # Totals per condition, per subject, and overall
    cond_totals = data.groupby(within, observed=True)[dv].sum()
    subj_totals = data.groupby(subject, observed=True)[dv].sum()
    grand_total = cond_totals.sum()

    k = data[within].nunique()
    dof = k - 1

    # Q statistic and its chi-square p-value
    numerator = dof * (k * np.sum(cond_totals**2) - grand_total**2)
    denominator = k * grand_total - np.sum(subj_totals**2)
    q = numerator / denominator
    p_unc = scipy.stats.chi2.sf(q, dof)

    # Assemble the one-row summary
    summary = {'Source': within, 'dof': dof, 'Q': q, 'p-unc': p_unc}
    stats = pd.DataFrame(summary, index=['cochran'])
    return _postprocess_dataframe(stats)
def friedman(data=None, dv=None, within=None, subject=None, method='chisq'):
    """Friedman test for repeated measurements.

    Parameters
    ----------
    data : :py:class:`pandas.DataFrame`
        DataFrame. Both wide and long-format dataframe are supported for this
        test.
    dv : string
        Name of column containing the dependent variable (only required if
        ``data`` is in long format).
    within : string
        Name of column containing the within-subject factor (only required if
        ``data`` is in long format). Two or more within-factor are not
        currently supported.
    subject : string
        Name of column containing the subject/rater identifier (only required
        if ``data`` is in long format).
    method : string
        Statistical test to perform. Must be ``'chisq'`` (chi-square test) or
        ``'f'`` (F test). See notes below for explanation.

    Returns
    -------
    stats : :py:class:`pandas.DataFrame`

        * ``'Source'``: Name of the within-subject factor
        * ``'W'``: Kendall's coefficient of concordance, corrected for ties

        If ``method='chisq'``

        * ``'ddof1'``: degrees of freedom
        * ``'Q'``: The Friedman chi-square statistic, corrected for ties
        * ``'p-unc'``: Uncorrected p-value of the chi squared test

        If ``method='f'``

        * ``'ddof1'``: degrees of freedom of the numerator
        * ``'ddof2'``: degrees of freedom of the denominator
        * ``'F'``: The Friedman F statistic, corrected for ties
        * ``'p-unc'``: Uncorrected p-value of the F test

    Raises
    ------
    ValueError
        If ``method`` is neither ``'chisq'`` nor ``'f'``.

    Notes
    -----
    The Friedman test is used for non-parametric (rank-based) one-way
    repeated measures ANOVA. It is equivalent to the test of significance of
    Kendall's coefficient of concordance (Kendall's W). Most commonly a Q
    statistic, which has asymptotical chi-squared distribution, is computed
    and used for testing. However, the chi-squared test tends to be overly
    conservative for small numbers of samples and/or repeated measures, in
    which case an F-test is more adequate [1]_.

    Data can be in wide or long format. When ``dv``, ``within`` and
    ``subject`` are all None, ``data`` is treated as wide-format.

    Missing values are automatically removed using a strict listwise approach
    (= complete-case analysis). In other words, any subject with one or more
    missing value(s) is completely removed from the dataframe prior to
    running the test.

    References
    ----------
    .. [1] Marozzi, M. (2014). Testing for concordance between several
           criteria. Journal of Statistical Computation and Simulation,
           84(9), 1843–1850. https://doi.org/10.1080/00949655.2013.766189

    .. [2] https://www.real-statistics.com/anova-repeated-measures/friedman-test/

    Examples
    --------
    Compute the Friedman test for repeated measurements, using a wide-format
    dataframe

    >>> import pandas as pd
    >>> import pingouin as pg
    >>> df = pd.DataFrame({
    ...    'white': {0: 10, 1: 8, 2: 7, 3: 9, 4: 7, 5: 4, 6: 5, 7: 6, 8: 5, 9: 10, 10: 4, 11: 7},
    ...    'red': {0: 7, 1: 5, 2: 8, 3: 6, 4: 5, 5: 7, 6: 9, 7: 6, 8: 4, 9: 6, 10: 7, 11: 3},
    ...    'rose': {0: 8, 1: 5, 2: 6, 3: 4, 4: 7, 5: 5, 6: 3, 7: 7, 8: 6, 9: 4, 10: 4, 11: 3}})
    >>> pg.friedman(df)
              Source         W  ddof1    Q     p-unc
    Friedman  Within  0.083333      2  2.0  0.367879

    Compare with SciPy

    >>> from scipy.stats import friedmanchisquare
    >>> friedmanchisquare(*df.to_numpy().T)
    FriedmanchisquareResult(statistic=1.9999999999999893, pvalue=0.3678794411714444)

    Using a long-format dataframe

    >>> df_long = df.melt(ignore_index=False).reset_index()
    >>> pg.friedman(data=df_long, dv="value", within="variable", subject="index")
                Source         W  ddof1    Q     p-unc
    Friedman  variable  0.083333      2  2.0  0.367879

    Using the F-test method

    >>> pg.friedman(df, method="f")
              Source         W     ddof1      ddof2    F     p-unc
    Friedman  Within  0.083333  1.833333  20.166667  1.0  0.378959
    """
    # Fail early with a clear message instead of an UnboundLocalError later
    if method not in ('chisq', 'f'):
        raise ValueError("method must be 'chisq' or 'f', got %r." % (method,))

    # Convert from wide to long-format, if needed
    if all(v is None for v in (dv, within, subject)):
        assert isinstance(data, pd.DataFrame)
        data = data._get_numeric_data().dropna()  # Listwise deletion of missing values
        assert data.shape[0] > 2, "Data must have at least 3 non-missing rows."
        assert data.shape[1] > 1, "Data must contain at least two columns."
        data['Subj'] = np.arange(data.shape[0])
        data = data.melt(id_vars='Subj', var_name='Within', value_name='DV')
        subject, within, dv = 'Subj', 'Within', 'DV'

    # Check dataframe
    _check_dataframe(dv=dv, within=within, data=data, subject=subject,
                     effects='within')
    assert not data[within].isnull().any(), "Cannot have missing values in `within`."
    assert not data[subject].isnull().any(), "Cannot have missing values in `subject`."

    # Pivot the table to a wide-format dataframe. This has several effects:
    # 1) Force missing values to be explicit (a NaN cell is created)
    # 2) Automatic collapsing to the mean if multiple within factors are present
    # 3) If using dropna, remove rows with missing values (listwise deletion).
    # The latter is the same behavior as JASP (= strict complete-case analysis).
    data_piv = data.pivot_table(index=subject, columns=within, values=dv,
                                observed=True)
    data_piv = data_piv.dropna()

    # Extract data in numpy array and calculate ranks
    X = data_piv.to_numpy()
    n, k = X.shape
    ranked = scipy.stats.rankdata(X, axis=1)
    ssbn = (ranked.sum(axis=0)**2).sum()

    # Correction for ties. Values occurring once have t = 1 and contribute
    # t * (t^2 - 1) = 0, so every unique value can be included in the sum.
    ties = 0
    for row in X:
        _, counts = np.unique(row, return_counts=True)
        ties += int(np.sum(counts * (counts * counts - 1)))

    # Compute Kendall's W corrected for ties
    W = (12 * ssbn - 3 * n**2 * k * (k + 1)**2) / \
        (n**2 * k * (k - 1) * (k + 1) - n * ties)

    if method == 'chisq':
        # Compute the Q statistic
        Q = n * (k - 1) * W

        # Approximate the p-value
        ddof1 = k - 1
        p_unc = scipy.stats.chi2.sf(Q, ddof1)

        # Create output dataframe
        stats = pd.DataFrame({
            'Source': within,
            'W': W,
            'ddof1': ddof1,
            'Q': Q,
            'p-unc': p_unc}, index=['Friedman'])
    else:
        # method == 'f': compute the F statistic
        F = W * (n - 1) / (1 - W)

        # Approximate the p-value
        ddof1 = k - 1 - 2 / n
        ddof2 = (n - 1) * ddof1
        p_unc = scipy.stats.f.sf(F, ddof1, ddof2)

        # Create output dataframe
        stats = pd.DataFrame({
            'Source': within,
            'W': W,
            'ddof1': ddof1,
            'ddof2': ddof2,
            'F': F,
            'p-unc': p_unc}, index=['Friedman'])

    return _postprocess_dataframe(stats)