def _fleiss_kappa_nannotations(nannotations):
    """Compute Fleiss' kappa given number of annotations per class format.

    This is a testable helper for fleiss_kappa.

    Arguments
    ---------
    nannotations : ndarray, shape = (n_items, n_classes)
        nannotations[i, k] is the number of annotators that assigned
        class k to item i. Every row must sum to the same total,
        i.e. all items must be annotated by the same number of annotators.

    Returns
    -------
    stat : float
        The value of Fleiss' kappa.

    Raises
    ------
    PyannoValueError
        If the number of annotations per item is not constant across items.
    """
    nitems = nannotations.shape[0]

    # check that all rows are annotated by the same number of annotators
    _nanno_sum = nannotations.sum(1)
    nannotations_per_item = _nanno_sum[0]
    if not np.all(_nanno_sum == nannotations_per_item):
        raise PyannoValueError(
            'Number of annotations per item is not constant.'
        )

    # empirical frequency of categories
    freqs = nannotations.sum(0) / (nitems * nannotations_per_item)
    chance_agreement = (freqs ** 2.0).sum()

    # annotator agreement for i-th item, relative to possible annotator pairs
    agreement_rate = (
        ((nannotations ** 2.0).sum(1) - nannotations_per_item)
        / (nannotations_per_item * (nannotations_per_item - 1.0))
    )
    observed_agreement = agreement_rate.mean()

    return chance_adjusted_agreement(observed_agreement, chance_agreement)
def _fleiss_kappa_nannotations(nannotations):
    """Compute Fleiss' kappa from annotation counts in per-class format.

    This is a testable helper for fleiss_kappa. Each row of `nannotations`
    holds, for one item, the number of annotators who chose each class;
    all rows must sum to the same number of annotators.
    """
    nitems = nannotations.shape[0]

    # every item must have been annotated by the same number of annotators
    row_totals = nannotations.sum(1)
    n_per_item = row_totals[0]
    if not np.all(row_totals == n_per_item):
        raise PyannoValueError(
            'Number of annotations per item is not constant.'
        )

    # overall empirical frequency of each category
    class_freqs = nannotations.sum(0) / (nitems * n_per_item)
    chance_agreement = (class_freqs ** 2.).sum()

    # per-item agreement: fraction of agreeing annotator pairs out of all pairs
    pair_counts = (nannotations ** 2.).sum(1) - n_per_item
    total_pairs = n_per_item * (n_per_item - 1.)
    observed_agreement = (pair_counts / total_pairs).mean()

    return chance_adjusted_agreement(observed_agreement, chance_agreement)
def cohens_kappa(annotations1, annotations2, nclasses=None):
    """Compute Cohen's kappa for two annotators.

    Assumes that the annotators draw annotations at random with different
    but constant frequencies.

    See also :func:`~pyanno.measures.helpers.pairwise_matrix`.

    **References:**

    * Cohen, Jacob (1960). A coefficient of agreement for nominal scales.
      Educational and Psychological Measurement, 20, 37--46.

    * `Wikipedia entry <http://en.wikipedia.org/wiki/Cohen%27s_kappa>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes. If None, `nclasses` is inferred from
        the values in the annotations

    Returns
    -------
    stat : float
        The value of the statistics
    """
    # nothing to compute if there is not a single valid annotation pair
    if all_invalid(annotations1, annotations2):
        logger.debug('No valid annotations')
        return np.nan

    if nclasses is None:
        nclasses = compute_nclasses(annotations1, annotations2)

    # per-class observed and chance agreement frequencies
    observed = observed_agreement_frequency(
        annotations1, annotations2, nclasses
    )
    chance = chance_agreement_different_frequency(
        annotations1, annotations2, nclasses
    )

    return chance_adjusted_agreement(observed.sum(), chance.sum())
def scotts_pi(annotations1, annotations2, nclasses=None):
    """Return Scott's pi statistic for two annotators.

    Assumes that the annotators draw random annotations with the same
    frequency as the combined observed annotations.

    See also :func:`~pyanno.measures.helpers.pairwise_matrix`.

    **References:**

    * Scott, W. (1955). "Reliability of content analysis: The case of nominal
      scale coding." Public Opinion Quarterly, 19(3), 321-325.

    * `Wikipedia entry <http://en.wikipedia.org/wiki/Scott%27s_Pi>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes. If None, `nclasses` is inferred from
        the values in the annotations

    Returns
    -------
    stat : float
        The value of the statistics
    """
    # bail out early when there is no valid data at all
    if all_invalid(annotations1, annotations2):
        logger.debug('No valid annotations')
        return np.nan

    if nclasses is None:
        nclasses = compute_nclasses(annotations1, annotations2)

    # per-class observed and chance agreement frequencies; chance agreement
    # uses the pooled (same) frequency for both annotators
    observed = observed_agreement_frequency(
        annotations1, annotations2, nclasses
    )
    chance = chance_agreement_same_frequency(
        annotations1, annotations2, nclasses
    )

    return chance_adjusted_agreement(observed.sum(), chance.sum())
def cohens_kappa(annotations1, annotations2, nclasses=None):
    """Compute Cohen's kappa for two annotators.

    Assumes that the annotators draw annotations at random with different
    but constant frequencies.

    See also :func:`~pyanno.measures.helpers.pairwise_matrix`.

    **References:**

    * Cohen, Jacob (1960). A coefficient of agreement for nominal scales.
      Educational and Psychological Measurement, 20, 37--46.

    * `Wikipedia entry <http://en.wikipedia.org/wiki/Cohen%27s_kappa>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes. If None, `nclasses` is inferred from
        the values in the annotations

    Returns
    -------
    stat : float
        The value of the statistics
    """
    # guard: kappa is undefined when no item has two valid annotations
    if all_invalid(annotations1, annotations2):
        logger.debug("No valid annotations")
        return np.nan

    if nclasses is None:
        nclasses = compute_nclasses(annotations1, annotations2)

    # total observed agreement vs. agreement expected by chance when each
    # annotator has their own class frequencies
    p_observed = observed_agreement_frequency(
        annotations1, annotations2, nclasses
    ).sum()
    p_chance = chance_agreement_different_frequency(
        annotations1, annotations2, nclasses
    ).sum()

    return chance_adjusted_agreement(p_observed, p_chance)
def scotts_pi(annotations1, annotations2, nclasses=None):
    """Return Scott's pi statistic for two annotators.

    Assumes that the annotators draw random annotations with the same
    frequency as the combined observed annotations.

    See also :func:`~pyanno.measures.helpers.pairwise_matrix`.

    **References:**

    * Scott, W. (1955). "Reliability of content analysis: The case of nominal
      scale coding." Public Opinion Quarterly, 19(3), 321-325.

    * `Wikipedia entry <http://en.wikipedia.org/wiki/Scott%27s_Pi>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes. If None, `nclasses` is inferred from
        the values in the annotations

    Returns
    -------
    stat : float
        The value of the statistics
    """
    # guard: the statistic is undefined without any valid annotation pair
    if all_invalid(annotations1, annotations2):
        logger.debug("No valid annotations")
        return np.nan

    if nclasses is None:
        nclasses = compute_nclasses(annotations1, annotations2)

    # total observed agreement vs. agreement expected by chance under the
    # pooled class frequencies of both annotators
    p_observed = observed_agreement_frequency(
        annotations1, annotations2, nclasses
    ).sum()
    p_chance = chance_agreement_same_frequency(
        annotations1, annotations2, nclasses
    ).sum()

    return chance_adjusted_agreement(p_observed, p_chance)