Exemple #1
0
def cohens_kappa(annotations1, annotations2, nclasses=None):
    """Return Cohen's kappa, a chance-corrected agreement of two annotators.

    The chance model assumes that both annotators pick their labels at
    random, each one with their own constant label frequencies.

    See also :func:`~pyanno.measures.helpers.pairwise_matrix`.

    **References:**

    * Cohen, Jacob (1960). A coefficient of agreement for nominal scales.
      Educational and Psychological Measurement, 20, 37--46.

    * `Wikipedia entry <http://en.wikipedia.org/wiki/Cohen%27s_kappa>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Annotations produced by the first annotator. Missing values should
        be indicated by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Annotations produced by the second annotator. Missing values should
        be indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes. When None, the number is derived from
        the two annotation arrays with `compute_nclasses`

    Returns
    -------
    stat : float
        Cohen's kappa, or ``np.nan`` when `all_invalid` is true for the
        two annotation arrays
    """

    nothing_to_compare = all_invalid(annotations1, annotations2)
    if nothing_to_compare:
        logger.debug('No valid annotations')
        return np.nan

    n_labels = (nclasses if nclasses is not None
                else compute_nclasses(annotations1, annotations2))

    # agreement expected by chance (annotator-specific label frequencies)
    expected = chance_agreement_different_frequency(
        annotations1, annotations2, n_labels)

    # agreement actually observed between the two annotators
    observed = observed_agreement_frequency(
        annotations1, annotations2, n_labels)

    return chance_adjusted_agreement(observed.sum(), expected.sum())
Exemple #2
0
def scotts_pi(annotations1, annotations2, nclasses=None):
    """Compute Scott's pi, a chance-corrected agreement of two annotators.

    The chance model assumes that both annotators pick their labels at
    random with one shared frequency: that of the combined observed
    annotations.

    See also :func:`~pyanno.measures.helpers.pairwise_matrix`.

    **References:**

    * Scott, W. (1955). "Reliability of content analysis: The case of nominal
      scale coding." Public Opinion Quarterly, 19(3), 321-325.

    * `Wikipedia entry <http://en.wikipedia.org/wiki/Scott%27s_Pi>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Annotations produced by the first annotator. Missing values should
        be indicated by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Annotations produced by the second annotator. Missing values should
        be indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes. When None, the number is derived from
        the two annotation arrays with `compute_nclasses`

    Returns
    -------
    stat : float
        Scott's pi, or ``np.nan`` when `all_invalid` is true for the two
        annotation arrays
    """

    if all_invalid(annotations1, annotations2):
        logger.debug('No valid annotations')
        return np.nan

    if nclasses is not None:
        n_labels = nclasses
    else:
        n_labels = compute_nclasses(annotations1, annotations2)

    # per-class agreement expected by chance under the shared-frequency model
    by_chance = chance_agreement_same_frequency(
        annotations1, annotations2, n_labels)

    # per-class agreement that was actually observed
    observed = observed_agreement_frequency(
        annotations1, annotations2, n_labels)

    total_observed = observed.sum()
    total_by_chance = by_chance.sum()
    return chance_adjusted_agreement(total_observed, total_by_chance)
Exemple #3
0
def cohens_kappa(annotations1, annotations2, nclasses=None):
    """Measure the agreement of two annotators with Cohen's kappa.

    Chance agreement is modelled by letting each annotator draw labels at
    random with a constant frequency distribution that may differ between
    the two annotators.

    See also :func:`~pyanno.measures.helpers.pairwise_matrix`.

    **References:**

    * Cohen, Jacob (1960). A coefficient of agreement for nominal scales.
      Educational and Psychological Measurement, 20, 37--46.

    * `Wikipedia entry <http://en.wikipedia.org/wiki/Cohen%27s_kappa>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Labels given by one annotator. Missing values should be indicated
        by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Labels given by the other annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes. If None, it is obtained by calling
        `compute_nclasses` on the two annotation arrays

    Returns
    -------
    stat : float
        The kappa statistic; ``np.nan`` is returned when `all_invalid`
        is true for the inputs
    """

    if all_invalid(annotations1, annotations2):
        logger.debug("No valid annotations")
        return np.nan

    if nclasses is None:
        nclasses = compute_nclasses(annotations1, annotations2)

    # both frequency helpers take exactly the same three arguments
    helper_args = (annotations1, annotations2, nclasses)

    chance_total = chance_agreement_different_frequency(*helper_args).sum()
    observed_total = observed_agreement_frequency(*helper_args).sum()

    return chance_adjusted_agreement(observed_total, chance_total)
Exemple #4
0
def scotts_pi(annotations1, annotations2, nclasses=None):
    """Measure the agreement of two annotators with Scott's pi.

    Chance agreement is modelled by letting both annotators draw labels at
    random with the same frequencies, namely those of the combined observed
    annotations.

    See also :func:`~pyanno.measures.helpers.pairwise_matrix`.

    **References:**

    * Scott, W. (1955). "Reliability of content analysis: The case of nominal
      scale coding." Public Opinion Quarterly, 19(3), 321-325.

    * `Wikipedia entry <http://en.wikipedia.org/wiki/Scott%27s_Pi>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Labels given by one annotator. Missing values should be indicated
        by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Labels given by the other annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes. If None, it is obtained by calling
        `compute_nclasses` on the two annotation arrays

    Returns
    -------
    stat : float
        The pi statistic; ``np.nan`` is returned when `all_invalid` is
        true for the inputs
    """

    annotator_pair = (annotations1, annotations2)

    if all_invalid(*annotator_pair):
        logger.debug("No valid annotations")
        return np.nan

    if nclasses is None:
        nclasses = compute_nclasses(*annotator_pair)

    helper_args = annotator_pair + (nclasses,)

    # chance term first, then the observed term
    chance = chance_agreement_same_frequency(*helper_args)
    observed = observed_agreement_frequency(*helper_args)

    return chance_adjusted_agreement(observed.sum(), chance.sum())
Exemple #5
0
def fleiss_kappa(annotations, nclasses=None):
    """Return Fleiss' kappa, an agreement measure for several annotators.

    The raw annotations are first converted into a table holding, for every
    item, how many times each class was assigned; that table is then passed
    to `_fleiss_kappa_nannotations`.

    **References:**

    * Fleiss, J. L. (1971). "Measuring nominal scale agreement among many
      raters.". Psychological Bulletin, Vol 76(5), 378-382

    * `Wikipedia entry <http://en.wikipedia.org/wiki/Fleiss%27_kappa>`_

    Arguments
    ---------
    annotations : ndarray, shape = (n_items, n_annotators)
        Annotations of all annotators, one row per item. Missing values
        should be indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes. When None, the number is derived from
        `annotations` with `compute_nclasses`

    Returns
    -------
    stat : float
        Fleiss' kappa, or ``np.nan`` when `all_invalid` is true for
        `annotations`
    """

    if all_invalid(annotations):
        logger.debug("No valid annotations")
        return np.nan

    n_labels = compute_nclasses(annotations) if nclasses is None else nclasses

    # per_class_counts[i, k] = how many entries of row i are equal to k
    n_items = annotations.shape[0]
    per_class_counts = np.zeros((n_items, n_labels))
    # each row of the transposed table is a writable view on one column
    for label, column in enumerate(per_class_counts.T):
        column[:] = (annotations == label).sum(1)

    return _fleiss_kappa_nannotations(per_class_counts)
Exemple #6
0
def fleiss_kappa(annotations, nclasses=None):
    """Measure the agreement among multiple annotators with Fleiss' kappa.

    For every item the number of annotations falling in each class is
    counted, and the resulting (n_items, nclasses) table is handed over to
    `_fleiss_kappa_nannotations`.

    **References:**

    * Fleiss, J. L. (1971). "Measuring nominal scale agreement among many
      raters.". Psychological Bulletin, Vol 76(5), 378-382

    * `Wikipedia entry <http://en.wikipedia.org/wiki/Fleiss%27_kappa>`_

    Arguments
    ---------
    annotations : ndarray, shape = (n_items, n_annotators)
        Labels given by every annotator to every item. Missing values
        should be indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes. If None, it is obtained by calling
        `compute_nclasses` on `annotations`

    Returns
    -------
    stat : float
        The kappa statistic; ``np.nan`` is returned when `all_invalid` is
        true for `annotations`
    """

    def votes_for(label):
        # for each item, the number of annotations equal to `label`
        return (annotations == label).sum(1)

    if all_invalid(annotations):
        logger.debug('No valid annotations')
        return np.nan

    if nclasses is None:
        nclasses = compute_nclasses(annotations)

    # one row per item, one column per class
    votes = np.zeros((annotations.shape[0], nclasses))
    for label in range(nclasses):
        votes[:, label] = votes_for(label)

    return _fleiss_kappa_nannotations(votes)
Exemple #7
0
def krippendorffs_alpha(annotations, metric_func=diagonal_distance, nclasses=None):
    """Compute Krippendorff's alpha for multiple annotators.

    Alpha is computed as ``1 - D_o / D_e`` where ``D_o`` is the weighted sum
    of the observed coincidences and ``D_e`` the weighted sum of the
    coincidences expected by chance. The weights are the squared values
    returned by `metric_func`.

    **References:**

    * Klaus Krippendorff (2004). "Content Analysis, an Introduction to Its
      Methodology", 2nd Edition. Thousand Oaks, CA: Sage Publications.
      In particular, Chapter 11, pages 219--250.

    * `Wikipedia entry <http://en.wikipedia.org/wiki/Krippendorff%27s_Alpha>`_

    See also:
    :func:`~pyanno.measures.distances.diagonal_distance`,
    :func:`~pyanno.measures.distances.binary_distance`,

    Arguments
    ---------
    annotations : ndarray, shape = (n_items, n_annotators)
        Array of annotations for multiple annotators. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    metric_func : function(m_i, m_j)
        Distance function that receives two matrices of class indices
        i, j and returns the matrix of distances between them. The returned
        distances are squared before being used as weights.
        Default is :func:`~pyanno.measures.distances.diagonal_distance`

    nclasses : int
        Number of annotation classes. If None, `nclasses` is inferred from the
        values in the annotations

    Returns
    -------
    stat : float
        The value of the statistic, or ``np.nan`` when `all_invalid` is true
        for `annotations`
    """

    if all_invalid(annotations):
        logger.debug("No valid annotations")
        return np.nan

    if nclasses is None:
        nclasses = compute_nclasses(annotations)

    coincidences = coincidence_matrix(annotations, nclasses)

    # marginal totals per class and grand total of the coincidence matrix
    nc = coincidences.sum(1)
    n = coincidences.sum()

    # ---- coincidences expected by chance
    # off-diagonal: nc[c] * nc[k] / (n - 1)
    # diagonal:     nc[c] * (nc[c] - 1) / (n - 1)
    # (same arithmetic as an element-by-element loop, done in one shot)
    chance_coincidences = np.outer(nc, nc) / (n - 1.0)
    np.fill_diagonal(chance_coincidences, nc * (nc - 1.0) / (n - 1.0))

    # build weights matrix from the metric function: squared distances
    # evaluated on the (nclasses, nclasses) grid of class indices
    weights = np.fromfunction(metric_func, shape=(nclasses, nclasses), dtype=float) ** 2.0

    alpha = 1.0 - ((weights * coincidences).sum() / (weights * chance_coincidences).sum())

    return alpha
Exemple #8
0
def cohens_weighted_kappa(annotations1, annotations2, weights_func=diagonal_distance, nclasses=None):
    """Compute Cohen's weighted kappa for two annotators.

    Assumes that the annotators draw annotations at random with different but
    constant frequencies. Disagreements are weighted by a weights
    w_ij representing the "seriousness" of disagreement. For ordered codes,
    it is often set to the distance from the diagonal, i.e. `w_ij = |i-j|`.

    When w_ij is 0.0 on the diagonal and 1.0 elsewhere,
    Cohen's weighted kappa is equivalent to Cohen's kappa.

    See also:
    :func:`~pyanno.measures.distances.diagonal_distance`,
    :func:`~pyanno.measures.distances.binary_distance`,
    :func:`~pyanno.measures.agreement.cohens_kappa`,
    :func:`~pyanno.measures.helpers.pairwise_matrix`

    **References:**

    * Cohen, J. (1968). "Weighed kappa: Nominal scale agreement with provision
      for scaled disagreement or partial credit". Psychological Bulletin
      70 (4): 213-220.

    * `Wikipedia entry <http://en.wikipedia.org/wiki/Cohen%27s_kappa>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    weights_func : function(m_i, m_j)
        Weights function that receives two matrices of indices
        i, j and returns the matrix of weights between them.
        Default is :func:`~pyanno.measures.distances.diagonal_distance`

    nclasses : int
        Number of annotation classes. If None, `nclasses` is inferred from the
        values in the annotations

    Returns
    -------
    stat : float
        The value of the statistic. ``np.nan`` is returned when `all_invalid`
        is true for the inputs, when the confusion matrix sums to zero, or
        when the weighted chance disagreement is zero (the statistic is then
        undefined)
    """

    if all_invalid(annotations1, annotations2):
        logger.debug("No valid annotations")
        return np.nan

    if nclasses is None:
        nclasses = compute_nclasses(annotations1, annotations2)

    # observed probability of each combination of annotations
    observed_counts = confusion_matrix(annotations1, annotations2, nclasses)
    observed_total = observed_counts.sum()
    if observed_total == 0:
        logger.debug("Confusion matrix is empty")
        return np.nan

    # Out-of-place division: never mutates the array returned by the helper
    # and also works if that array holds integer counts.
    observed_freq = observed_counts / observed_total

    # expected probability of each combination of annotations if annotators
    # draw annotations at random with different but constant frequencies
    freq1 = labels_frequency(annotations1, nclasses)
    freq2 = labels_frequency(annotations2, nclasses)
    chance_freq = np.outer(freq1, freq2)

    # build weights matrix from weights function
    weights = np.fromfunction(weights_func, shape=(nclasses, nclasses), dtype=float)

    weighted_chance = (weights * chance_freq).sum()
    if weighted_chance == 0:
        # Dividing by zero would only produce a RuntimeWarning and nan/inf;
        # report the undefined statistic explicitly instead.
        logger.debug("Weighted chance disagreement is zero")
        return np.nan

    kappa = 1.0 - (weights * observed_freq).sum() / weighted_chance

    return kappa
Exemple #9
0
def krippendorffs_alpha(annotations, metric_func=diagonal_distance,
                        nclasses=None):
    """Compute Krippendorff's alpha for multiple annotators.

    The statistic is one minus the ratio between the weighted observed
    coincidences and the weighted coincidences expected by chance, the
    weights being the squared output of `metric_func`.

    **References:**

    * Klaus Krippendorff (2004). "Content Analysis, an Introduction to Its
      Methodology", 2nd Edition. Thousand Oaks, CA: Sage Publications.
      In particular, Chapter 11, pages 219--250.

    * `Wikipedia entry <http://en.wikipedia.org/wiki/Krippendorff%27s_Alpha>`_

    See also:
    :func:`~pyanno.measures.distances.diagonal_distance`,
    :func:`~pyanno.measures.distances.binary_distance`,

    Arguments
    ---------
    annotations : ndarray, shape = (n_items, n_annotators)
        Array of annotations for multiple annotators. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    metric_func : function(m_i, m_j)
        Metric function that receives two matrices of class indices
        i, j and returns the matrix of distances between them; the
        distances are squared to obtain the disagreement weights.
        Default is :func:`~pyanno.measures.distances.diagonal_distance`

    nclasses : int
        Number of annotation classes. If None, `nclasses` is inferred from the
        values in the annotations

    Returns
    -------
    stat : float
        The value of the statistic, or ``np.nan`` when `all_invalid` is true
        for `annotations`
    """

    if all_invalid(annotations):
        logger.debug('No valid annotations')
        return np.nan

    if nclasses is None:
        nclasses = compute_nclasses(annotations)

    coincidences = coincidence_matrix(annotations, nclasses)

    # row totals and grand total of the coincidence matrix
    nc = coincidences.sum(1)
    n = coincidences.sum()

    # ---- coincidences expected by chance
    # start from nc[c]*nc[k] everywhere, put nc[c]*(nc[c]-1) on the
    # diagonal, then apply the common 1/(n-1) factor to every entry
    chance_coincidences = np.outer(nc, nc).astype(float)
    np.fill_diagonal(chance_coincidences, nc * (nc - 1.))
    chance_coincidences /= (n - 1.)

    # build weights matrix (squared distances) from the metric function
    weights = np.fromfunction(metric_func, shape=(nclasses, nclasses),
                              dtype=float) ** 2.

    alpha = 1. - ((weights*coincidences).sum()
                  / (weights*chance_coincidences).sum())

    return alpha
Exemple #10
0
def cohens_weighted_kappa(annotations1, annotations2,
                          weights_func=diagonal_distance,
                          nclasses=None):
    """Compute Cohen's weighted kappa for two annotators.

    Assumes that the annotators draw annotations at random with different but
    constant frequencies. Disagreements are weighted by a weights
    w_ij representing the "seriousness" of disagreement. For ordered codes,
    it is often set to the distance from the diagonal, i.e. `w_ij = |i-j|`.

    When w_ij is 0.0 on the diagonal and 1.0 elsewhere,
    Cohen's weighted kappa is equivalent to Cohen's kappa.

    See also:
    :func:`~pyanno.measures.distances.diagonal_distance`,
    :func:`~pyanno.measures.distances.binary_distance`,
    :func:`~pyanno.measures.agreement.cohens_kappa`,
    :func:`~pyanno.measures.helpers.pairwise_matrix`

    **References:**

    * Cohen, J. (1968). "Weighed kappa: Nominal scale agreement with provision
      for scaled disagreement or partial credit". Psychological Bulletin
      70 (4): 213-220.

    * `Wikipedia entry <http://en.wikipedia.org/wiki/Cohen%27s_kappa>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    weights_func : function(m_i, m_j)
        Weights function that receives two matrices of indices
        i, j and returns the matrix of weights between them.
        Default is :func:`~pyanno.measures.distances.diagonal_distance`

    nclasses : int
        Number of annotation classes. If None, `nclasses` is inferred from the
        values in the annotations

    Returns
    -------
    stat : float
        The value of the statistic. ``np.nan`` is returned when `all_invalid`
        is true for the inputs, when the confusion matrix sums to zero, or
        when the weighted chance disagreement is zero, in which case the
        statistic is undefined
    """

    if all_invalid(annotations1, annotations2):
        logger.debug('No valid annotations')
        return np.nan

    if nclasses is None:
        nclasses = compute_nclasses(annotations1, annotations2)

    # observed probability of each combination of annotations
    confusion = confusion_matrix(annotations1, annotations2, nclasses)
    confusion_total = confusion.sum()
    if confusion_total == 0:
        logger.debug('Confusion matrix is empty')
        return np.nan

    # A new array is created on purpose: an in-place `/=` would modify the
    # helper's return value and fails for integer-typed count matrices.
    observed_freq = confusion / confusion_total

    # expected probability of each combination of annotations if annotators
    # draw annotations at random with different but constant frequencies
    freq1 = labels_frequency(annotations1, nclasses)
    freq2 = labels_frequency(annotations2, nclasses)
    chance_freq = np.outer(freq1, freq2)

    # build weights matrix from weights function
    weights = np.fromfunction(weights_func, shape=(nclasses, nclasses),
                              dtype=float)

    observed_disagreement = (weights*observed_freq).sum()
    chance_disagreement = (weights*chance_freq).sum()
    if chance_disagreement == 0:
        # avoid a silent division by zero (RuntimeWarning + nan/inf)
        logger.debug('Weighted chance disagreement is zero')
        return np.nan

    kappa = 1. - observed_disagreement / chance_disagreement

    return kappa