def chance_agreement_different_frequency(annotations1, annotations2, nclasses):
    """Expected frequency of agreement by random annotations.

    Assumes that the annotators draw annotations at random with different but
    constant frequencies.

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator.

        Missing values should be indicated by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator.

        Missing values should be indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes; forwarded to ``labels_frequency`` for
        both annotators.

    Return
    ------
    result : ndarray
        Element-wise product of the label frequencies of the two annotators
        (one entry per class, as returned by ``labels_frequency``).
        The product is NOT summed here.
    """
    freq1 = labels_frequency(annotations1, nclasses)
    freq2 = labels_frequency(annotations2, nclasses)

    # Per-class probability that both annotators pick the same label by
    # chance; callers decide whether and how to aggregate it.
    return freq1 * freq2
def test_labels_frequency(self):
    """Check ``labels_frequency`` on a 2D array with a custom missing value."""
    missing = -2
    n_labels = 5
    data = np.array([
        [1, 2, missing, missing],
        [missing, missing, 3, 3],
        [missing, 1, 3, 1],
        [missing, missing, missing, missing],
    ])

    # 7 valid entries: label 1 three times, label 2 once, label 3 three times
    wanted = np.array([0., 3., 1., 3., 0.]) / 7.

    observed = pu.labels_frequency(data, n_labels, missing_val=missing)
    np.testing.assert_equal(observed, wanted)
def _random_initial_parameters(self, annotations, estimate_gamma):
    """Return a starting parameter vector for the model.

    When `estimate_gamma` is true, gamma is taken from the label frequencies
    observed in `annotations`; otherwise it is drawn with
    ``ModelBtLoopDesign._random_gamma``. Theta is always drawn with
    ``ModelBtLoopDesign._random_theta``. Both are packed with
    ``self._params_to_vector``.
    """
    # gamma is obtained first, theta second
    if not estimate_gamma:
        start_gamma = ModelBtLoopDesign._random_gamma(self.nclasses)
    else:
        # estimate gamma from observed annotations
        start_gamma = labels_frequency(annotations, self.nclasses)

    start_theta = ModelBtLoopDesign._random_theta(self.nannotators)
    return self._params_to_vector(start_gamma, start_theta)
def _random_initial_parameters(self, annotations, estimate_gamma):
    """Return a starting parameter vector for the model.

    Gamma comes from the label frequencies observed in `annotations` when
    `estimate_gamma` is true, and from ``ModelBt._random_gamma`` otherwise.
    Theta always comes from ``ModelBt._random_theta``. The pair is packed
    with ``self._params_to_vector``.
    """
    # gamma is obtained first, theta second
    gamma0 = (
        labels_frequency(annotations, self.nclasses)  # estimate from data
        if estimate_gamma
        else ModelBt._random_gamma(self.nclasses)
    )
    theta0 = ModelBt._random_theta(self.nannotators)

    return self._params_to_vector(gamma0, theta0)
def _random_initial_parameters(self, annotations, estimate_omega):
    """Return starting values ``(theta, omega)`` for the model.

    Omega is taken from the label frequencies observed in `annotations` when
    `estimate_omega` is true; otherwise the current ``self.omega`` is reused.
    Theta is always drawn with ``ModelA._random_theta``.
    """
    # TODO duplication w/ ModelBtLoopDesign
    if not estimate_omega:
        start_omega = self.omega
    else:
        # estimate omega from observed annotations
        start_omega = labels_frequency(annotations, self.nclasses)

    start_theta = ModelA._random_theta(self.nannotators)
    return start_theta, start_omega
def _update_frequency(self):
    """Recompute the observed label frequencies and rebuild their plot.

    The number of classes is the larger of ``self.nclasses`` and the
    container's ``nclasses``. If ``labels_frequency`` raises
    ``PyannoValueError``, the error is logged at debug level and an all-zero
    frequency list is used instead.
    """
    n_labels = max(self.nclasses, self.annotations_container.nclasses)

    try:
        freq_array = labels_frequency(
            self.annotations_container.annotations, n_labels)
    except PyannoValueError as err:
        logger.debug(err)
        freq_array = np.zeros((n_labels, ))

    # store a plain list, then build the plot from the stored attribute
    self.frequency = freq_array.tolist()
    self.frequency_plot = HintonDiagramPlot(
        data=self.frequency,
        title='Observed label frequencies',
    )
def test_labels_frequency(self):
    """Check ``labels_frequency`` on a 2D array where -2 marks missing data."""
    rows = [
        [1, 2, -2, -2],
        [-2, -2, 3, 3],
        [-2, 1, 3, 1],
        [-2, -2, -2, -2],
    ]
    sample = np.array(rows)
    class_count = 5

    # counts per class over the 7 non-missing entries, normalised to sum to 1
    target = np.array([0., 3., 1., 3., 0.]) / 7.

    np.testing.assert_equal(
        pu.labels_frequency(sample, class_count, missing_val=-2),
        target,
    )
def _update_frequency(self):
    """Refresh ``self.frequency`` and ``self.frequency_plot``.

    Frequencies are computed with ``labels_frequency`` over the container's
    annotations, using the larger of ``self.nclasses`` and the container's
    ``nclasses``. A ``PyannoValueError`` from that computation is logged at
    debug level and replaced by a list of zeros.
    """
    class_count = max(self.nclasses, self.annotations_container.nclasses)

    try:
        observed = labels_frequency(
            self.annotations_container.annotations, class_count
        ).tolist()
    except PyannoValueError as exc:
        logger.debug(exc)
        observed = np.zeros((class_count,)).tolist()

    self.frequency = observed
    # the plot reads back the attribute that was just stored
    plot = HintonDiagramPlot(data=self.frequency,
                             title='Observed label frequencies')
    self.frequency_plot = plot
def test_generate_annotations(self):
    """Check the shape of generated annotations and their label frequencies.

    First generates annotations for a number of items that is not divisible
    by the number of annotators and checks the resulting shape. Then sets
    every theta to 1 and checks that the observed label frequencies match
    ``model.gamma`` to 2 decimal places.
    """
    # test to check that annotations are masked correctly when the number
    # of items is not divisible by the number of annotators
    nclasses, nannotators, nitems = 5, 7, 201

    model = ModelBt.create_initial_state(nclasses, nannotators)
    annotations = model.generate_annotations(nitems)

    self.assertEqual(annotations.shape, (nitems, nannotators))
    # NOTE(review): the result of this call is not asserted; it presumably
    # returns a boolean -- confirm and wrap it in assertTrue if so.
    model.are_annotations_compatible(annotations)

    # perfect annotators, annotations correspond to prior
    nitems = 20000
    model.theta[:] = 1.
    annotations = model.generate_annotations(nitems)
    freq = labels_frequency(annotations, nclasses)
    np.testing.assert_almost_equal(freq, model.gamma, 2)
def cohens_weighted_kappa(annotations1, annotations2,
                          weights_func=diagonal_distance,
                          nclasses=None):
    """Compute Cohen's weighted kappa for two annotators.

    Assumes that the annotators draw annotations at random with different but
    constant frequencies. Disagreements are weighted by weights w_ij
    representing the "seriousness" of disagreement. For ordered codes, it is
    often set to the distance from the diagonal, i.e. `w_ij = |i-j|`.

    When w_ij is 0.0 on the diagonal and 1.0 elsewhere, Cohen's weighted
    kappa is equivalent to Cohen's kappa.

    See also:
    :func:`~pyanno.measures.distances.diagonal_distance`,
    :func:`~pyanno.measures.distances.binary_distance`,
    :func:`~pyanno.measures.agreement.cohens_kappa`,
    :func:`~pyanno.measures.helpers.pairwise_matrix`

    **References:**

    * Cohen, J. (1968). "Weighted kappa: Nominal scale agreement with
      provision for scaled disagreement or partial credit". Psychological
      Bulletin 70 (4): 213-220.

    * `Wikipedia entry <http://en.wikipedia.org/wiki/Cohen%27s_kappa>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator.

        Missing values should be indicated by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator.

        Missing values should be indicated by :attr:`pyanno.util.MISSING_VALUE`

    weights_func : function(m_i, m_j)
        Weights function that receives two matrices of indices
        i, j and returns the matrix of weights between them.
        Default is :func:`~pyanno.measures.distances.diagonal_distance`

    nclasses : int
        Number of annotation classes. If None, `nclasses` is inferred from the
        values in the annotations

    Returns
    -------
    stat : float
        The value of the statistic. ``nan`` is returned when
        ``all_invalid`` reports no valid annotations or when the confusion
        matrix sums to zero.
    """
    if all_invalid(annotations1, annotations2):
        logger.debug("No valid annotations")
        return np.nan

    if nclasses is None:
        nclasses = compute_nclasses(annotations1, annotations2)

    # observed probability of each combination of annotations
    observed_counts = confusion_matrix(annotations1, annotations2, nclasses)
    observed_total = observed_counts.sum()
    if observed_total == 0:
        return np.nan
    # Divide out of place and by a Python float: an integer-typed confusion
    # matrix is then neither rejected by NumPy's in-place casting rules nor
    # floor-divided, and the array returned by confusion_matrix is not
    # mutated.
    observed_freq = observed_counts / float(observed_total)

    # expected probability of each combination of annotations if annotators
    # draw annotations at random with different but constant frequencies
    freq1 = labels_frequency(annotations1, nclasses)
    freq2 = labels_frequency(annotations2, nclasses)
    chance_freq = np.outer(freq1, freq2)

    # build weights matrix from weights function
    weights = np.fromfunction(weights_func, shape=(nclasses, nclasses),
                              dtype=float)

    # NOTE(review): if the weighted chance disagreement is zero the ratio is
    # undefined; with NumPy scalars this presumably yields nan/inf (plus a
    # RuntimeWarning) rather than raising.
    weighted_observed = (weights * observed_freq).sum()
    weighted_chance = (weights * chance_freq).sum()
    kappa = 1.0 - weighted_observed / weighted_chance

    return kappa
def cohens_weighted_kappa(annotations1, annotations2,
                          weights_func=diagonal_distance,
                          nclasses=None):
    """Compute Cohen's weighted kappa for two annotators.

    Assumes that the annotators draw annotations at random with different but
    constant frequencies. Disagreements are weighted by weights w_ij
    representing the "seriousness" of disagreement. For ordered codes, it is
    often set to the distance from the diagonal, i.e. `w_ij = |i-j|`.

    When w_ij is 0.0 on the diagonal and 1.0 elsewhere, Cohen's weighted
    kappa is equivalent to Cohen's kappa.

    See also:
    :func:`~pyanno.measures.distances.diagonal_distance`,
    :func:`~pyanno.measures.distances.binary_distance`,
    :func:`~pyanno.measures.agreement.cohens_kappa`,
    :func:`~pyanno.measures.helpers.pairwise_matrix`

    **References:**

    * Cohen, J. (1968). "Weighted kappa: Nominal scale agreement with
      provision for scaled disagreement or partial credit". Psychological
      Bulletin 70 (4): 213-220.

    * `Wikipedia entry <http://en.wikipedia.org/wiki/Cohen%27s_kappa>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator.

        Missing values should be indicated by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator.

        Missing values should be indicated by :attr:`pyanno.util.MISSING_VALUE`

    weights_func : function(m_i, m_j)
        Weights function that receives two matrices of indices
        i, j and returns the matrix of weights between them.
        Default is :func:`~pyanno.measures.distances.diagonal_distance`

    nclasses : int
        Number of annotation classes. If None, `nclasses` is inferred from the
        values in the annotations

    Returns
    -------
    stat : float
        The value of the statistic. ``nan`` is returned when
        ``all_invalid`` reports no valid annotations or when the confusion
        matrix sums to zero.
    """
    if all_invalid(annotations1, annotations2):
        logger.debug('No valid annotations')
        return np.nan

    if nclasses is None:
        nclasses = compute_nclasses(annotations1, annotations2)

    # observed probability of each combination of annotations
    joint_counts = confusion_matrix(annotations1, annotations2, nclasses)
    joint_total = joint_counts.sum()
    if joint_total == 0:
        return np.nan
    # Out-of-place division by a Python float: works for integer-typed
    # confusion matrices (no in-place casting error, no floor division) and
    # leaves the array returned by confusion_matrix untouched.
    observed_freq = joint_counts / float(joint_total)

    # expected probability of each combination of annotations if annotators
    # draw annotations at random with different but constant frequencies
    freq1 = labels_frequency(annotations1, nclasses)
    freq2 = labels_frequency(annotations2, nclasses)
    chance_freq = np.outer(freq1, freq2)

    # build weights matrix from weights function
    weights = np.fromfunction(weights_func, shape=(nclasses, nclasses),
                              dtype=float)

    # NOTE(review): a zero weighted chance disagreement makes the ratio
    # undefined; with NumPy scalars this presumably yields nan/inf (plus a
    # RuntimeWarning) rather than raising.
    observed_disagreement = (weights * observed_freq).sum()
    chance_disagreement = (weights * chance_freq).sum()
    kappa = 1. - observed_disagreement / chance_disagreement

    return kappa