Example #1
0
    def test_observed_agreement(self):
        # Two annotators with missing values at different positions.
        first = np.array([0, 0, 1, 1, MV, 3])
        second = np.array([0, MV, 1, 1, MV, 2])

        # The expected frequencies are normalized by the number of items
        # for which both annotators supplied a valid value.
        both_valid = is_valid(first) & is_valid(second)
        n_both_valid = np.sum(both_valid)
        expected_freqs = np.array([1., 2., 0., 0.]) / n_both_valid

        observed_freqs = pmh.observed_agreement_frequency(first, second, 4)

        np.testing.assert_array_equal(observed_freqs, expected_freqs)
Example #2
0
    def add_markings(self, mark_classes, mark_name, marker_shape,
                     delta_x, delta_y, marker_size=5, line_width=1.,
                     marker_color='white'):
        """Draw a scatter marker on the posterior plot for each valid mark.

        Arguments
        ---------
        mark_classes : ndarray
            One entry per row of the plot's 'values' array; entries that
            `is_valid` rejects get no marker. Valid entries give the x
            position of the marker.

        mark_name : str
            Name of the scatter plot; also the prefix of the two data
            series ('<mark_name>_x', '<mark_name>_y') stored in the plot
            data.

        marker_shape
            Passed through as the `marker` option of the scatter plot.

        delta_x, delta_y
            Offsets added to the x and y coordinates of every marker.

        marker_size, line_width, marker_color
            Appearance of the marker outline; the fill is transparent.
        """
        posterior_plot = self.plot_posterior
        plot_data = posterior_plot.data
        n_rows = plot_data.arrays['values'].shape[0]

        # Keep only the rows that carry a valid mark. The extra 0.5 shifts
        # both coordinates by half a unit (presumably to center the marker
        # in its cell -- confirm against the plot's index mapping).
        is_marked = is_valid(mark_classes)
        row_coords = np.arange(n_rows)[is_marked] + delta_y + 0.5
        col_coords = mark_classes[is_marked].astype(float) + delta_x + 0.5

        x_key = mark_name + '_x'
        y_key = mark_name + '_y'
        plot_data.set_data(y_key, row_coords)
        plot_data.set_data(x_key, col_coords)

        scatter_options = dict(
            type='scatter',
            name=mark_name,
            marker=marker_shape,
            marker_size=marker_size,
            color='transparent',
            outline_color=marker_color,
            line_width=line_width,
        )
        posterior_plot.plot((x_key, y_key), **scatter_options)
Example #3
0
    def _compute_accuracy(self, category, annotations, use_prior):
        """Return accuracy, P(annotation_j = k' | category=k)

        Helper function to compute an estimate of the accuracy parameters
        theta, given labels and annotations.

        Arguments
        ---------
        category : ndarray
            category[i, :] is the vector of per-class weights of item i that
            is accumulated into the counts (presumably of shape
            (n_items, n_classes) -- confirm against callers).

        annotations : ndarray, shape = (n_items, n_annotators)
            annotations[i,j] is the annotation of annotator j for item i.
            Entries rejected by `is_valid` are skipped.

        use_prior : bool
            If True, the counts start from `self.alpha - 1` for every
            annotator; otherwise they start from zero.

        Returns
        -------
        accuracy : ndarray, shape = (n_annotators, n_classes, n_classes)
            accuracy[j,k,k'] = P(annotation_j = k' | category=k).
        """
        nitems, nannotators = annotations.shape
        # alpha - 1 : the mode of a Dirichlet is  (alpha_i - 1) / (alpha_0 - K)
        alpha_prior_count = self.alpha - 1.
        valid_mask = is_valid(annotations)

        # row vector of annotator indices, so that it can be masked per item
        annotators = np.arange(nannotators)[None, :]
        if use_prior:
            accuracy = np.tile(alpha_prior_count, (nannotators, 1, 1))
        else:
            accuracy = np.zeros((nannotators, self.nclasses, self.nclasses))

        # `range` instead of the Python 2-only `xrange`, so that the method
        # runs on both Python 2 and Python 3
        for i in range(nitems):
            valid = valid_mask[i, :]
            # for every annotator j that rated item i with value k', add the
            # item's class weights to accuracy[j, :, k']
            accuracy[annotators[:, valid], :,
                     annotations[i, valid]] += category[i, :]
        # normalize over k' so that each accuracy[j, k, :] sums to one
        accuracy /= accuracy.sum(2)[:, :, None]

        return accuracy
Example #4
0
    def _compute_accuracy(self, category, annotations, use_prior):
        """Estimate the accuracy tensor P(annotation_j = k' | category=k).

        Accumulates, for each annotator and each annotated value, the
        per-class weights in `category`, optionally on top of the prior
        counts `self.alpha - 1`, and normalizes over the annotated value.

        Returns
        -------
        accuracy : ndarray, shape = (n_annotators, n_classes, n_classes)
            accuracy[j,k,k'] = P(annotation_j = k' | category=k).
        """
        n_items, n_annotators = annotations.shape
        # The mode of a Dirichlet is (alpha_i - 1) / (alpha_0 - K), hence the
        # prior contributes alpha - 1 to the counts.
        prior_counts = self.alpha - 1.
        observed = is_valid(annotations)

        annotator_idx = np.arange(n_annotators)[None, :]
        if use_prior:
            counts = np.tile(prior_counts, (n_annotators, 1, 1))
        else:
            counts = np.zeros((n_annotators, self.nclasses, self.nclasses))

        for item in range(n_items):
            item_observed = observed[item, :]
            active_annotators = annotator_idx[:, item_observed]
            given_values = annotations[item, item_observed]
            # add the item's class weights to counts[j, :, k'] for every
            # annotator j that assigned value k' to this item
            counts[active_annotators, :, given_values] += category[item, :]

        # normalize over the annotated value k'
        counts /= counts.sum(axis=2, keepdims=True)

        return counts
Example #5
0
    def test_generate_annotations(self):
        # The masking of generated annotations must also be correct when
        # the number of items is not a multiple of the number of annotators.
        nclasses = 5
        nitems = 8 * 30 + 3

        model = ModelA.create_initial_state(nclasses)
        annotations = model.generate_annotations(nitems)

        # every row must carry exactly 3 valid annotations
        annotations_per_item = is_valid(annotations).sum(1)
        self.assertTrue(np.all(annotations_per_item == 3))
Example #6
0
    def test_generate_annotations(self):
        # Item count deliberately not divisible by the number of annotators:
        # the generated annotations must still be masked correctly.
        nclasses = 5
        nitems = 8 * 30 + 3
        generated = ModelA.create_initial_state(
            nclasses).generate_annotations(nitems)

        # exactly 3 valid annotations are expected on each row
        valid_counts = is_valid(generated).sum(1)
        has_three = valid_counts == 3
        self.assertTrue(np.all(has_three))
Example #7
0
def spearmans_rho(annotations1, annotations2, nclasses=None):
    """Compute Spearman's rank correlation coefficient.

    Only the items for which both annotators gave a valid annotation enter
    the computation.

    See also :func:`~pyanno.measures.helpers.pairwise_matrix`.

    **References:**

    * `Wikipedia entry
      <http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes. Accepted for signature compatibility
        with the other measures; this function does not read it.

    Returns
    -------
    stat : float
        The value of the statistics, or NaN if no item has been annotated
        by both annotators.
    """

    both_valid = is_valid(annotations1) & is_valid(annotations2)
    if not any(both_valid):
        logger.debug('No valid annotations')
        return np.nan

    first = annotations1[both_valid]
    second = annotations2[both_valid]
    # the second element of the result (the p-value) is not needed
    correlation, _ = scipy.stats.spearmanr(first, second)
    return correlation
Example #8
0
def spearmans_rho(annotations1, annotations2, nclasses=None):
    """Compute Spearman's rank correlation coefficient.

    The coefficient is computed over the items that carry a valid
    annotation from both annotators; all other items are ignored.

    See also :func:`~pyanno.measures.helpers.pairwise_matrix`.

    **References:**

    * `Wikipedia entry
      <http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes. Not used by this function.

    Returns
    -------
    stat : float
        The value of the statistics; NaN when the two annotators share no
        valid item.
    """

    shared = is_valid(annotations1) & is_valid(annotations2)

    if any(shared):
        # only the correlation is returned, the p-value is discarded
        rank_corr, _pvalue = scipy.stats.spearmanr(annotations1[shared],
                                                   annotations2[shared])
        return rank_corr

    logger.debug('No valid annotations')
    return np.nan
Example #9
0
def coincidence_matrix(annotations, nclasses):
    """Build coincidence matrix.

    Element (c, k) accumulates, over all items with at least two valid
    annotations, the number of ordered c-k pairs among the annotations of
    the item divided by the number of valid annotations of the item minus
    one.

    **Reference**

    * `Wikipedia entry
      <http://en.wikipedia.org/wiki/Krippendorff%27s_Alpha#Coincidence_matrices>`_

    Arguments
    ---------
    annotations : ndarray, shape = (n_items, n_annotators)
        Array of annotations for multiple annotators. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes; determines the size of the matrix.

    Returns
    -------
    coinc_mat : ndarray, shape = (n_classes, n_classes)
        Coincidence matrix
    """

    # number of valid annotations of each item; items with fewer than two
    # cannot form a pair and are dropped
    n_valid = is_valid(annotations).sum(1).astype(float)
    pairable = n_valid > 1

    n_valid = n_valid[pairable]
    annotations = annotations[pairable, :]

    # class_counts[i, c]: how many annotations of item i are equal to c
    class_counts = np.empty((n_valid.shape[0], nclasses), dtype=int)
    for label in range(nclasses):
        class_counts[:, label] = (annotations == label).sum(1)

    denominators = n_valid - 1.0

    coinc_mat = np.empty((nclasses, nclasses), dtype=float)
    for c in range(nclasses):
        count_c = class_counts[:, c]
        for k in range(nclasses):
            # within a class, a value cannot be paired with itself
            partners = count_c - 1 if c == k else class_counts[:, k]
            coinc_mat[c, k] = (count_c * partners / denominators).sum()

    return coinc_mat
Example #10
0
def coincidence_matrix(annotations, nclasses):
    """Build coincidence matrix.

    For every item with more than one valid annotation, the ordered pairs
    of annotated values (c, k) are counted and weighted by
    1 / (number of valid annotations in the item - 1); element (c, k) of
    the result is the sum of these weighted counts over the items.

    **Reference**

    * `Wikipedia entry
      <http://en.wikipedia.org/wiki/Krippendorff%27s_Alpha#Coincidence_matrices>`_

    Arguments
    ---------
    annotations : ndarray, shape = (n_items, n_annotators)
        Array of annotations for multiple annotators. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes (side length of the returned matrix).

    Returns
    -------
    coinc_mat : ndarray, shape = (n_classes, n_classes)
        Coincidence matrix
    """

    # valid annotations per row; rows with a single annotation (or none)
    # contain no pairs and are discarded
    per_row_total = is_valid(annotations).sum(1).astype(float)
    has_pairs = per_row_total > 1

    per_row_total = per_row_total[has_pairs]
    annotations = annotations[has_pairs, :]

    # per_row_class[i, c]: occurrences of class c in row i
    n_rows = per_row_total.shape[0]
    per_row_class = np.empty((n_rows, nclasses), dtype=int)
    for cls in range(nclasses):
        per_row_class[:, cls] = (annotations == cls).sum(1)

    result = np.empty((nclasses, nclasses), dtype=float)
    for row_cls in range(nclasses):
        for col_cls in range(nclasses):
            first = per_row_class[:, row_cls]
            if row_cls != col_cls:
                pair_counts = first * per_row_class[:, col_cls]
            else:
                # same class: exclude the pairing of a value with itself
                pair_counts = first * (first - 1)
            weighted = pair_counts / (per_row_total - 1.)
            result[row_cls, col_cls] = weighted.sum()

    return result
Example #11
0
    def test_generate_annotations(self):
        nclasses = 3
        nitems = 2000 * 8

        # random model, and random data drawn from it
        model = ModelA.create_initial_state(nclasses)
        annotations = model.generate_annotations(nitems)

        self.assertEqual(annotations.shape, (nitems, model.nannotators))
        annotations_per_item = is_valid(annotations).sum(1)
        self.assertTrue(np.all(annotations_per_item == 3))

        # empirical frequency of each class over all 3 * nitems annotations
        # must be close to the model's omega
        total_annotations = float(nitems * 3)
        class_counts = [(annotations == psi).sum() for psi in range(nclasses)]
        freqs = np.array([count / total_annotations
                          for count in class_counts])
        testing.assert_allclose(model.omega, freqs, atol=1e-1, rtol=0.)
Example #12
0
    def test_generate_annotations(self):
        nitems, nclasses = 2000 * 8, 3

        # draw random annotations from a randomly initialized model
        model = ModelA.create_initial_state(nclasses)
        annotations = model.generate_annotations(nitems)

        expected_shape = (nitems, model.nannotators)
        self.assertEqual(annotations.shape, expected_shape)
        three_per_item = is_valid(annotations).sum(1) == 3
        self.assertTrue(np.all(three_per_item))

        # the observed class frequencies (3 annotations per item) are
        # compared with the model's omega
        n_annotations = float(nitems * 3)
        freqs = np.array([(annotations == psi).sum() / n_annotations
                          for psi in range(nclasses)])
        testing.assert_allclose(model.omega, freqs, atol=1e-1, rtol=0.)
    def infer_labels(self, annotations):
        r"""Infer posterior distribution over label classes.

        Compute the posterior distribution over label classes given observed
        annotations, :math:`P( \mathbf{y} | \mathbf{x}, \theta, \gamma)`.

        For every class, the prior `self.gamma` of the class is multiplied
        by one factor per active annotator: the annotator's theta if the
        annotation is equal to the class, (1 - theta) / (n_classes - 1)
        otherwise. The result is normalized over the classes.

        Arguments
        ----------
        annotations : ndarray, shape = (n_items, n_annotators)
            annotations[i,j] is the annotation of annotator j for item i

        Returns
        -------
        posterior : ndarray, shape = (n_items, n_classes)
            posterior[i,k] is the posterior probability of class k given the
            annotation observed in item i.
        """

        self._raise_if_incompatible(annotations)

        nitems = annotations.shape[0]
        gamma = self.gamma
        nclasses = self.nclasses

        # get indices of annotators active in each row
        # (`nonzero` lists the valid entries row by row, so the reshape
        # groups them by item; this relies on every item having exactly
        # `nannotators_per_item` valid annotations -- presumably enforced by
        # `_raise_if_incompatible`, confirm)
        valid_entries = is_valid(annotations).nonzero()
        annotator_indices = np.reshape(valid_entries[1],
                                       (nitems, self.nannotators_per_item))
        valid_annotations = annotations[valid_entries]
        valid_annotations = np.reshape(valid_annotations,
                                       (nitems, self.nannotators_per_item))

        # thetas of active annotators
        theta_equal = self.theta[annotator_indices]
        theta_not_equal = (1. - theta_equal) / (nclasses - 1.)

        # compute posterior over psi
        psi_distr = np.zeros((nitems, nclasses))
        # `range` instead of the Python 2-only `xrange`, so that the method
        # runs on both Python 2 and Python 3
        for psi in range(nclasses):
            tmp = np.where(valid_annotations == psi,
                           theta_equal, theta_not_equal)
            psi_distr[:, psi] = gamma[psi] * tmp.prod(1)

        # normalize distribution
        psi_distr /= psi_distr.sum(1)[:, np.newaxis]
        return psi_distr
Example #14
0
    def infer_labels(self, annotations):
        r"""Infer posterior distribution over label classes.

        Compute the posterior distribution over label classes given observed
        annotations, :math:`P( \mathbf{y} | \mathbf{x}, \theta, \gamma)`.

        The unnormalized posterior of a class is its prior `self.gamma`
        times the product, over the annotators active on the item, of theta
        (annotation equal to the class) or (1 - theta) / (n_classes - 1)
        (annotation different from the class).

        Arguments
        ----------
        annotations : ndarray, shape = (n_items, n_annotators)
            annotations[i,j] is the annotation of annotator j for item i

        Returns
        -------
        posterior : ndarray, shape = (n_items, n_classes)
            posterior[i,k] is the posterior probability of class k given the
            annotation observed in item i.
        """

        self._raise_if_incompatible(annotations)

        nitems = annotations.shape[0]
        gamma = self.gamma
        nclasses = self.nclasses
        per_item_shape = (nitems, self.nannotators_per_item)

        # get indices of annotators active in each row
        # (the valid entries come out of `nonzero` in row-major order, so
        # reshaping groups them by item; this relies on every item having
        # exactly `nannotators_per_item` valid annotations -- presumably
        # enforced by `_raise_if_incompatible`, confirm)
        valid_entries = is_valid(annotations).nonzero()
        annotator_indices = np.reshape(valid_entries[1], per_item_shape)
        valid_annotations = np.reshape(annotations[valid_entries],
                                       per_item_shape)

        # thetas of active annotators
        theta_equal = self.theta[annotator_indices]
        theta_not_equal = (1. - theta_equal) / (nclasses - 1.)

        # compute posterior over psi
        psi_distr = np.zeros((nitems, nclasses))
        # `range` rather than `xrange`: the latter does not exist on
        # Python 3
        for psi in range(nclasses):
            tmp = np.where(valid_annotations == psi, theta_equal,
                           theta_not_equal)
            psi_distr[:, psi] = gamma[psi] * tmp.prod(1)

        # normalize distribution
        psi_distr /= psi_distr.sum(1)[:, np.newaxis]
        return psi_distr
Example #15
0
    def test_generate_annotations(self):
        # test to check that annotations are generated with the right shape
        # and are accepted by the model, also when the number of items is
        # not divisible by the number of annotators
        nclasses, nannotators, nitems = 5, 7, 201

        model = ModelBt.create_initial_state(nclasses, nannotators)
        annotations = model.generate_annotations(nitems)

        self.assertEqual(annotations.shape, (nitems, nannotators))
        # assert on the outcome of the compatibility check: a bare call
        # discards the result, so that the check could never fail
        self.assertTrue(model.are_annotations_compatible(annotations))

        # perfect annotators, annotations correspond to prior
        nitems = 20000
        model.theta[:] = 1.
        annotations = model.generate_annotations(nitems)
        freq = labels_frequency(annotations, nclasses)
        np.testing.assert_almost_equal(freq, model.gamma, 2)
Example #16
0
    def test_generate_annotations(self):
        # generated annotations must have one row per item and one column
        # per annotator, and must be compatible with the model, also when
        # the number of items is not divisible by the number of annotators
        nclasses, nannotators, nitems = 5, 7, 201

        model = ModelBt.create_initial_state(nclasses, nannotators)
        annotations = model.generate_annotations(nitems)

        self.assertEqual(annotations.shape, (nitems, nannotators))
        # the result of the check has to be asserted, otherwise it is
        # computed and silently thrown away
        self.assertTrue(model.are_annotations_compatible(annotations))

        # perfect annotators, annotations correspond to prior
        nitems = 20000
        model.theta[:] = 1.
        annotations = model.generate_annotations(nitems)
        freq = labels_frequency(annotations, nclasses)
        np.testing.assert_almost_equal(freq, model.gamma, 2)
    def add_markings(self,
                     mark_classes,
                     mark_name,
                     marker_shape,
                     delta_x,
                     delta_y,
                     marker_size=5,
                     line_width=1.,
                     marker_color='white'):
        """Add a scatter plot of markers on top of the posterior plot.

        One marker is drawn for each entry of `mark_classes` accepted by
        `is_valid`. Its y coordinate is the position of the entry, its x
        coordinate is the value of the entry; `delta_y` and `delta_x`, plus
        a fixed 0.5, are added to them.

        The coordinates are stored in the plot data under the names
        `mark_name + '_x'` and `mark_name + '_y'`, and the scatter plot
        itself is registered under `mark_name`. Markers have a transparent
        fill and an outline of color `marker_color` and width `line_width`.
        """
        plot = self.plot_posterior
        n_positions = plot.data.arrays['values'].shape[0]

        keep = is_valid(mark_classes)
        positions = np.arange(n_positions)

        # y: index of the valid entries; x: their class, as a float
        ys = positions[keep] + delta_y + 0.5
        xs = mark_classes[keep].astype(float) + delta_x + 0.5

        x_series, y_series = mark_name + '_x', mark_name + '_y'
        plot.data.set_data(y_series, ys)
        plot.data.set_data(x_series, xs)

        plot.plot(
            (x_series, y_series),
            type='scatter',
            name=mark_name,
            marker=marker_shape,
            marker_size=marker_size,
            color='transparent',
            outline_color=marker_color,
            line_width=line_width,
        )
Example #18
0
 def _missing_mask(self, annotations):
     """Return the mask of invalid annotations, repeated for every class.

     The 2D boolean mask of the entries rejected by `is_valid` gets a
     trailing axis of length `self.nclasses` along which it is replicated.
     """
     invalid = ~is_valid(annotations)
     repetitions = (1, 1, self.nclasses)
     return np.tile(invalid[:, :, np.newaxis], repetitions)
Example #19
0
def all_invalid(*annotations):
    """Return True if none of the given annotation arrays has a valid entry.

    Evaluation stops at the first array that contains a valid entry.
    """
    return not any(np.any(is_valid(anno)) for anno in annotations)
Example #20
0
def all_invalid(*annotations):
    """Return True if every entry of every given array is invalid.

    With no arguments the result is True.
    """
    return all(not np.any(is_valid(candidate)) for candidate in annotations)
Example #21
0
 def _missing_mask(self, annotations):
     """Build a 3D boolean mask flagging the missing annotations.

     Element [i, j, k] is True, for every k in range(self.nclasses), when
     annotations[i, j] is not accepted by `is_valid`.
     """
     is_missing = ~is_valid(annotations)
     # add a class axis and replicate the 2D mask along it
     expanded = is_missing[:, :, None]
     return np.tile(expanded, (1, 1, self.nclasses))