Example #1
0
    def generate_annotations_from_labels(self, labels):
        """Generate random annotations from the model, given labels.

        The method samples random annotations from the conditional probability
        distribution of annotations, :math:`x_i^j`,
        given labels, :math:`y_i`.

        Arguments
        ---------
        labels : ndarray, shape = (n_items,), dtype = int
            Set of "true" labels

        Returns
        -------
        annotations : ndarray, shape = (n_items, n_annotators)
            annotations[i,j] is the annotation of annotator j for item i
        """
        theta = self.theta
        nitems = labels.shape[0]

        annotations = np.empty((nitems, self.nannotators), dtype=int)
        # Sampling order (annotator-major, then item) is kept so that the
        # sequence of random draws is unchanged.
        for j in range(self.nannotators):
            for i in range(nitems):
                distr = self._theta_to_categorical(theta[j], labels[i])
                # One sample is requested; take that single draw explicitly
                # rather than assigning a length-1 array to a scalar slot
                # (implicit size-1 array -> scalar conversion is deprecated
                # in recent NumPy releases).
                annotations[i, j] = random_categorical(distr, 1)[0]

        return annotations
Example #2
0
    def generate_annotations_from_labels(self, labels):
        r"""Generate random annotations from the model, given labels.

        The method samples random annotations from the conditional probability
        distribution of annotations, :math:`x_i^j`,
        given labels, :math:`y_i`:

        :math:`x_i^j \sim \mathrm{Categorical}(\mathbf{\theta_j^{y_i}})`

        Arguments
        ---------
        labels : ndarray, shape = (n_items,), dtype = int
            Set of "true" labels

        Returns
        -------
        annotations : ndarray, shape = (n_items, n_annotators)
            annotations[i,j] is the annotation of annotator j for item i
        """
        theta = self.theta
        nitems = labels.shape[0]
        annotations = np.empty((nitems, self.nannotators), dtype=int)
        # Sampling order (annotator-major, then item) is kept so that the
        # sequence of random draws is unchanged.
        for j in range(self.nannotators):
            for i in range(nitems):
                # theta[j, y, :] is used as the categorical distribution of
                # annotator j's answer when the label is y.
                distr = theta[j, labels[i], :]
                # One sample is requested; take that single draw explicitly
                # rather than assigning a length-1 array to a scalar slot
                # (implicit size-1 array -> scalar conversion is deprecated
                # in recent NumPy releases).
                annotations[i, j] = random_categorical(distr, 1)[0]
        return annotations
Example #3
0
    def _generate_agreement(self, incorrect):
        """Return indices of agreement pattern given correctness pattern.

        The indices returned correspond to agreement patterns
        as in Table 3: 0=aaa, 1=aaA, 2=aAa, 3=Aaa, 4=Aa@

        Arguments
        ---------
        incorrect : ndarray, shape = (n_items, 3)
            Correctness pattern of each annotation triplet. Columns 0, 1, 2
            are combined with weights 1, 2, 4 into a pattern index, so the
            entries are expected to be 0/1 (or boolean) flags.

        Returns
        -------
        agreement : ndarray, shape = (n_items,), dtype = int
            Index (0-4) of the agreement pattern drawn for each item
        """

        # create tensor A_ijk
        # (cf. Table 3 in Rzhetsky et al., 2009, suppl. mat.)
        alpha = self._compute_alpha()
        agreement_tbl = np.array(
            [[1., 0., 0., 0., 0.],
             [0., 1., 0., 0., 0.],
             [0., 0., 1., 0., 0.],
             [0., 0., 0., 1., 0.],
             [0., 0., 0., alpha[0], 1. - alpha[0]],
             [0., 0., alpha[1], 0., 1. - alpha[1]],
             [0., alpha[2], 0., 0., 1. - alpha[2]],
             [alpha[3], alpha[4], alpha[5], alpha[6], 1. - alpha[3:].sum()]])

        # this array maps boolean correctness patterns (e.g., CCI) to
        # indices in the agreement tensor, `agreement_tbl`
        correctness_to_agreement_idx = np.array([0, 3, 2, 6, 1, 5, 4, 7])

        # convert correctness pattern to index in the A_ijk tensor
        pattern_code = (incorrect[:, 0] * 1 +
                        incorrect[:, 1] * 2 +
                        incorrect[:, 2] * 4)
        correct_idx = correctness_to_agreement_idx[pattern_code]

        # the indices stored in `agreement` correspond to agreement patterns
        # as in Table 3: 0=aaa, 1=aaA, 2=aAa, 3=Aaa, 4=Aa@
        nitems_per_loop = incorrect.shape[0]
        agreement = np.empty((nitems_per_loop, ), dtype=int)
        for i in range(nitems_per_loop):
            # generate agreement pattern according to A_ijk; a single sample
            # is requested, so take that one draw explicitly instead of
            # assigning a length-1 array to a scalar slot (deprecated in
            # recent NumPy releases)
            row = agreement_tbl[correct_idx[i]]
            agreement[i] = random_categorical(row, 1)[0]

        return agreement
Example #4
0
    def generate_annotations_from_labels(self, labels):
        r"""Generate random annotations from the model, given labels.

        The method samples random annotations from the conditional probability
        distribution of annotations, :math:`x_i^j`,
        given labels, :math:`y_i`:

        :math:`x_i^j \sim \mathrm{Categorical}(\mathbf{\theta_j^{y_i}})`

        Arguments
        ---------
        labels : ndarray, shape = (n_items,), dtype = int
            Set of "true" labels

        Returns
        -------
        annotations : ndarray, shape = (n_items, n_annotators)
            annotations[i,j] is the annotation of annotator j for item i
        """
        theta = self.theta
        nitems = labels.shape[0]
        annotations = np.empty((nitems, self.nannotators), dtype=int)
        # `range` works on both Python 2 and Python 3 (`xrange` does not
        # exist on Python 3). Sampling order is kept annotator-major so the
        # sequence of random draws is unchanged.
        for j in range(self.nannotators):
            for i in range(nitems):
                # theta[j, y, :] is used as the categorical distribution of
                # annotator j's answer when the label is y.
                distr = theta[j, labels[i], :]
                # One sample is requested; take that single draw explicitly
                # rather than assigning a length-1 array to a scalar slot
                # (implicit size-1 array -> scalar conversion is deprecated
                # in recent NumPy releases).
                annotations[i, j] = random_categorical(distr, 1)[0]
        return annotations
Example #5
0
    def generate_annotations_from_labels(self, labels):
        """Generate random annotations from the model, given labels.

        The method samples random annotations from the conditional probability
        distribution of annotations, :math:`x_i^j`,
        given labels, :math:`y_i`.

        Arguments
        ---------
        labels : ndarray, shape = (n_items,), dtype = int
            Set of "true" labels

        Returns
        -------
        annotations : ndarray, shape = (n_items, n_annotators)
            annotations[i,j] is the annotation of annotator j for item i
        """
        theta = self.theta
        nitems = labels.shape[0]

        annotations = np.empty((nitems, self.nannotators), dtype=int)
        # `range` works on both Python 2 and Python 3 (`xrange` does not
        # exist on Python 3). Sampling order is kept annotator-major so the
        # sequence of random draws is unchanged.
        for j in range(self.nannotators):
            for i in range(nitems):
                distr = self._theta_to_categorical(theta[j], labels[i])
                # One sample is requested; take that single draw explicitly
                # rather than assigning a length-1 array to a scalar slot
                # (implicit size-1 array -> scalar conversion is deprecated
                # in recent NumPy releases).
                annotations[i, j] = random_categorical(distr, 1)[0]

        return annotations
    def generate_annotations_from_labels(self, labels):
        """Generate random annotations from the model, given labels.

        The method samples random annotations from the conditional probability
        distribution of annotations, :math:`x_i^j`,
        given labels, :math:`y_i`. Entries that fall outside the loop design
        are then overwritten with `MISSING_VALUE`.

        Arguments
        ---------
        labels : ndarray, shape = (n_items,), dtype = int
            Set of "true" labels

        Returns
        -------
        annotations : ndarray, shape = (n_items, n_annotators)
            annotations[i,j] is the annotation of annotator j for item i,
            or `MISSING_VALUE` where the loop design masks that annotator
        """
        theta = self.theta
        nannotators = self.nannotators
        nitems = labels.shape[0]
        # Number of consecutive items per loop, rounded up. It must be an
        # int because it is used as a slice bound below (NumPy rejects
        # float slice indices).
        nitems_per_loop = int(np.ceil(float(nitems) / nannotators))

        annotations = np.empty((nitems, nannotators), dtype=int)
        # `range` works on both Python 2 and Python 3 (`xrange` does not
        # exist on Python 3).
        for j in range(nannotators):
            for i in range(nitems):
                distr = self._theta_to_categorical(theta[j], labels[i])
                # One sample is requested; take that single draw explicitly
                # rather than assigning a length-1 array to a scalar slot.
                annotations[i, j] = random_categorical(distr, 1)[0]

        # mask annotation value according to loop design
        for loop in range(nannotators):
            # Columns to blank out for this block of items: the
            # `nannotators - nannotators_per_item` columns starting at
            # `loop + nannotators_per_item`, wrapping around the number of
            # annotators (identical to the former `% 8` when there are
            # 8 annotators).
            masked_cols = np.arange(loop + self.nannotators_per_item,
                                    loop + nannotators) % nannotators
            first = loop * nitems_per_loop
            last = (loop + 1) * nitems_per_loop
            annotations[first:last, masked_cols] = MISSING_VALUE

        return annotations
Example #7
0
    def _generate_annotations(self, agreement):
        """Generate triplet annotations given agreement pattern.

        Arguments
        ---------
        agreement : ndarray, shape = (n_items,)
            Index of the agreement pattern of each item; used to select a
            table of compatible annotation triplets

        Returns
        -------
        annotations : ndarray, shape = (n_items, 3), dtype = int
            annotations[i, :] is the triplet drawn for item i
        """
        nitems_per_loop = agreement.shape[0]
        omega = self.omega
        annotations = np.empty((nitems_per_loop, 3), dtype=int)

        # The tables depend only on the number of classes, so look them up
        # once instead of once per item.
        # NOTE(review): assumes `_compatibility_tables` has no side effects.
        tables = _compatibility_tables(self.nclasses)

        # `range` works on both Python 2 and Python 3 (`xrange` does not
        # exist on Python 3).
        for i in range(nitems_per_loop):
            # get all compatible annotations
            compatible = tables[agreement[i]]
            # compute probability of each possible annotation:
            # product of the omega entries of each triplet, normalized
            distr = omega[compatible].prod(1)
            distr /= distr.sum()
            # draw annotation
            compatible_idx = random_categorical(distr, 1)[0]
            annotations[i, :] = compatible[compatible_idx, :]
        return annotations
Example #8
0
    def _generate_annotations(self, agreement):
        """Generate triplet annotations given agreement pattern.

        Arguments
        ---------
        agreement : ndarray, shape = (n_items,)
            Index of the agreement pattern of each item; used to select a
            table of compatible annotation triplets

        Returns
        -------
        annotations : ndarray, shape = (n_items, 3), dtype = int
            annotations[i, :] is the triplet drawn for item i
        """
        nitems_per_loop = agreement.shape[0]
        omega = self.omega
        annotations = np.empty((nitems_per_loop, 3), dtype=int)

        # The tables depend only on the number of classes, so look them up
        # once instead of once per item.
        # NOTE(review): assumes `_compatibility_tables` has no side effects.
        tables = _compatibility_tables(self.nclasses)

        for i in range(nitems_per_loop):
            # get all compatible annotations
            compatible = tables[agreement[i]]
            # compute probability of each possible annotation:
            # product of the omega entries of each triplet, normalized
            distr = omega[compatible].prod(1)
            distr /= distr.sum()
            # draw annotation
            compatible_idx = random_categorical(distr, 1)[0]
            annotations[i, :] = compatible[compatible_idx, :]
        return annotations
Example #9
0
    def _generate_agreement(self, incorrect):
        """Return indices of agreement pattern given correctness pattern.

        The indices returned correspond to agreement patterns
        as in Table 3: 0=aaa, 1=aaA, 2=aAa, 3=Aaa, 4=Aa@

        Arguments
        ---------
        incorrect : ndarray, shape = (n_items, 3)
            Correctness pattern of each annotation triplet. Columns 0, 1, 2
            are combined with weights 1, 2, 4 into a pattern index, so the
            entries are expected to be 0/1 (or boolean) flags.

        Returns
        -------
        agreement : ndarray, shape = (n_items,), dtype = int
            Index (0-4) of the agreement pattern drawn for each item
        """

        # create tensor A_ijk
        # (cf. Table 3 in Rzhetsky et al., 2009, suppl. mat.)
        alpha = self._compute_alpha()
        agreement_tbl = np.array(
            [[1.,       0.,       0.,       0.,       0.],
             [0.,       1.,       0.,       0.,       0.],
             [0.,       0.,       1.,       0.,       0.],
             [0.,       0.,       0.,       1.,       0.],
             [0.,       0.,       0.,       alpha[0], 1.-alpha[0]],
             [0.,       0.,       alpha[1], 0.,       1.-alpha[1]],
             [0.,       alpha[2], 0.,       0.,       1.-alpha[2]],
             [alpha[3], alpha[4], alpha[5], alpha[6], 1.-alpha[3:].sum()]])

        # this array maps boolean correctness patterns (e.g., CCI) to
        # indices in the agreement tensor, `agreement_tbl`
        correctness_to_agreement_idx = np.array([0, 3, 2, 6, 1, 5, 4, 7])

        # convert correctness pattern to index in the A_ijk tensor
        pattern_code = (incorrect[:, 0] * 1 +
                        incorrect[:, 1] * 2 +
                        incorrect[:, 2] * 4)
        correct_idx = correctness_to_agreement_idx[pattern_code]

        # the indices stored in `agreement` correspond to agreement patterns
        # as in Table 3: 0=aaa, 1=aaA, 2=aAa, 3=Aaa, 4=Aa@
        nitems_per_loop = incorrect.shape[0]
        agreement = np.empty((nitems_per_loop,), dtype=int)
        # `range` works on both Python 2 and Python 3 (`xrange` does not
        # exist on Python 3).
        for i in range(nitems_per_loop):
            # generate agreement pattern according to A_ijk; a single sample
            # is requested, so take that one draw explicitly instead of
            # assigning a length-1 array to a scalar slot (deprecated in
            # recent NumPy releases)
            row = agreement_tbl[correct_idx[i]]
            agreement[i] = random_categorical(row, 1)[0]

        return agreement
Example #10
0
    def generate_annotations_from_labels(self, labels):
        """Generate random annotations from the model, given labels.

        The method samples random annotations from the conditional probability
        distribution of annotations, :math:`x_i^j`,
        given labels, :math:`y_i`. Entries that fall outside the loop design
        are then overwritten with `MISSING_VALUE`.

        Arguments
        ---------
        labels : ndarray, shape = (n_items,), dtype = int
            Set of "true" labels

        Returns
        -------
        annotations : ndarray, shape = (n_items, n_annotators)
            annotations[i,j] is the annotation of annotator j for item i,
            or `MISSING_VALUE` where the loop design masks that annotator
        """
        theta = self.theta
        nannotators = self.nannotators
        nitems = labels.shape[0]
        # Number of consecutive items per loop, rounded up. It must be an
        # int because it is used as a slice bound below (NumPy rejects
        # float slice indices).
        nitems_per_loop = int(np.ceil(float(nitems) / nannotators))

        annotations = np.empty((nitems, nannotators), dtype=int)
        # `range` works on both Python 2 and Python 3 (`xrange` does not
        # exist on Python 3).
        for j in range(nannotators):
            for i in range(nitems):
                distr = self._theta_to_categorical(theta[j], labels[i])
                # One sample is requested; take that single draw explicitly
                # rather than assigning a length-1 array to a scalar slot.
                annotations[i, j] = random_categorical(distr, 1)[0]

        # mask annotation value according to loop design
        for loop in range(nannotators):
            # Columns to blank out for this block of items: the
            # `nannotators - nannotators_per_item` columns starting at
            # `loop + nannotators_per_item`, wrapping around the number of
            # annotators (identical to the former `% 8` when there are
            # 8 annotators).
            masked_cols = np.arange(loop + self.nannotators_per_item,
                                    loop + nannotators) % nannotators
            first = loop * nitems_per_loop
            last = (loop + 1) * nitems_per_loop
            annotations[first:last, masked_cols] = MISSING_VALUE

        return annotations
Example #11
0
 def generate_labels(self, nitems):
     """Draw `nitems` random labels from the model.

     The labels are sampled by `random_categorical`, with `self.gamma`
     passed as the distribution and `nitems` as the number of samples.
     """
     distr = self.gamma
     labels = random_categorical(distr, nitems)
     return labels
Example #12
0
 def test_random_categorical(self):
     """Check that sample frequencies match the requested distribution.

     Draws 10000 samples and compares the empirical frequency of each
     category with its probability, to 2 decimal places.
     """
     distr = np.array([0.0, 0.3, 0.6, 0.05, 0.05])
     nsamples = 10000
     samples = pu.random_categorical(distr, nsamples)
     # `minlength` guarantees one bin per category even if the highest
     # categories are never drawn, so a failure is always reported as a
     # frequency difference rather than as a shape mismatch.
     counts = np.bincount(samples, minlength=len(distr))
     freq = counts / float(nsamples)
     np.testing.assert_almost_equal(freq, distr, 2)
Example #13
0
 def generate_labels(self, nitems):
     """Draw `nitems` random labels from the model.

     The labels are sampled by `random_categorical`, with `self.pi`
     passed as the distribution and `nitems` as the number of samples.
     """
     distr = self.pi
     labels = random_categorical(distr, nitems)
     return labels