def generate_annotations_from_labels(self, labels):
    """Generate random annotations from the model, given labels.

    The method samples random annotations from the conditional probability
    distribution of annotations, :math:`x_i^j`, given labels, :math:`y_i`.

    Arguments
    ---------
    labels : ndarray, shape = (n_items,), dtype = int
        Set of "true" labels

    Returns
    -------
    annotations : ndarray, shape = (n_items, n_annotators)
        annotations[i,j] is the annotation of annotator j for item i
    """
    nitems = labels.shape[0]
    annotations = np.empty((nitems, self.nannotators), dtype=int)

    for j in range(self.nannotators):
        theta_j = self.theta[j]
        for i in range(nitems):
            distr = self._theta_to_categorical(theta_j, labels[i])
            # A single sample is requested; take the element explicitly so
            # that a scalar (not a size-1 array) is stored. Implicit
            # size-1-array-to-scalar conversion is deprecated in NumPy.
            annotations[i, j] = random_categorical(distr, 1)[0]

    return annotations
def generate_annotations_from_labels(self, labels):
    r"""Generate random annotations from the model, given labels.

    The method samples random annotations from the conditional probability
    distribution of annotations, :math:`x_i^j`, given labels, :math:`y_i`:

    :math:`x_i^j \sim \mathrm{Categorical}(\mathbf{\theta_j^{y_i}})`

    Arguments
    ---------
    labels : ndarray, shape = (n_items,), dtype = int
        Set of "true" labels

    Returns
    -------
    annotations : ndarray, shape = (n_items, n_annotators)
        annotations[i,j] is the annotation of annotator j for item i
    """
    # NOTE: the docstring is a raw string so that the LaTeX backslashes
    # (\sim, \mathrm, ...) are not parsed as invalid escape sequences.
    nitems = labels.shape[0]
    annotations = np.empty((nitems, self.nannotators), dtype=int)

    for j in range(self.nannotators):
        for i in range(nitems):
            # theta[j, y, :] is used as the categorical distribution of
            # annotator j's answer for an item whose label is y.
            distr = self.theta[j, labels[i], :]
            # A single sample is requested; take the element explicitly so
            # that a scalar (not a size-1 array) is stored. Implicit
            # size-1-array-to-scalar conversion is deprecated in NumPy.
            annotations[i, j] = random_categorical(distr, 1)[0]

    return annotations
def _generate_agreement(self, incorrect):
    """Return indices of agreement pattern given correctness pattern.

    The indices returned correspond to agreement patterns as in Table 3:
    0=aaa, 1=aaA, 2=aAa, 3=Aaa, 4=Aa@

    Arguments
    ---------
    incorrect : ndarray, indexed as incorrect[:, 0], [:, 1], [:, 2]
        Correctness pattern of each item's annotation triplet, one row per
        item. The three columns are combined as bits (weights 1, 2, 4) to
        select a row of the agreement table.

    Returns
    -------
    agreement : ndarray, shape = (incorrect.shape[0],), dtype = int
        Sampled agreement pattern index for each item
    """
    # create tensor A_ijk
    # (cf. Table 3 in Rzhetsky et al., 2009, suppl. mat.)
    alpha = self._compute_alpha()
    agreement_tbl = np.array(
        [[1., 0., 0., 0., 0.],
         [0., 1., 0., 0., 0.],
         [0., 0., 1., 0., 0.],
         [0., 0., 0., 1., 0.],
         [0., 0., 0., alpha[0], 1. - alpha[0]],
         [0., 0., alpha[1], 0., 1. - alpha[1]],
         [0., alpha[2], 0., 0., 1. - alpha[2]],
         [alpha[3], alpha[4], alpha[5], alpha[6], 1. - alpha[3:].sum()]])

    # this array maps boolean correctness patterns (e.g., CCI) to
    # indices in the agreement tensor, `agreement_tbl`
    correctness_to_agreement_idx = np.array([0, 3, 2, 6, 1, 5, 4, 7])

    # convert correctness pattern to index in the A_ijk tensor
    pattern_code = (incorrect[:, 0] * 1
                    + incorrect[:, 1] * 2
                    + incorrect[:, 2] * 4)
    correct_idx = correctness_to_agreement_idx[pattern_code]

    # the indices stored in `agreement` correspond to agreement patterns
    # as in Table 3: 0=aaa, 1=aaA, 2=aAa, 3=Aaa, 4=Aa@
    nitems_per_loop = incorrect.shape[0]
    agreement = np.empty((nitems_per_loop,), dtype=int)
    for i in range(nitems_per_loop):
        # generate agreement pattern according to A_ijk; a single sample is
        # requested, so take the element explicitly to store a scalar
        # (implicit size-1-array-to-scalar conversion is deprecated in NumPy)
        agreement[i] = random_categorical(agreement_tbl[correct_idx[i]], 1)[0]

    return agreement
def generate_annotations_from_labels(self, labels):
    r"""Generate random annotations from the model, given labels.

    The method samples random annotations from the conditional probability
    distribution of annotations, :math:`x_i^j`, given labels, :math:`y_i`:

    :math:`x_i^j \sim \mathrm{Categorical}(\mathbf{\theta_j^{y_i}})`

    Arguments
    ---------
    labels : ndarray, shape = (n_items,), dtype = int
        Set of "true" labels

    Returns
    -------
    annotations : ndarray, shape = (n_items, n_annotators)
        annotations[i,j] is the annotation of annotator j for item i
    """
    # NOTE: the docstring is a raw string so that the LaTeX backslashes
    # (\sim, \mathrm, ...) are not parsed as invalid escape sequences.
    nitems = labels.shape[0]
    annotations = np.empty((nitems, self.nannotators), dtype=int)

    # `range` works on both Python 2 and 3 (`xrange` exists only on Python 2)
    for j in range(self.nannotators):
        for i in range(nitems):
            # theta[j, y, :] is used as the categorical distribution of
            # annotator j's answer for an item whose label is y.
            distr = self.theta[j, labels[i], :]
            # A single sample is requested; take the element explicitly so
            # that a scalar (not a size-1 array) is stored. Implicit
            # size-1-array-to-scalar conversion is deprecated in NumPy.
            annotations[i, j] = random_categorical(distr, 1)[0]

    return annotations
def generate_annotations_from_labels(self, labels):
    """Generate random annotations from the model, given labels.

    The method samples random annotations from the conditional probability
    distribution of annotations, :math:`x_i^j`, given labels, :math:`y_i`.

    Arguments
    ---------
    labels : ndarray, shape = (n_items,), dtype = int
        Set of "true" labels

    Returns
    -------
    annotations : ndarray, shape = (n_items, n_annotators)
        annotations[i,j] is the annotation of annotator j for item i
    """
    nitems = labels.shape[0]
    annotations = np.empty((nitems, self.nannotators), dtype=int)

    # `range` works on both Python 2 and 3 (`xrange` exists only on Python 2)
    for j in range(self.nannotators):
        theta_j = self.theta[j]
        for i in range(nitems):
            distr = self._theta_to_categorical(theta_j, labels[i])
            # A single sample is requested; take the element explicitly so
            # that a scalar (not a size-1 array) is stored. Implicit
            # size-1-array-to-scalar conversion is deprecated in NumPy.
            annotations[i, j] = random_categorical(distr, 1)[0]

    return annotations
def generate_annotations_from_labels(self, labels):
    """Generate random annotations from the model, given labels.

    The method samples random annotations from the conditional probability
    distribution of annotations, :math:`x_i^j`, given labels, :math:`y_i`.
    After sampling, entries are overwritten with `MISSING_VALUE` according
    to the loop design: for each block of consecutive items, only
    `self.nannotators_per_item` annotators keep their annotations.

    Arguments
    ---------
    labels : ndarray, shape = (n_items,), dtype = int
        Set of "true" labels

    Returns
    -------
    annotations : ndarray, shape = (n_items, n_annotators)
        annotations[i,j] is the annotation of annotator j for item i
        (or MISSING_VALUE where the entry has been masked)
    """
    theta = self.theta
    nannotators = self.nannotators
    nitems = labels.shape[0]
    # np.ceil returns a float; slice bounds below must be integers
    nitems_per_loop = int(np.ceil(float(nitems) / nannotators))

    annotations = np.empty((nitems, nannotators), dtype=int)
    # `range` works on both Python 2 and 3 (`xrange` exists only on Python 2)
    for j in range(nannotators):
        for i in range(nitems):
            distr = self._theta_to_categorical(theta[j], labels[i])
            # A single sample is requested; take the element explicitly so
            # that a scalar (not a size-1 array) is stored.
            annotations[i, j] = random_categorical(distr, 1)[0]

    # mask annotation value according to loop design
    for loop_idx in range(nannotators):
        # Annotator indices wrap around the number of annotators (this was
        # a hard-coded 8 before; identical when nannotators == 8).
        label_idx = np.arange(loop_idx + self.nannotators_per_item,
                              loop_idx + nannotators) % nannotators
        first = loop_idx * nitems_per_loop
        last = (loop_idx + 1) * nitems_per_loop
        annotations[first:last, label_idx] = MISSING_VALUE

    return annotations
def _generate_annotations(self, agreement):
    """Generate triplet annotations given agreement pattern.

    Arguments
    ---------
    agreement : ndarray, shape = (n_items,)
        Agreement pattern index of each item; used to select a table of
        compatible annotation triplets

    Returns
    -------
    annotations : ndarray, shape = (n_items, 3), dtype = int
        Sampled annotation triplet for each item
    """
    nitems_per_loop = agreement.shape[0]
    omega = self.omega
    annotations = np.empty((nitems_per_loop, 3), dtype=int)
    if nitems_per_loop == 0:
        return annotations

    # The tables are looked up once instead of once per item.
    # NOTE(review): assumes _compatibility_tables depends only on nclasses.
    tables = _compatibility_tables(self.nclasses)

    # `range` works on both Python 2 and 3 (`xrange` exists only on Python 2)
    for i in range(nitems_per_loop):
        # get all compatible annotations
        compatible = tables[agreement[i]]

        # compute probability of each possible annotation: product of the
        # omega values of the entries in each row, normalized to sum to 1
        distr = omega[compatible].prod(1)
        distr /= distr.sum()

        # draw annotation
        compatible_idx = random_categorical(distr, 1)[0]
        annotations[i, :] = compatible[compatible_idx, :]

    return annotations
def _generate_annotations(self, agreement):
    """Generate triplet annotations given agreement pattern.

    Arguments
    ---------
    agreement : ndarray, shape = (n_items,)
        Agreement pattern index of each item; used to select a table of
        compatible annotation triplets

    Returns
    -------
    annotations : ndarray, shape = (n_items, 3), dtype = int
        Sampled annotation triplet for each item
    """
    nitems_per_loop = agreement.shape[0]
    omega = self.omega
    annotations = np.empty((nitems_per_loop, 3), dtype=int)
    if nitems_per_loop == 0:
        return annotations

    # The tables are looked up once instead of once per item.
    # NOTE(review): assumes _compatibility_tables depends only on nclasses.
    tables = _compatibility_tables(self.nclasses)

    for i in range(nitems_per_loop):
        # get all compatible annotations
        compatible = tables[agreement[i]]

        # compute probability of each possible annotation: product of the
        # omega values of the entries in each row, normalized to sum to 1
        distr = omega[compatible].prod(1)
        distr /= distr.sum()

        # draw annotation
        compatible_idx = random_categorical(distr, 1)[0]
        annotations[i, :] = compatible[compatible_idx, :]

    return annotations
def _generate_agreement(self, incorrect):
    """Return indices of agreement pattern given correctness pattern.

    The indices returned correspond to agreement patterns as in Table 3:
    0=aaa, 1=aaA, 2=aAa, 3=Aaa, 4=Aa@

    Arguments
    ---------
    incorrect : ndarray, indexed as incorrect[:, 0], [:, 1], [:, 2]
        Correctness pattern of each item's annotation triplet, one row per
        item. The three columns are combined as bits (weights 1, 2, 4) to
        select a row of the agreement table.

    Returns
    -------
    agreement : ndarray, shape = (incorrect.shape[0],), dtype = int
        Sampled agreement pattern index for each item
    """
    # create tensor A_ijk
    # (cf. Table 3 in Rzhetsky et al., 2009, suppl. mat.)
    alpha = self._compute_alpha()
    agreement_tbl = np.array(
        [[1., 0., 0., 0., 0.],
         [0., 1., 0., 0., 0.],
         [0., 0., 1., 0., 0.],
         [0., 0., 0., 1., 0.],
         [0., 0., 0., alpha[0], 1. - alpha[0]],
         [0., 0., alpha[1], 0., 1. - alpha[1]],
         [0., alpha[2], 0., 0., 1. - alpha[2]],
         [alpha[3], alpha[4], alpha[5], alpha[6], 1. - alpha[3:].sum()]])

    # this array maps boolean correctness patterns (e.g., CCI) to
    # indices in the agreement tensor, `agreement_tbl`
    correctness_to_agreement_idx = np.array([0, 3, 2, 6, 1, 5, 4, 7])

    # convert correctness pattern to index in the A_ijk tensor
    pattern_code = (incorrect[:, 0] * 1
                    + incorrect[:, 1] * 2
                    + incorrect[:, 2] * 4)
    correct_idx = correctness_to_agreement_idx[pattern_code]

    # the indices stored in `agreement` correspond to agreement patterns
    # as in Table 3: 0=aaa, 1=aaA, 2=aAa, 3=Aaa, 4=Aa@
    nitems_per_loop = incorrect.shape[0]
    agreement = np.empty((nitems_per_loop,), dtype=int)
    # `range` works on both Python 2 and 3 (`xrange` exists only on Python 2)
    for i in range(nitems_per_loop):
        # generate agreement pattern according to A_ijk; a single sample is
        # requested, so take the element explicitly to store a scalar
        # (implicit size-1-array-to-scalar conversion is deprecated in NumPy)
        agreement[i] = random_categorical(agreement_tbl[correct_idx[i]], 1)[0]

    return agreement
def generate_annotations_from_labels(self, labels):
    """Generate random annotations from the model, given labels.

    The method samples random annotations from the conditional probability
    distribution of annotations, :math:`x_i^j`, given labels, :math:`y_i`.
    After sampling, entries are overwritten with `MISSING_VALUE` according
    to the loop design: for each block of consecutive items, only
    `self.nannotators_per_item` annotators keep their annotations.

    Arguments
    ---------
    labels : ndarray, shape = (n_items,), dtype = int
        Set of "true" labels

    Returns
    -------
    annotations : ndarray, shape = (n_items, n_annotators)
        annotations[i,j] is the annotation of annotator j for item i
        (or MISSING_VALUE where the entry has been masked)
    """
    theta = self.theta
    nannotators = self.nannotators
    nitems = labels.shape[0]
    # np.ceil returns a float; slice bounds below must be integers
    nitems_per_loop = int(np.ceil(float(nitems) / nannotators))

    annotations = np.empty((nitems, nannotators), dtype=int)
    # `range` works on both Python 2 and 3 (`xrange` exists only on Python 2)
    for j in range(nannotators):
        for i in range(nitems):
            distr = self._theta_to_categorical(theta[j], labels[i])
            # A single sample is requested; take the element explicitly so
            # that a scalar (not a size-1 array) is stored.
            annotations[i, j] = random_categorical(distr, 1)[0]

    # mask annotation value according to loop design
    for loop_idx in range(nannotators):
        # Annotator indices wrap around the number of annotators (this was
        # a hard-coded 8 before; identical when nannotators == 8).
        label_idx = np.arange(loop_idx + self.nannotators_per_item,
                              loop_idx + nannotators) % nannotators
        first = loop_idx * nitems_per_loop
        last = (loop_idx + 1) * nitems_per_loop
        annotations[first:last, label_idx] = MISSING_VALUE

    return annotations
def generate_labels(self, nitems):
    """Generate random labels from the model.

    Draws `nitems` samples with `random_categorical`, passing `self.gamma`
    as the distribution argument, and returns the result unchanged.
    """
    label_distr = self.gamma
    labels = random_categorical(label_distr, nitems)
    return labels
def test_random_categorical(self):
    """Check that empirical sample frequencies match the distribution."""
    distr = np.array([0.0, 0.3, 0.6, 0.05, 0.05])
    nsamples = 10000
    samples = pu.random_categorical(distr, nsamples)
    # minlength keeps the histogram the same length as `distr` even if the
    # highest category happens not to be drawn, so the comparison below
    # never fails on a shape mismatch.
    counts = np.bincount(samples, minlength=len(distr))
    freq = counts / float(nsamples)
    # frequencies must agree with the probabilities to 2 decimal places
    np.testing.assert_almost_equal(freq, distr, 2)
def generate_labels(self, nitems):
    """Generate random labels from the model.

    Draws `nitems` samples with `random_categorical`, passing `self.pi`
    as the distribution argument, and returns the result unchanged.
    """
    label_distr = self.pi
    labels = random_categorical(label_distr, nitems)
    return labels