def test_from_array(self):
    """Build a container from an ndarray and from a list-of-lists.

    Labels 1, 2, 3 are expected to be encoded as 0, 1, 2, while -1 and NaN
    entries are expected to be encoded as ``MV``.  A list-of-lists whose
    rows have different lengths must raise ``PyannoValueError``.
    """
    raw = np.array([
        [1, 2, -1, 3],
        [-1, -1, np.nan, 1],
        [1, 3, 3, 2],
    ])
    encoded = np.array([
        [0, 1, MV, 2],
        [MV, MV, MV, 0],
        [0, 2, 2, 1],
    ])

    def check(source):
        # the same expectations hold for every accepted input type
        container = AnnotationsContainer.from_array(source)
        np.testing.assert_equal(container.annotations, encoded)
        self.assertEqual(container.nclasses, 3)
        self.assertEqual(container.labels, [1, 2, 3])
        self.assertEqual(container.missing_values, [np.nan, -1])

    check(raw)

    # now using a list-of-lists
    nested = raw.tolist()
    check(nested)

    # inconsistent number of elements
    nested[1] = [1, 2]
    self.assertRaises(
        PyannoValueError, AnnotationsContainer.from_array, nested)
def _new_annotations_fired(self):
    """Create a new annotations set via the creation dialog and install it.

    Nothing happens when the dialog returns ``None``.  Otherwise the
    returned data is wrapped in an ``AnnotationsContainer`` named with the
    next available database id and handed to ``set_annotations``.
    """
    new_data = CreateNewAnnotationsDialog.create_annotations_dialog()
    if new_data is None:
        return

    container = AnnotationsContainer.from_array(
        new_data,
        name=self.application.database.get_available_id(),
    )
    self.set_annotations(container)
def estimate_quality_instance_level(annotations, pmids):
    """Return a ``{worker: proxy skill}`` mapping from a fitted ModelB.

    The annotation matrix and worker list come from ``get_M_overall``.  A
    two-class ``ModelB`` with one annotator per worker is fitted via
    ``map``, treating the value 2 as a missing annotation.  The proxy skill
    of each worker is the mean of ``theta[:, 0, 0]`` and ``theta[:, 1, 1]``.
    """
    matrix, workers = get_M_overall(annotations, pmids)
    n_workers = len(workers)

    model = ModelB.create_initial_state(2, n_workers)
    container = AnnotationsContainer.from_array(matrix, missing_values=[2])
    model.map(container.annotations)

    theta = model.theta
    skill = (theta[:, 0, 0] + theta[:, 1, 1]) / 2.0
    return dict(zip(workers, skill))
def setUp(self):
    """Prepare a temporary file name and two model/annotation fixtures.

    Each fixture consists of a model, 100 annotations generated from it,
    the model's log likelihood for those annotations, the annotations
    wrapped in an ``AnnotationsContainer``, and a data id string.
    """
    self.tmp_filename = mktemp(prefix='tmp_pyanno_db_')

    # fixture 1: ModelA with 4 classes
    model_a = ModelA.create_initial_state(4)
    annotations_a = model_a.generate_annotations(100)
    self.model1 = model_a
    self.annotations1 = annotations_a
    self.value1 = model_a.log_likelihood(annotations_a)
    self.anno_container1 = AnnotationsContainer.from_array(annotations_a)
    self.data_id1 = 'bogus.txt'

    # fixture 2: ModelB created with (4, 8)
    model_b = ModelB.create_initial_state(4, 8)
    annotations_b = model_b.generate_annotations(100)
    self.model2 = model_b
    self.annotations2 = annotations_b
    self.value2 = model_b.log_likelihood(annotations_b)
    self.anno_container2 = AnnotationsContainer.from_array(annotations_b)
    self.data_id2 = 'bogus2.txt'
def _edit_data_fired(self):
    """Open a modal editor on the raw annotations and rebuild the container.

    After the ``DataView`` dialog closes, a new ``AnnotationsContainer`` is
    built from the view's data under the current container's name.  When an
    application is attached, its main window is given the new container.
    """
    editor = DataView(data=self.annotations_container.raw_annotations)
    editor.edit_traits(kind='livemodal', parent=self.info.ui.control)

    edited = editor.data
    current_name = self.annotations_container.name
    self.annotations_container = AnnotationsContainer.from_array(
        edited, name=current_name)

    if self.application is None:
        return
    self.application.main_window.set_annotations(self.annotations_container)
def _edit_data_fired(self):
    """Let the user edit the raw annotations, then replace the container.

    A ``DataView`` over ``raw_annotations`` is shown as a live modal dialog.
    Its data is then turned into a fresh ``AnnotationsContainer`` that keeps
    the previous container's name, and the application's main window (if
    there is an application) is updated with it.
    """
    view = DataView(data=self.annotations_container.raw_annotations)
    view.edit_traits(kind='livemodal', parent=self.info.ui.control)

    rebuilt = AnnotationsContainer.from_array(
        view.data,
        name=self.annotations_container.name,
    )
    self.annotations_container = rebuilt

    app = self.application
    if app is not None:
        app.main_window.set_annotations(self.annotations_container)
def main():
    """Entry point for standalone testing/debugging.

    Generates two annotation items from a ``ModelBtLoopDesign`` with 5
    classes, shows them in an ``AnnotationsView`` and returns the tuple
    ``(model, annotations, view)``.
    """
    from pyanno.modelBt_loopdesign import ModelBtLoopDesign

    model = ModelBtLoopDesign.create_initial_state(5)
    annotations = model.generate_annotations(2)

    container = AnnotationsContainer.from_array(annotations, name='blah')
    view = AnnotationsView(
        annotations_container=container,
        model=HasTraits(),
    )
    view.configure_traits()

    return model, annotations, view
def test_from_file_with_commas(self):
    """Read comma-separated annotations from a file-like object.

    The input holds three rows of three values, matching the 3x3 expected
    array: labels 1, 2, 3 map to 0, 1, 2 and -1 maps to ``MV``.
    """
    # One data row per physical line; trailing commas are part of the input.
    s = """
    1,2,3,
    -1, -1, 1,
    1, 2, 3
    """
    stream = StringIO(s)
    anno = AnnotationsContainer._from_file_object(stream)

    expected = np.array([
        [0, 1, 2],
        [MV, MV, 0],
        [0, 1, 2],
    ])
    np.testing.assert_equal(anno.annotations, expected)
def test_from_array(self):
    """Check ``from_array`` on an ndarray, a list-of-lists and ragged input.

    For both accepted inputs the encoded annotations, the number of
    classes, the labels and the missing values are compared against fixed
    expectations.  Ragged rows must raise ``PyannoValueError``.
    """
    data = np.array([[1, 2, -1, 3],
                     [-1, -1, np.nan, 1],
                     [1, 3, 3, 2]])
    want_annotations = np.array([[0, 1, MV, 2],
                                 [MV, MV, MV, 0],
                                 [0, 2, 2, 1]])
    want_labels = [1, 2, 3]
    want_missing = [np.nan, -1]

    from_ndarray = AnnotationsContainer.from_array(data)
    np.testing.assert_equal(from_ndarray.annotations, want_annotations)
    self.assertEqual(from_ndarray.nclasses, 3)
    self.assertEqual(from_ndarray.labels, want_labels)
    self.assertEqual(from_ndarray.missing_values, want_missing)

    # now using a list-of-lists
    rows = data.tolist()
    from_lists = AnnotationsContainer.from_array(rows)
    np.testing.assert_equal(from_lists.annotations, want_annotations)
    self.assertEqual(from_lists.nclasses, 3)
    self.assertEqual(from_lists.labels, want_labels)
    self.assertEqual(from_lists.missing_values, want_missing)

    # inconsistent number of elements
    rows[1] = [1, 2]
    self.assertRaises(PyannoValueError,
                      AnnotationsContainer.from_array, rows)
def test_from_file_with_commas(self):
    """Parse a comma-separated annotations stream into a 3x3 array.

    Expected encoding: labels 1, 2, 3 become 0, 1, 2; -1 becomes ``MV``.
    """
    # Three rows of three values each, one row per line, so that the input
    # agrees with the shape of the expected array below.
    s = """
    1,2,3,
    -1, -1, 1,
    1, 2, 3
    """
    file_obj = StringIO(s)
    container = AnnotationsContainer._from_file_object(file_obj)

    expected = np.array([[0, 1, 2],
                         [MV, MV, 0],
                         [0, 1, 2]])
    np.testing.assert_equal(container.annotations, expected)
def estimate_quality_for_q(annotations, qnum, pmids=None):
    """Return a ``{worker: proxy skill}`` mapping for question *qnum*.

    The annotation matrix and worker list come from ``get_M_q``.  A
    two-class ``ModelB`` with one annotator per worker is fitted via
    ``map``, treating the value 2 as a missing annotation.
    """
    matrix, workers = get_M_q(annotations, qnum, pmids=pmids)

    model = ModelB.create_initial_state(2, len(workers))
    container = AnnotationsContainer.from_array(matrix, missing_values=[2])
    model.map(container.annotations)

    # pi[k] is the probability of label k
    # theta[j,k,k'] is the probability that annotator j reports label k'
    # for an item whose real label is k, i.e.
    # P( annotator j chooses k' | real label = k)

    # this is a simple mean of sensitivity and specificity
    # @TODO revisit?
    theta = model.theta
    skill = (theta[:, 0, 0] + theta[:, 1, 1]) / 2.0
    return dict(zip(workers, skill))
def test_from_file_numerical_labels(self):
    """Read whitespace-separated numerical labels from a file-like object.

    Labels read from a file are expected as strings ('1', '2', '3'), with
    '-1' reported as the missing value and encoded as ``MV``.
    """
    # One data row per physical line: three rows of three values, matching
    # the 3x3 expected array.
    s = """
    1 -1 2
    2 1 3
    -1 -1 3
    """
    stream = StringIO(s)
    anno = AnnotationsContainer._from_file_object(stream)

    expected = np.array([
        [0, MV, 1],
        [1, 0, 2],
        [MV, MV, 2],
    ])
    np.testing.assert_equal(anno.annotations, expected)
    self.assertEqual(anno.nclasses, 3)
    self.assertEqual(anno.labels, ['1', '2', '3'])
    self.assertEqual(anno.missing_values, ['-1'])
def test_from_file_string_labels(self):
    """Read string labels from a file-like object.

    Four distinct labels (A-D) and five annotators are expected; '*' is
    expected to be reported as the missing value and encoded as ``MV``.
    """
    # One data row per physical line: three rows of five values, matching
    # nannotators == 5 and the 3x5 expected array.
    s = """
    D B A A B
    C A * C A
    B B D D *
    """
    stream = StringIO(s)
    anno = AnnotationsContainer._from_file_object(stream)

    self.assertEqual(anno.nclasses, 4)
    self.assertEqual(anno.nannotators, 5)
    # labels should be sorted
    self.assertEqual(anno.labels, ['A', 'B', 'C', 'D'])
    self.assertEqual(anno.missing_values, ['*'])

    expected = np.array(
        [
            [3, 1, 0, 0, 1],
            [2, 0, MV, 2, 0],
            [1, 1, 3, 3, MV],
        ],
        dtype=int,
    )
    np.testing.assert_equal(anno.annotations, expected)
def test_from_file_numerical_labels(self):
    """Parse a stream of numerical labels into a 3x3 encoded array.

    Expected outcome: string labels '1', '2', '3' encoded as 0, 1, 2 and
    the missing value '-1' encoded as ``MV``.
    """
    # Rows are kept on separate lines so the input has three rows of three
    # values, in agreement with the expected array.
    s = """
    1 -1 2
    2 1 3
    -1 -1 3
    """
    file_obj = StringIO(s)
    container = AnnotationsContainer._from_file_object(file_obj)

    expected = np.array([[0, MV, 1],
                         [1, 0, 2],
                         [MV, MV, 2]])
    np.testing.assert_equal(container.annotations, expected)
    self.assertEqual(container.nclasses, 3)
    self.assertEqual(container.labels, ['1', '2', '3'])
    self.assertEqual(container.missing_values, ['-1'])
def test_from_file_string_labels(self):
    """Parse a stream of string labels and check the derived metadata.

    Expected outcome: 4 classes, 5 annotators, sorted labels A-D, '*' as
    the missing value, and a 3x5 integer array of encoded annotations.
    """
    # Rows are kept on separate lines so the input has three rows of five
    # values, in agreement with nannotators and the expected array.
    s = """
    D B A A B
    C A * C A
    B B D D *
    """
    file_obj = StringIO(s)
    container = AnnotationsContainer._from_file_object(file_obj)

    self.assertEqual(container.nclasses, 4)
    self.assertEqual(container.nannotators, 5)
    # labels should be sorted
    self.assertEqual(container.labels, ['A', 'B', 'C', 'D'])
    self.assertEqual(container.missing_values, ['*'])

    expected = np.array([[3, 1, 0, 0, 1],
                         [2, 0, MV, 2, 0],
                         [1, 1, 3, 3, MV]], dtype=int)
    np.testing.assert_equal(container.annotations, expected)
def _create_new_entry(model, annotations, id):
    """Store a result for *model* and *annotations* in ``db`` under *id*.

    The stored value is the model's log likelihood of the annotations; the
    annotations are wrapped in an ``AnnotationsContainer`` named *id*.
    """
    log_likelihood = model.log_likelihood(annotations)
    container = AnnotationsContainer.from_array(annotations, name=id)
    db.store_result(id, container, model, log_likelihood)
def _annotations_view_default(self):
    """Return an ``AnnotationsView`` over a placeholder container.

    The placeholder is built from the single-entry array ``[[0]]`` and is
    named ``'<undefined>'``.
    """
    placeholder = AnnotationsContainer.from_array(
        [[0]], name='<undefined>')
    return AnnotationsView(
        annotations_container=placeholder,
        nclasses=self.model.nclasses,
        application=self.application,
        model=HasTraits(),
    )
def _update_annotations_file(self):
    """Load ``self.annotations_file`` and make it the current annotations.

    The file name is logged at INFO level before loading.
    """
    path = self.annotations_file
    logger.info('Load file {}'.format(path))

    container = AnnotationsContainer.from_file(self.annotations_file)
    self.set_annotations(container)
def _annotations_view_default(self):
    """Build the default annotations view.

    Until real data is available the view wraps a container created from
    ``[[0]]`` and named ``'<undefined>'``; the number of classes is taken
    from ``self.model``.
    """
    undefined = AnnotationsContainer.from_array([[0]], name='<undefined>')
    view_kwargs = {
        'annotations_container': undefined,
        'nclasses': self.model.nclasses,
        'application': self.application,
        'model': HasTraits(),
    }
    return AnnotationsView(**view_kwargs)