class TestLabelers(unittest.TestCase):
    """Tests for the BIO, BIEO, and TmVar token-labeling schemes."""

    def setUp(self):
        """Build a one-part dataset containing two mutation-mention entities."""
        text = 'some text ... (c.2708_2711delTTAG, p.V903GfsX905) ... text'
        self.dataset = StringReader(text).read()
        NLTKSplitter().split(self.dataset)
        TmVarTokenizer().tokenize(self.dataset)
        first_part = list(self.dataset.parts())[0]
        first_part.annotations.append(
            Entity(STUB_ENTITY_CLASS_ID, 15, 'c.2708_2711delTTAG'))
        first_part.annotations.append(
            Entity(STUB_ENTITY_CLASS_ID, 35, 'p.V903GfsX905'))

    def _collected_labels(self):
        """Return the first original-label value of every token, in order."""
        return [tok.original_labels[0].value for tok in self.dataset.tokens()]

    def test_bio_labeler(self):
        """BIO: B- on an entity's first token, I- on all following tokens."""
        BIOLabeler().label(self.dataset)
        # Each mention spans 7 tokens; both are tagged identically.
        mention = ['B-e_x'] + ['I-e_x'] * 6
        expected = ['O'] * 6 + mention + ['O'] + mention + ['O'] * 5
        self.assertEqual(self._collected_labels(), expected)

    def test_bieo_labeler(self):
        """BIEO: like BIO but the last token of a mention is tagged E-."""
        BIEOLabeler().label(self.dataset)
        mention = ['B-e_x'] + ['I-e_x'] * 5 + ['E-e_x']
        expected = ['O'] * 6 + mention + ['O'] + mention + ['O'] * 5
        self.assertEqual(self._collected_labels(), expected)

    def test_tmvar_labeler(self):
        """TmVar: fine-grained per-token mutation-component labels."""
        TmVarLabeler(STUB_ENTITY_CLASS_ID).label(self.dataset)
        dna_mention = ['A', 'I', 'P', 'P', 'P', 'T', 'W']
        protein_mention = ['A', 'I', 'W', 'P', 'I', 'M', 'P']
        expected = (['O'] * 6 + dna_mention + ['O']
                    + protein_mention + ['O'] * 5)
        self.assertEqual(self._collected_labels(), expected)
def test_generate_patterns_245(self):
    """TmVar dictionary patterns 2/4/5 fire on the 'c.A436C' mention tokens.

    Regression test (issue 245): surrounding non-mention tokens get no
    non-'O' pattern features, while the mention tokens get the expected
    B/I/E tags from patterns 2, 4, and 5.
    """
    dataset = StringReader('token c.A436C token').read()
    NLTKSplitter().split(dataset)
    TmVarTokenizer().tokenize(dataset)
    TmVarDictionaryFeatureGenerator().generate(dataset)
    # BUG FIX: the original filtered with `value is not 'O'`, an *identity*
    # comparison against a string literal. That only works by accident of
    # CPython's interning of short strings, and raises a SyntaxWarning on
    # Python >= 3.8. Use inequality, which is the intended comparison.
    token_features = [
        {key: value for key, value in token.features.items() if value != 'O'}
        for token in dataset.tokens()]
    self.assertEqual(token_features[0], {})
    self.assertEqual(token_features[1],
                     {'pattern4[0]': 'B', 'pattern2[0]': 'B'})
    self.assertEqual(token_features[2],
                     {'pattern4[0]': 'I', 'pattern2[0]': 'I'})
    self.assertEqual(token_features[3],
                     {'pattern4[0]': 'I', 'pattern2[0]': 'I',
                      'pattern5[0]': 'B'})
    self.assertEqual(token_features[4],
                     {'pattern4[0]': 'I', 'pattern2[0]': 'I',
                      'pattern5[0]': 'I'})
    self.assertEqual(token_features[5],
                     {'pattern4[0]': 'E', 'pattern2[0]': 'I',
                      'pattern5[0]': 'E'})
    self.assertEqual(token_features[6], {})