def testGetMaskedStems(self):
    """Check the output of ``maskedStems(3, 7)`` on an annotated sentence.

    Three annotations are added to a sentence built from ``TEST_TOKENS``;
    the stems requested for the ``(3, 7)`` window must come back as
    ``type1``, two plain stems, and ``type2``.
    """
    sentence = Sentence(TEST_TOKENS)
    annotations = (
        ('type1', 2, 4),
        ('type2', 6),
        ('type2', 8, 9),
    )
    for annotation in annotations:
        sentence.addAnnotation(*annotation)

    masked = list(sentence.maskedStems(3, 7))
    expected = ['type1', 'stem4', 'stem5', 'type2']
    self.assertListEqual(masked, expected)
def testGetMaskedStems(self):
    """Verify the masked stems returned for the ``(3, 7)`` window.

    The sentence is built from ``TEST_TOKENS`` and receives one ``type1``
    and two ``type2`` annotations before ``maskedStems(3, 7)`` is queried.
    """
    annotated = Sentence(TEST_TOKENS)
    annotated.addAnnotation('type1', 2, 4)
    annotated.addAnnotation('type2', 6)
    annotated.addAnnotation('type2', 8, 9)

    window = annotated.maskedStems(3, 7)
    self.assertListEqual(
        list(window),
        ['type1', 'stem4', 'stem5', 'type2'],
    )
def asDict(sentence: Sentence, ngrams=2):
    """Convert a :class:`fnl.text.sentence.Sentence` into a feature dictionary.

    The dictionary contains:

    * ``'gene-count'``: the value of ``sentence.countEntity('B-gene')``;
    * one ``'stem/tag'`` key per distinct (masked stem, PoS tag) pair,
      mapped to its number of occurrences;
    * ``'has-all-entities'`` set to ``1`` when both ``"TARGET/NN"`` and
      ``"FACTOR/NN"`` are present as keys;
    * one key per distinct n-gram of masked stems (stems joined by a
      single space), for every order from 2 up to ``ngrams``, mapped to
      its number of occurrences.

    Later groups overwrite earlier keys on collision (plain ``dict.update``).

    :param sentence: the sentence to extract features from
    :param ngrams: highest n-gram order to generate; values below 2
                   produce no n-gram features
    :return: a ``dict`` mapping feature names to integer values
    """
    features = {'gene-count': sentence.countEntity('B-gene')}
    stems = list(sentence.maskedStems())
    pos = sentence.posTags()
    features.update(Counter('{}/{}'.format(s, t) for s, t in zip(stems, pos)))

    if "TARGET/NN" in features and "FACTOR/NN" in features:
        features['has-all-entities'] = 1

    # gram[i] always holds the current-order n-gram that starts at stem i;
    # prefixing stem i to the n-gram starting at i + 1 yields the next order.
    gram = list(stems)

    while ngrams > 1:
        # Was "ngrams =- 1" (i.e. "ngrams = -1"), which ended the loop after
        # a single pass regardless of the requested order.
        ngrams -= 1
        gram = ['{} {}'.format(s, g) for s, g in zip(stems, gram[1:])]
        features.update(Counter(gram))

    return features
def asDict(sentence: Sentence, ngrams=2):
    """Convert a :class:`fnl.text.sentence.Sentence` into a feature dictionary.

    Features produced:

    * ``'gene-count'`` -- result of ``sentence.countEntity('B-gene')``;
    * ``'<stem>/<tag>'`` -- occurrence count of each (masked stem, PoS tag)
      pair;
    * ``'has-all-entities'`` -- ``1`` if both ``"TARGET/NN"`` and
      ``"FACTOR/NN"`` ended up as keys;
    * space-joined n-grams of masked stems for each order from 2 through
      ``ngrams``, each mapped to its occurrence count.

    Keys written later replace equal keys written earlier, because the
    groups are merged with ``dict.update``.

    :param sentence: the sentence to extract features from
    :param ngrams: maximum n-gram order; no n-grams are added when it is
                   less than 2
    :return: a ``dict`` of feature name to integer value
    """
    d = {'gene-count': sentence.countEntity('B-gene')}
    stems = list(sentence.maskedStems())
    pos = sentence.posTags()
    tokens = Counter('{}/{}'.format(s, t) for s, t in zip(stems, pos))
    d.update(tokens)

    if "TARGET/NN" in d and "FACTOR/NN" in d:
        d['has-all-entities'] = 1

    # Start from unigrams; each pass extends every n-gram by one stem on the
    # left, so gram[i] is the n-gram beginning at position i.
    gram = list(stems)

    while ngrams > 1:
        # Decrement the counter; the former "ngrams = -1" forced exactly one
        # pass, so orders above 2 were never generated.
        ngrams -= 1
        gram = ['{} {}'.format(s, g) for s, g in zip(stems, gram[1:])]
        d.update(Counter(gram))

    return d