def test_join_score_2_02(self):
    """Check ``join_tokens_by_score`` with ``min_token_score=2`` and two gap limits.

    With ``max_zeros=1`` no phrase is expected; with ``max_zeros=2`` a
    single phrase spanning ``(30, 79)`` is expected.
    """
    spans = [(start, start + 9) for start in range(0, 80, 10)]
    # NOTE(review): 8 token spans but 10 predicted labels -- confirm the
    # length mismatch is intentional.
    labels = np.array([0, 0, 1, 1, 0, 0, 2, 2, 0, 0])

    def join(zero_limit):
        # Materialise the generator so it can be measured and indexed.
        return list(PhraseConstructor.join_tokens_by_score(
            spans, labels, max_zeros=zero_limit, min_token_score=2))

    found = join(1)
    self.assertEqual(0, len(found))

    found = join(2)
    self.assertEqual(1, len(found))
    self.assertEqual((30, 79), found[0])
def test_join_class_nonstrict_01(self):
    """Check ``join_tokens_by_class`` with ``strict=False`` on adjacent classes 1 and 2.

    A single phrase spanning ``(20, 69)`` is expected.
    """
    spans = [(start, start + 9) for start in range(0, 80, 10)]
    labels = np.array([0, 0, 1, 1, 2, 2, 0, 0])

    joined = PhraseConstructor.join_tokens_by_class(
        spans, labels, strict=False)
    found = list(joined)

    self.assertEqual(1, len(found))
    self.assertEqual((20, 69), found[0])
def test_join_score_1_01(self):
    """Check ``join_tokens_by_score`` with ``max_zeros=1`` and ``min_token_score=1``.

    Two phrases are expected: ``(20, 29)`` followed by ``(30, 59)``.
    """
    spans = [(start, start + 9) for start in range(0, 80, 10)]
    labels = np.array([0, 0, 1, 1, 2, 2, 0, 0])

    joined = PhraseConstructor.join_tokens_by_score(
        spans, labels, max_zeros=1, min_token_score=1)
    found = list(joined)

    self.assertEqual(2, len(found))
    self.assertEqual((20, 29), found[0])
    self.assertEqual((30, 59), found[1])
def test_join_class_nonstrict_02(self):
    """Check ``join_tokens_by_class`` with ``strict=False`` when classes are split by a zero.

    Two phrases are expected: ``(30, 49)`` followed by ``(50, 79)``.
    """
    spans = [(start, start + 9) for start in range(0, 100, 10)]
    # NOTE(review): 10 token spans but 9 predicted labels -- confirm the
    # length mismatch is intentional.
    labels = np.array([0, 0, 0, 1, 0, 2, 2, 0, 0])

    joined = PhraseConstructor.join_tokens_by_class(
        spans, labels, strict=False)
    found = list(joined)

    self.assertEqual(2, len(found))
    self.assertEqual((30, 49), found[0])
    self.assertEqual((50, 79), found[1])
def test_join_class_strict_02(self):
    """Check ``join_tokens_by_class`` with ``strict=True`` yields no phrases for this input."""
    spans = [(start, start + 9) for start in range(0, 100, 10)]
    # NOTE(review): 10 token spans but 9 predicted labels -- confirm the
    # length mismatch is intentional.
    labels = np.array([0, 0, 0, 1, 0, 2, 2, 0, 0])

    joined = PhraseConstructor.join_tokens_by_class(
        spans, labels, strict=True)
    found = list(joined)

    self.assertEqual(0, len(found))
def predict_text(self,
                 text: str,
                 join_settings: PhraseConstructorSettings = None,
                 feature_mask: List[int] = None) -> Generator[Tuple[int, int], None, None]:
    """Yield the phrases that ``PhraseConstructor.join_tokens`` builds for ``text``.

    This is a generator: feature extraction and prediction run only once
    iteration starts.

    :param text: text passed to ``self.model.get_feature_data``
    :param join_settings: settings forwarded to ``join_tokens``; when
        falsy (e.g. ``None``), ``self.join_token_settings`` is used instead
    :param feature_mask: forwarded unchanged to both
        ``get_feature_data`` and ``join_tokens``
    :return: whatever ``PhraseConstructor.join_tokens`` yields
        (annotated as ``(int, int)`` tuples)
    """
    features, token_spans = self.model.get_feature_data(text, feature_mask)
    classes = self.model.model.predict(features)

    # Fall back to the instance-level settings only after prediction,
    # keeping the original evaluation order.
    effective_settings = join_settings if join_settings else self.join_token_settings

    yield from PhraseConstructor.join_tokens(
        token_spans,
        classes,
        settings=effective_settings,
        feature_mask=feature_mask)