Python SpacySentenceSplitter.batch_split_sentences 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: allennlp.data.tokenizers.sentence_splitter

메소드/함수: batch_split_sentences

hotexamples.com에서의 예제들: 4

Python SpacySentenceSplitter.batch_split_sentences - 4개의 예제가 발견되었습니다. 아래는 오픈소스 프로젝트에서 추출한 Python의 allennlp.data.tokenizers.sentence_splitter.SpacySentenceSplitter.batch_split_sentences 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가해 주시면 예제의 품질 향상에 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

SpacySentenceSplitter(19)

split_sentences(17)

batch_split_sentences(3)

to_params(1)

예제 #1

파일 보기

class TestSentenceSplitter(AllenNlpTestCase):
    """Tests for ``SpacySentenceSplitter`` in both of its ``rule_based`` modes."""

    def setUp(self):
        """Build one splitter with ``rule_based=False`` and one with ``rule_based=True``."""
        super().setUp()
        self.dep_parse_splitter = SpacySentenceSplitter(rule_based=False)
        self.rule_based_splitter = SpacySentenceSplitter(rule_based=True)

    def _check_batch_agrees_with_single(self, splitter, documents):
        """Assert that batch splitting matches splitting each document on its own.

        The comparison checks the number of documents, then the number of
        sentences per document, then every sentence pairwise.
        """
        batched = splitter.batch_split_sentences(documents)
        one_by_one = [splitter.split_sentences(document) for document in documents]
        assert len(batched) == len(one_by_one)
        for from_batch, from_single in zip(batched, one_by_one):
            assert len(from_batch) == len(from_single)
            for batch_item, single_item in zip(from_batch, from_single):
                assert batch_item == single_item

    def test_rule_based_splitter_passes_through_correctly(self):
        """The rule-based splitter returns the two sentences without the trailing space."""
        paragraph = "This is the first sentence. This is the second sentence! "
        wanted = ["This is the first sentence.", "This is the second sentence!"]
        assert self.rule_based_splitter.split_sentences(paragraph) == wanted

    def test_dep_parse_splitter_passes_through_correctly(self):
        """The ``rule_based=False`` splitter returns the same two sentences."""
        paragraph = "This is the first sentence. This is the second sentence! "
        wanted = ["This is the first sentence.", "This is the second sentence!"]
        assert self.dep_parse_splitter.split_sentences(paragraph) == wanted

    def test_batch_rule_based_sentence_splitting(self):
        """Batch and per-document results agree for the rule-based splitter."""
        documents = [
            "This is a sentence. This is a second sentence.",
            "This isn't a sentence. This is a second sentence! This is a third sentence.",
        ]
        self._check_batch_agrees_with_single(self.rule_based_splitter, documents)

    def test_batch_dep_parse_sentence_splitting(self):
        """Batch and per-document results agree for the ``rule_based=False`` splitter."""
        documents = [
            "This is a sentence. This is a second sentence.",
            "This isn't a sentence. This is a second sentence! This is a third sentence.",
        ]
        self._check_batch_agrees_with_single(self.dep_parse_splitter, documents)

예제 #2

파일 보기

class TestSentenceSplitter(AllenNlpTestCase):
    """Tests for ``SpacySentenceSplitter`` in both of its ``rule_based`` modes."""

    def setUp(self):
        """Build one splitter with ``rule_based=False`` and one with ``rule_based=True``."""
        super().setUp()
        self.dep_parse_splitter = SpacySentenceSplitter(rule_based=False)
        self.rule_based_splitter = SpacySentenceSplitter(rule_based=True)

    def _assert_batch_matches_individual(self, splitter, documents):
        """Assert that ``batch_split_sentences`` agrees with per-document splitting.

        Checks the document count, the sentence count of each document, and
        finally each sentence pairwise.
        """
        batch_split = splitter.batch_split_sentences(documents)
        separately_split = [splitter.split_sentences(doc) for doc in documents]
        assert len(batch_split) == len(separately_split)
        for batch_doc, separate_doc in zip(batch_split, separately_split):
            assert len(batch_doc) == len(separate_doc)
            for batch_sentence, separate_sentence in zip(batch_doc, separate_doc):
                assert batch_sentence == separate_sentence

    def test_rule_based_splitter_passes_through_correctly(self):
        """The rule-based splitter yields the four expected sentences."""
        text = ("This is the first sentence. This is the second sentence! "
                "Here's the '3rd' sentence - yes, it is. And yes; this is a fourth sentence?")
        tokens = self.rule_based_splitter.split_sentences(text)
        expected_tokens = ["This is the first sentence.", "This is the second sentence!",
                           "Here's the '3rd' sentence - yes, it is.", "And yes; this is a fourth sentence?"]
        assert tokens == expected_tokens

    # Compare (major, minor) numerically: a plain string comparison is
    # lexicographic, so e.g. "10.0" < "2.1" would be True and skip the test
    # on versions that should run it.
    @pytest.mark.skipif(
        tuple(int(part) for part in spacy.__version__.split(".")[:2]) < (2, 1),
        reason="this model changed from 2.0 to 2.1",
    )
    def test_dep_parse_splitter_passes_through_correctly(self):
        """The ``rule_based=False`` splitter yields the four expected sentences.

        Skipped when the installed spaCy is older than 2.1.
        """
        text = ("This is the first sentence. This is the second sentence! "
                "Here's the '3rd' sentence - yes, it is. And yes; this is a fourth sentence?")
        tokens = self.dep_parse_splitter.split_sentences(text)
        expected_tokens = ["This is the first sentence.", "This is the second sentence!",
                           "Here's the '3rd' sentence - yes, it is.", "And yes; this is a fourth sentence?"]
        assert tokens == expected_tokens

    def test_batch_rule_based_sentence_splitting(self):
        """Batch and per-document results agree for the rule-based splitter."""
        text = ["This is a sentence. This is a second sentence.",
                "This isn't a sentence. This is a second sentence! This is a third sentence.",
                "This is the 3rd sentence?",
                "Here's the 'fourth' sentence - yes, it is. And this is a second sentence."]
        self._assert_batch_matches_individual(self.rule_based_splitter, text)

    def test_batch_dep_parse_sentence_splitting(self):
        """Batch and per-document results agree for the ``rule_based=False`` splitter."""
        text = ["This is a sentence. This is a second sentence.",
                "This isn't a sentence. This is a second sentence! This is a third sentence.",
                "This is the 3rd sentence?",
                "Here's the 'fourth' sentence - yes, it is. And this is a second sentence."]
        self._assert_batch_matches_individual(self.dep_parse_splitter, text)

예제 #3

파일 보기

파일: sentence_splitter_test.py 프로젝트: apmoore1/allennlp

class TestSentenceSplitter(AllenNlpTestCase):
    """Tests for ``SpacySentenceSplitter`` in both of its ``rule_based`` modes."""

    # Four-sentence paragraph used by the single-document tests.
    _PARAGRAPH = ("This is the first sentence. This is the second sentence! "
                  "Here's the '3rd' sentence - yes, it is. And yes; this is a fourth sentence?")

    # Documents used by the batch tests; copied into a fresh list per test.
    _DOCUMENTS = (
        "This is a sentence. This is a second sentence.",
        "This isn't a sentence. This is a second sentence! This is a third sentence.",
        "This is the 3rd sentence?",
        "Here's the 'fourth' sentence - yes, it is. And this is a second sentence.",
    )

    def setUp(self):
        """Build one splitter with ``rule_based=False`` and one with ``rule_based=True``."""
        super(TestSentenceSplitter, self).setUp()
        self.dep_parse_splitter = SpacySentenceSplitter(rule_based=False)
        self.rule_based_splitter = SpacySentenceSplitter(rule_based=True)

    def test_rule_based_splitter_passes_through_correctly(self):
        """The rule-based splitter yields four sentences for the paragraph."""
        wanted = [
            "This is the first sentence.",
            "This is the second sentence!",
            "Here's the '3rd' sentence - yes, it is.",
            "And yes; this is a fourth sentence?",
        ]
        assert self.rule_based_splitter.split_sentences(self._PARAGRAPH) == wanted

    def test_dep_parse_splitter_passes_through_correctly(self):
        """The ``rule_based=False`` splitter is expected to yield five sentences."""
        # Unlike the rule-based expectation, this one breaks after the dash.
        wanted = [
            "This is the first sentence.",
            "This is the second sentence!",
            "Here's the '3rd' sentence -",
            "yes, it is.",
            "And yes; this is a fourth sentence?",
        ]
        assert self.dep_parse_splitter.split_sentences(self._PARAGRAPH) == wanted

    def test_batch_rule_based_sentence_splitting(self):
        """Batch and per-document results agree for the rule-based splitter."""
        documents = list(self._DOCUMENTS)
        splitter = self.rule_based_splitter
        batched = splitter.batch_split_sentences(documents)
        one_by_one = [splitter.split_sentences(document) for document in documents]
        assert len(batched) == len(one_by_one)
        for from_batch, from_single in zip(batched, one_by_one):
            assert len(from_batch) == len(from_single)
            for batch_item, single_item in zip(from_batch, from_single):
                assert batch_item == single_item

    def test_batch_dep_parse_sentence_splitting(self):
        """Batch and per-document results agree for the ``rule_based=False`` splitter."""
        documents = list(self._DOCUMENTS)
        splitter = self.dep_parse_splitter
        batched = splitter.batch_split_sentences(documents)
        one_by_one = [splitter.split_sentences(document) for document in documents]
        assert len(batched) == len(one_by_one)
        for from_batch, from_single in zip(batched, one_by_one):
            assert len(from_batch) == len(from_single)
            for batch_item, single_item in zip(from_batch, from_single):
                assert batch_item == single_item

예제 #4

파일 보기

파일: sentence_splitter_test.py 프로젝트: himkt/allennlp

class TestSentenceSplitter(AllenNlpTestCase):
    """Tests for ``SpacySentenceSplitter`` splitting and its ``to_params`` output."""

    def setup_method(self):
        """Build one splitter with ``rule_based=False`` and one with ``rule_based=True``."""
        super().setup_method()
        self.dep_parse_splitter = SpacySentenceSplitter(rule_based=False)
        self.rule_based_splitter = SpacySentenceSplitter(rule_based=True)

    @staticmethod
    def _compare_batch_to_single(splitter, documents):
        """Assert that batch splitting matches splitting each document on its own.

        Checks the document count, then each document's sentence count, then
        each sentence pairwise.
        """
        batched = splitter.batch_split_sentences(documents)
        one_by_one = [splitter.split_sentences(document) for document in documents]
        assert len(batched) == len(one_by_one)
        for from_batch, from_single in zip(batched, one_by_one):
            assert len(from_batch) == len(from_single)
            for batch_item, single_item in zip(from_batch, from_single):
                assert batch_item == single_item

    def test_rule_based_splitter_passes_through_correctly(self):
        """The rule-based splitter returns the two sentences without the trailing space."""
        paragraph = "This is the first sentence. This is the second sentence! "
        wanted = ["This is the first sentence.", "This is the second sentence!"]
        assert self.rule_based_splitter.split_sentences(paragraph) == wanted

    def test_dep_parse_splitter_passes_through_correctly(self):
        """The ``rule_based=False`` splitter returns the same two sentences."""
        paragraph = "This is the first sentence. This is the second sentence! "
        wanted = ["This is the first sentence.", "This is the second sentence!"]
        assert self.dep_parse_splitter.split_sentences(paragraph) == wanted

    def test_batch_rule_based_sentence_splitting(self):
        """Batch and per-document results agree for the rule-based splitter."""
        documents = [
            "This is a sentence. This is a second sentence.",
            "This isn't a sentence. This is a second sentence! This is a third sentence.",
        ]
        self._compare_batch_to_single(self.rule_based_splitter, documents)

    def test_batch_dep_parse_sentence_splitting(self):
        """Batch and per-document results agree for the ``rule_based=False`` splitter."""
        documents = [
            "This is a sentence. This is a second sentence.",
            "This isn't a sentence. This is a second sentence! This is a third sentence.",
        ]
        self._compare_batch_to_single(self.dep_parse_splitter, documents)

    def test_to_params(self):
        """``to_params`` returns a ``Params`` holding the type, language and rule_based flag."""
        splitter = self.dep_parse_splitter
        serialized = splitter.to_params()
        assert isinstance(serialized, Params)
        wanted = {
            "type": "spacy",
            "language": splitter._language,
            "rule_based": splitter._rule_based,
        }
        assert serialized.params == wanted