Code example #1
0
    def test_pipeline4(self, batch_size):
        """Pack -> Batch -> Pack processor chain over a multipack reader."""
        reader = MultiPackSentenceReader()
        nlp = Pipeline[MultiPack]()
        nlp.set_reader(reader)

        # A pack processor, then a fixed-size batch processor, then another
        # pack processor -- each applied to the first pack of every multipack.
        nlp.add(component=DummyPackProcessor(), selector=FirstPackSelector())
        nlp.add(
            component=DummyFixedSizeBatchProcessor(),
            config={"batcher": {"batch_size": batch_size}},
            selector=FirstPackSelector(),
        )
        nlp.add(component=DummyPackProcessor(), selector=FirstPackSelector())
        nlp.initialize()

        data_path = os.path.join(data_samples_root, "random_texts", "0.txt")

        pack_count = 0
        for pack in nlp.process_dataset(data_path):
            entries = list(pack.get_pack("pack").get_entries_of(NewType))
            pack_count += 1
            self.assertEqual(len(entries), 1)
            self.assertEqual(entries[0].value, "[PACK][BATCH][PACK]")

        # Every pack the reader produced must be yielded by the pipeline.
        self.assertEqual(pack_count, reader.count)
Code example #2
0
File: pipeline_test.py  Project: awoziji/forte
    def test_pipeline7(self, batch_size1, batch_size2, batch_size3):
        """Tests a chain of Batch->Batch->Batch->Pack with different batch sizes."""
        nlp = Pipeline[MultiPack]()
        reader = MultiPackSentenceReader()
        nlp.set_reader(reader)
        # NOTE(review): "Dummmy" (triple m) is the processor class's actual
        # spelling in this project; do not "fix" it here.
        # One batch processor per requested batch size, in order.
        for size in (batch_size1, batch_size2, batch_size3):
            nlp.add(
                component=DummmyFixedSizeBatchProcessor(),
                config={"batcher": {"batch_size": size}},
                selector=FirstPackSelector(),
            )
        # Terminal pack processor closes the chain.
        nlp.add(component=DummyPackProcessor(), selector=FirstPackSelector())
        nlp.initialize()
        # os.path.join keeps the path portable, consistent with the sibling
        # tests (e.g. test_pipeline4) instead of "+"-concatenation.
        data_path = os.path.join(data_samples_root, "random_texts", "0.txt")

        num_packs = 0
        for pack in nlp.process_dataset(data_path):
            types = list(pack.get_pack("pack").get_entries_by_type(NewType))
            num_packs += 1
            self.assertEqual(len(types), 1)
            self.assertEqual(types[0].value, "[BATCH][BATCH][BATCH][PACK]")

        # check that all packs are yielded
        self.assertEqual(num_packs, reader.count)
Code example #3
0
    def test_pipeline_multipack_selector(self):
        """Tests a single batch processor attached through a multipack selector.

        The previous docstring ("a batch processor only") was copy-pasted and
        omitted the selector, which is the point of this test.
        """
        nlp = Pipeline[MultiPack]()
        reader = MultiPackSentenceReader()
        nlp.set_reader(reader)
        dummy = DummyFixedSizeBatchProcessor()
        config = {
            "batcher": {
                "batch_size": 4,
                "context_type": "ft.onto.base_ontology.Sentence",
            },
        }
        # FirstPackSelector routes the processor to the first pack of each
        # multipack.
        nlp.add(component=dummy, config=config, selector=FirstPackSelector())
        nlp.initialize()
        # os.path.join for portability, consistent with the other tests here.
        data_path = os.path.join(data_samples_root, "random_texts", "0.txt")
        num_packs = 0
        for pack in nlp.process_dataset(data_path):
            types = list(pack.get_pack("pack").get_entries_of(NewType))
            num_packs += 1
            self.assertEqual(len(types), 1)
            self.assertEqual(types[0].value, "[BATCH]")

        # check that all packs are yielded
        self.assertEqual(num_packs, reader.count)
Code example #4
0
    def test_pipeline_multipack_three_stack_batch_diff_size_pack_chain(
        self, batch_size1, batch_size2, batch_size3
    ):
        """Tests a chain of Batch->Batch->Batch->Pack with different batch sizes."""
        nlp = Pipeline[MultiPack]()
        reader = MultiPackSentenceReader()
        nlp.set_reader(reader)

        # Stack three fixed-size batch processors, one per batch size.
        for size in (batch_size1, batch_size2, batch_size3):
            batch_config = {
                "batcher": {
                    "batch_size": size,
                    "context_type": "ft.onto.base_ontology.Sentence",
                },
            }
            nlp.add(
                component=DummyFixedSizeBatchProcessor(),
                config=batch_config,
                selector=FirstPackSelector(),
            )
        # A pack processor terminates the chain.
        nlp.add(component=DummyPackProcessor(), selector=FirstPackSelector())
        nlp.initialize()
        data_path = os.path.join(data_samples_root, "random_texts", "0.txt")

        yielded = 0
        for pack in nlp.process_dataset(data_path):
            entries = list(pack.get_pack("pack").get_entries_of(NewType))
            yielded += 1
            self.assertEqual(len(entries), 1)
            self.assertEqual(entries[0].value, "[BATCH][BATCH][BATCH][PACK]")

        # check that all packs are yielded
        self.assertEqual(yielded, reader.count)
Code example #5
0
    def test_first_pack_selector(self) -> None:
        """Default selection picks only the first pack; reversed picks the rest."""
        selector = FirstPackSelector()
        selector.initialize()

        # Default behavior: exactly the first pack is selected.
        selected = list(selector.select(self.multi_pack))
        self.assertEqual(len(selected), 1)
        self.assertEqual(selected[0].pack_name, "1")

        # Test reverse selection.
        selector.initialize({"reverse_selection": True})
        selected = list(selector.select(self.multi_pack))
        self.assertEqual(len(selected), len(self.multi_pack.packs) - 1)
Code example #6
0
File: pipeline_test.py  Project: awoziji/forte
    def test_pipeline1(self):
        """Tests a pack processor only."""
        nlp = Pipeline[MultiPack]()
        reader = MultiPackSentenceReader()
        nlp.set_reader(reader)
        dummy = DummyPackProcessor()
        nlp.add(dummy, selector=FirstPackSelector())
        nlp.initialize()
        # os.path.join keeps the path portable, consistent with the sibling
        # tests, instead of "+"-concatenating path segments.
        data_path = os.path.join(data_samples_root, "random_texts", "0.txt")
        num_packs = 0
        for pack in nlp.process_dataset(data_path):
            types = list(pack.get_pack("pack").get_entries_by_type(NewType))
            num_packs += 1
            self.assertEqual(len(types), 1)
            self.assertEqual(types[0].value, "[PACK]")

        # check that all packs are yielded
        self.assertEqual(num_packs, reader.count)
Code example #7
0
    def test_pipeline2(self):
        """A single fixed-size batch processor applied to the first pack."""
        nlp = Pipeline()
        reader = MultiPackSentenceReader()
        nlp.set_reader(reader)
        # NOTE(review): "Dummmy" (triple m) is the class's actual spelling in
        # this project version, which also uses the older add_processor API.
        batch_proc = DummmyFixedSizeBatchProcessor()
        nlp.add_processor(
            processor=batch_proc,
            config={"batcher": {"batch_size": 4}},
            selector=FirstPackSelector(),
        )
        nlp.initialize()
        data_path = "data_samples/random_texts/0.txt"
        seen = 0
        for pack in nlp.process_dataset(data_path):
            entries = list(pack.get_pack("pack").get_entries_by_type(NewType))
            seen += 1
            self.assertEqual(len(entries), 1)
            self.assertEqual(entries[0].value, "[BATCH]")

        # check that all packs are yielded
        self.assertEqual(seen, reader.count)
Code example #8
0
File: selector_test.py  Project: mgupta1410/forte-1
 def test_first_pack_selector(self) -> None:
     """The selector yields exactly the first pack of the multipack."""
     # Older API variant: no initialize() call, and the pack id is read via
     # .meta.doc_id rather than .pack_name.
     selector = FirstPackSelector()
     packs = list(selector.select(self.multi_pack))
     self.assertEqual(len(packs), 1)
     # "1" is expected to be the first pack's doc id in the test fixture
     # (self.multi_pack) -- fixture not visible here; confirm in setUp.
     self.assertEqual(packs[0].meta.doc_id, "1")