def test_pipeline4(self, batch_size):
    """Tests a chain of Pack->Batch->Pack."""
    pipeline = Pipeline[MultiPack]()
    reader = MultiPackSentenceReader()
    pipeline.set_reader(reader)

    # Stage 1: a plain pack processor on the first pack.
    pipeline.add(component=DummyPackProcessor(), selector=FirstPackSelector())
    # Stage 2: a fixed-size batch processor in the middle of the chain.
    pipeline.add(
        component=DummyFixedSizeBatchProcessor(),
        config={"batcher": {"batch_size": batch_size}},
        selector=FirstPackSelector(),
    )
    # Stage 3: another plain pack processor after the batch stage.
    pipeline.add(component=DummyPackProcessor(), selector=FirstPackSelector())
    pipeline.initialize()

    data_path = os.path.join(data_samples_root, "random_texts", "0.txt")
    pack_count = 0
    for pack in pipeline.process_dataset(data_path):
        entries = list(pack.get_pack("pack").get_entries_of(NewType))
        pack_count += 1
        self.assertEqual(len(entries), 1)
        self.assertEqual(entries[0].value, "[PACK][BATCH][PACK]")
    # Check that all packs produced by the reader are yielded.
    self.assertEqual(pack_count, reader.count)
def test_pipeline7(self, batch_size1, batch_size2, batch_size3):
    """Tests a chain of Batch->Batch->Batch->Pack with different batch sizes."""
    nlp = Pipeline[MultiPack]()
    reader = MultiPackSentenceReader()
    nlp.set_reader(reader)
    # Three stacked batch processors, each with its own batch size.
    # NOTE(review): normalized the processor name from the triple-"m"
    # ``DummmyFixedSizeBatchProcessor`` to the spelling used by the
    # sibling tests in this file.
    for size in (batch_size1, batch_size2, batch_size3):
        nlp.add(
            component=DummyFixedSizeBatchProcessor(),
            config={"batcher": {"batch_size": size}},
            selector=FirstPackSelector(),
        )
    # A final pack-level processor closes the chain.
    nlp.add(component=DummyPackProcessor(), selector=FirstPackSelector())
    nlp.initialize()

    # Build the path portably, matching the sibling tests that use
    # os.path.join instead of string concatenation.
    data_path = os.path.join(data_samples_root, "random_texts", "0.txt")
    num_packs = 0
    for pack in nlp.process_dataset(data_path):
        types = list(pack.get_pack("pack").get_entries_by_type(NewType))
        num_packs += 1
        self.assertEqual(len(types), 1)
        self.assertEqual(types[0].value, "[BATCH][BATCH][BATCH][PACK]")
    # Check that all packs are yielded.
    self.assertEqual(num_packs, reader.count)
def test_pipeline_multipack_selector(self):
    """Tests a batch processor only."""
    pipeline = Pipeline[MultiPack]()
    reader = MultiPackSentenceReader()
    pipeline.set_reader(reader)

    # Single batch processor, batching over sentences.
    batch_config = {
        "batcher": {
            "batch_size": 4,
            "context_type": "ft.onto.base_ontology.Sentence",
        },
    }
    pipeline.add(
        component=DummyFixedSizeBatchProcessor(),
        config=batch_config,
        selector=FirstPackSelector(),
    )
    pipeline.initialize()

    data_path = data_samples_root + "/random_texts/0.txt"
    yielded = 0
    for pack in pipeline.process_dataset(data_path):
        entries = list(pack.get_pack("pack").get_entries_of(NewType))
        yielded += 1
        self.assertEqual(len(entries), 1)
        self.assertEqual(entries[0].value, "[BATCH]")
    # Check that all packs are yielded.
    self.assertEqual(yielded, reader.count)
def test_pipeline_multipack_three_stack_batch_diff_size_pack_chain(
    self, batch_size1, batch_size2, batch_size3
):
    """Tests a chain of Batch->Batch->Batch->Pack with different batch sizes."""
    pipeline = Pipeline[MultiPack]()
    reader = MultiPackSentenceReader()
    pipeline.set_reader(reader)

    # Stack three batch processors; only the batch size differs per stage.
    for size in (batch_size1, batch_size2, batch_size3):
        pipeline.add(
            component=DummyFixedSizeBatchProcessor(),
            config={
                "batcher": {
                    "batch_size": size,
                    "context_type": "ft.onto.base_ontology.Sentence",
                },
            },
            selector=FirstPackSelector(),
        )
    # A pack-level processor finishes the chain.
    pipeline.add(component=DummyPackProcessor(), selector=FirstPackSelector())
    pipeline.initialize()

    data_path = os.path.join(data_samples_root, "random_texts", "0.txt")
    yielded = 0
    for pack in pipeline.process_dataset(data_path):
        entries = list(pack.get_pack("pack").get_entries_of(NewType))
        yielded += 1
        self.assertEqual(len(entries), 1)
        self.assertEqual(entries[0].value, "[BATCH][BATCH][BATCH][PACK]")
    # Check that all packs are yielded.
    self.assertEqual(yielded, reader.count)
def test_first_pack_selector(self) -> None:
    """Checks default and reverse selection of FirstPackSelector."""
    selector = FirstPackSelector()

    # Default behavior: exactly the first pack (named "1") is selected.
    selector.initialize()
    selected = list(selector.select(self.multi_pack))
    self.assertEqual(len(selected), 1)
    self.assertEqual(selected[0].pack_name, "1")

    # Test reverse selection: everything except the first pack.
    selector.initialize({"reverse_selection": True})
    selected = list(selector.select(self.multi_pack))
    self.assertEqual(len(selected), len(self.multi_pack.packs) - 1)
def test_pipeline1(self):
    """Tests a pack processor only."""
    pipeline = Pipeline[MultiPack]()
    reader = MultiPackSentenceReader()
    pipeline.set_reader(reader)

    # A single pack-level processor applied to the first pack.
    pipeline.add(DummyPackProcessor(), selector=FirstPackSelector())
    pipeline.initialize()

    data_path = data_samples_root + "/random_texts/0.txt"
    yielded = 0
    for pack in pipeline.process_dataset(data_path):
        entries = list(pack.get_pack("pack").get_entries_by_type(NewType))
        yielded += 1
        self.assertEqual(len(entries), 1)
        self.assertEqual(entries[0].value, "[PACK]")
    # Check that all packs are yielded.
    self.assertEqual(yielded, reader.count)
def test_pipeline2(self):
    """Tests a batch processor only."""
    # Typed as Pipeline[MultiPack] to match every other test in this file
    # (the original used an untyped ``Pipeline()``).
    nlp = Pipeline[MultiPack]()
    reader = MultiPackSentenceReader()
    nlp.set_reader(reader)
    # NOTE(review): normalized the triple-"m" ``DummmyFixedSizeBatchProcessor``
    # to the spelling used by the sibling tests.
    dummy = DummyFixedSizeBatchProcessor()
    config = {"batcher": {"batch_size": 4}}
    # Use the ``add(component=...)`` API like the sibling tests; the original
    # called the older ``add_processor(processor=...)`` form.
    nlp.add(component=dummy, config=config, selector=FirstPackSelector())
    nlp.initialize()
    # Derive the path from data_samples_root instead of hard-coding
    # "data_samples/..." so the test works from any working directory.
    data_path = data_samples_root + "/random_texts/0.txt"
    num_packs = 0
    for pack in nlp.process_dataset(data_path):
        types = list(pack.get_pack("pack").get_entries_by_type(NewType))
        num_packs += 1
        self.assertEqual(len(types), 1)
        self.assertEqual(types[0].value, "[BATCH]")
    # Check that all packs are yielded.
    self.assertEqual(num_packs, reader.count)
def test_first_pack_selector(self) -> None:
    """Checks that FirstPackSelector picks exactly the first pack."""
    selector = FirstPackSelector()
    # Initialize the selector before use, matching how the other
    # FirstPackSelector test in this file drives it.
    selector.initialize()
    packs = list(selector.select(self.multi_pack))
    self.assertEqual(len(packs), 1)
    # NOTE(review): this variant reads ``meta.doc_id`` where the other test
    # uses ``pack_name`` — presumably an older API; verify which one the
    # current pack class exposes.
    self.assertEqual(packs[0].meta.doc_id, "1")