def test_allennlp_processor_with_repeating_words(self, sentence):
    processors = "tokenize"
    nlp = self._create_pipeline({'processors': processors})
    self.document = sentence
    # Expected tokens: whitespace-split words, with the trailing period
    # separated out as its own token.
    self.tokens = [sentence.replace('.', ' .').split()]
    pack = nlp.process(self.document)
    tag_format = AllenNLPProcessor.default_configs()['tag_formalism']
    self._check_results(pack, processors, tag_format)
def test_allennlp_processor_with_different_tag_formats(self, format):
    if format == "random_dependencies":
        # An unsupported tag formalism should be rejected at pipeline
        # construction time.
        with self.assertRaises(ProcessorConfigError):
            self._create_pipeline({'tag_formalism': format})
    else:
        nlp = self._create_pipeline({'tag_formalism': format})
        pack = nlp.process(self.document)
        processors = AllenNLPProcessor.default_configs()['processors']
        self._check_results(pack, processors, format)
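# Note: the two test methods above take extra positional arguments
# ('sentence' and 'format'), which implies they are parameterized, e.g.
# via ddt's @data decorator on each method and @ddt on the test class.
# A minimal sketch of that wiring (class name and data values here are
# illustrative assumptions, not taken from this file):
#
#     from ddt import ddt, data
#
#     @ddt
#     class AllenNLPProcessorTest(unittest.TestCase):
#         @data("This is a sentence.")
#         def test_allennlp_processor_with_repeating_words(self, sentence):
#             ...
#
#         @data("stanford", "random_dependencies")
#         def test_allennlp_processor_with_different_tag_formats(self, format):
#             ...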
@staticmethod
def _create_pipeline(config):
    nlp = Pipeline[DataPack]()
    nlp.set_reader(StringReader())

    # Use SpacyProcessor to segment the text into sentences before
    # AllenNLPProcessor runs.
    nlp.add(
        component=SpacyProcessor(),
        config={
            'processors': '',
            'lang': "en_core_web_sm",  # Language code to build the pipeline.
            'use_gpu': False
        })
    nlp.add(component=AllenNLPProcessor(), config=config)
    nlp.initialize()
    return nlp
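# Usage sketch (illustrative only, not part of the test suite): since the
# helper builds a fully initialized pipeline, it can be exercised directly
# on a raw string. The class name, config values, and sample sentence below
# are assumptions; Token would come from ft.onto.base_ontology.
#
#     nlp = AllenNLPProcessorTest._create_pipeline(
#         {'processors': 'tokenize', 'tag_formalism': 'stanford'})
#     pack = nlp.process("Forte runs AllenNLP models in a pipeline .")
#     for token in pack.get(Token):
#         print(token.text)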