Ejemplo n.º 1
0
    def test_allennlp_processor_with_repeating_words(self, sentence):
        """Run a tokenize-only pipeline on ``sentence`` and check the result.

        The sentence is stored on the test case as ``self.document`` and the
        expected tokens as ``self.tokens`` before the pipeline output is
        handed to ``self._check_results``.

        Args:
            sentence: Raw input text fed to the pipeline.
        """
        requested = "tokenize"
        pipeline = self._create_pipeline({'processors': requested})

        # Expected tokens: detach every period from the preceding word, then
        # split on whitespace. Wrapped in a list, i.e. one token list.
        self.document = sentence
        spaced = sentence.replace('.', ' .')
        self.tokens = [spaced.split()]

        pack = pipeline.process(self.document)

        # Only 'processors' is overridden above, so the results are checked
        # against the processor's default tag formalism.
        default_configs = AllenNLPProcessor.default_configs()
        tag_format = default_configs['tag_formalism']

        self._check_results(pack, requested, tag_format)
    def test_allennlp_processor_with_different_tag_formats(self, format):
        """Check pipeline behavior for a given ``tag_formalism`` value.

        For ``"random_dependencies"`` the test asserts that building the
        pipeline raises ``ProcessorConfigError``. For any other value it
        processes ``self.document`` and checks the results using the
        processor's default ``processors`` setting.

        Args:
            format: Value passed as the ``tag_formalism`` config entry.
        """
        pipeline_config = {'tag_formalism': format}

        # The unsupported formalism must be rejected when the pipeline is
        # created; nothing is processed in that case.
        if format == "random_dependencies":
            with self.assertRaises(ProcessorConfigError):
                self._create_pipeline(pipeline_config)
            return

        pipeline = self._create_pipeline(pipeline_config)
        pack = pipeline.process(self.document)

        # 'processors' is not overridden here, so check against the default.
        default_processors = AllenNLPProcessor.default_configs()['processors']

        self._check_results(pack, default_processors, format)
    @staticmethod
    def _create_pipeline(config):
        """Build and initialize a pipeline ending in ``AllenNLPProcessor``.

        Declared as a static method because it takes no ``self`` while the
        tests call it as ``self._create_pipeline(...)``; without the
        decorator the instance would be bound to ``config`` and the call
        would fail with a ``TypeError``.

        Args:
            config: Configuration passed to ``AllenNLPProcessor`` when it is
                added to the pipeline.

        Returns:
            The initialized ``Pipeline[DataPack]``.

        Raises:
            ProcessorConfigError: Presumably raised for an unsupported
                ``tag_formalism``, as a test in this class expects.
                TODO confirm whether it surfaces from ``add`` or
                ``initialize``.
        """
        nlp = Pipeline[DataPack]()
        nlp.set_reader(StringReader())

        # Using SpacyProcessor to segment the sentences
        nlp.add(
            component=SpacyProcessor(),
            config={
                'processors': '',
                # Language code to build the Pipeline
                'lang': "en_core_web_sm",
                'use_gpu': False,
            })

        nlp.add(component=AllenNLPProcessor(), config=config)
        nlp.initialize()
        return nlp