    def test_allennlp_processor_with_existing_entries(self, overwrite_entries,
                                                      allow_parallel_entries):
        config = {
            'overwrite_entries': overwrite_entries,
            'allow_parallel_entries': allow_parallel_entries
        }
        nlp = self._create_pipeline(config)

        # Adding extra processor to have existing tokens and dependencies
        nlp.add(component=AllenNLPProcessor(), config=config)
        nlp.initialize()

        if not overwrite_entries and not allow_parallel_entries:
            # Processor should raise ProcessExecutionException when both
            # flags are False and existing entries are found
            with self.assertRaises(ProcessExecutionException):
                nlp.process(self.document)
        else:
            pack = nlp.process(self.document)

            processors = AllenNLPProcessor.default_configs()['processors']
            tag_format = AllenNLPProcessor.default_configs()['tag_formalism']

            if not overwrite_entries:
                if allow_parallel_entries:
                    # Should raise AssertionError due to duplicate tokens
                    with self.assertRaises(AssertionError):
                        self._check_results(pack, processors, tag_format)
            else:
                self._check_results(pack, processors, tag_format)

    def test_allennlp_processor_with_different_processors(self, processors):
        nlp = self._create_pipeline({'processors': processors})
        pack = nlp.process(self.document)

        if processors == "":
            processors = AllenNLPProcessor.default_configs()['processors']
        tag_format = AllenNLPProcessor.default_configs()['tag_formalism']

        self._check_results(pack, processors, tag_format)
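
    # The helper below is not part of the original snippet. It is a minimal
    # sketch of what `_create_pipeline` is assumed to do: build a Forte
    # Pipeline with a StringReader and an AllenNLPProcessor configured from
    # the given dict (assumed imports: forte.pipeline.Pipeline,
    # forte.data.data_pack.DataPack, forte.data.readers.StringReader).
    def _create_pipeline_sketch(self, config):
        nlp = Pipeline[DataPack]()
        nlp.set_reader(StringReader())
        # The processor under test is added with the caller-supplied config.
        nlp.add(component=AllenNLPProcessor(), config=config)
        nlp.initialize()
        return nlp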

    def test_allennlp_processor_with_repeating_words(self, sentence):
        processors = "tokenize"
        nlp = self._create_pipeline({'processors': processors})
        self.document = sentence
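        # Expected tokens: whitespace split, with the trailing period detached
        # into its own token (hence the ' .' replacement).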
        self.tokens = [sentence.replace('.', ' .').split()]
        pack = nlp.process(self.document)

        output_format = AllenNLPProcessor.default_configs()['output_format']

        self._check_results(pack, processors, output_format)

    def test_allennlp_processor_with_different_tag_formats(self, format):
        if format == "random_dependencies":
            with self.assertRaises(ProcessorConfigError):
                self._create_pipeline({'tag_formalism': format})
        else:
            nlp = self._create_pipeline({'tag_formalism': format})
            pack = nlp.process(self.document)

            processors = AllenNLPProcessor.default_configs()['processors']

            self._check_results(pack, processors, format)
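
    # Likewise, `_check_results` is only referenced above; the sketch below
    # shows the assumed behaviour for the "tokenize" processor: the tokens
    # produced in each sentence are compared against the expected
    # `self.tokens` (Sentence and Token assumed from ft.onto.base_ontology;
    # dependency checks for other processors are omitted here).
    def _check_results_sketch(self, pack, processors, tag_format):
        for i, sentence in enumerate(pack.get(Sentence)):
            if "tokenize" in processors:
                tokens = [token.text for token in pack.get(Token, sentence)]
                self.assertEqual(tokens, self.tokens[i])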