Esempi in Python per ParsedTextQualityEstimator

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: apps.task.utils.nlp.parsed_text_quality_estimator

Classe/tipologia: ParsedTextQualityEstimator

Esempi su hotexamples.com: 7

ParsedTextQualityEstimator in Python: 7 esempi trovati. Questi sono i migliori esempi reali in Python per apps.task.utils.nlp.parsed_text_quality_estimator.ParsedTextQualityEstimator, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

estimate_text(5)

ParsedTextQualityEstimator(4)

check_line_followed_by_unnecessary_break(2)

split_text_on_lines(2)

Esempio n. 1

Mostra file

File: test_parsed_text_quality_estimator.py Progetto: tx-anin/lexpredict-contraxsuite

 def test_estimate_text_abusing_headers(self):
     """Header-heavy sample must score below 50 for extra line breaks."""
     prepared = pre_process_document(
         load_resource_document('parsing/text_abusing_headers.txt', 'utf-8'))
     report = ParsedTextQualityEstimator().estimate_text(prepared)
     self.assertLess(report.extra_line_breaks_prob, 50)

Esempio n. 2

Mostra file

File: parsed_text_corrector.py Progetto: zajacm/lexpredict-contraxsuite

    def correct_line_breaks(
        self,
        text: str,
        estimator: ParsedTextQualityEstimator = None,
        transformations: Optional[List[Tuple[Tuple[int, int],
                                             Tuple[int, int]]]] = None
    ) -> str:
        """Rebuild ``text`` from the estimator's lines, normalizing the
        endings of lines that are followed by an unnecessary break.

        Args:
            text: source text. It is only split into lines here when no
                ``estimator`` is supplied; a supplied estimator is used as-is
                (presumably it already holds the lines of ``text`` - verify
                against the caller).
            estimator: optional estimator exposing ``lines`` and
                ``check_line_followed_by_unnecessary_break``.
            transformations: optional list that is appended to in place. For
                every line whose ending length changed, one entry
                ``((start, old_end), (start, new_end))`` is added, where
                ``start`` is the offset at which the line's ending begins.
                NOTE(review): offsets are accumulated over the original
                (pre-normalization) line lengths - confirm that consumers
                expect source-text coordinates.

        Returns:
            The concatenation of every line's text and (possibly
            normalized) ending.
        """
        if estimator is None:
            estimator = ParsedTextQualityEstimator()
            estimator.split_text_on_lines(text)

        pieces = []
        # Offset of the current line's first character; it has to advance
        # after every line, otherwise all recorded spans would be relative
        # to position 0.
        total_len = 0

        for indx, line in enumerate(estimator.lines):  # TypedLineOrPhrase
            # Remember the ending length before it can be rewritten below.
            ending_len = len(line.ending)

            if estimator.check_line_followed_by_unnecessary_break(indx):
                self.normalize_line_ending(line)

            if transformations is not None and ending_len != len(line.ending):
                ending_start = total_len + len(line.text)
                old_end = ending_start + ending_len
                new_end = ending_start + len(line.ending)
                transformations.append(
                    ((ending_start, old_end), (ending_start, new_end)))

            pieces.append(line.text)
            pieces.append(line.ending)
            total_len += len(line.text) + ending_len
        return ''.join(pieces)

Esempio n. 3

Mostra file

 def correct_if_corrupted(self, text: str) -> str:
     """Return ``text`` with line breaks corrected, but only when the
     quality estimate flags it as corrupted with extra line breaks.

     The text is returned untouched when ``corrupted_prob`` is below 50
     or when ``extra_line_breaks_prob`` does not exceed 50.
     """
     quality = ParsedTextQualityEstimator()
     report = quality.estimate_text(text)
     if report.corrupted_prob < 50 or not report.extra_line_breaks_prob > 50:
         return text
     # Reuse the estimator that already analysed this text.
     return self.correct_line_breaks(text, quality)

Esempio n. 4

Mostra file

    def test_estimate_dense_text(self):
        """The default-parsed PDF sample must score above 50 for extra line
        breaks; the stripper-parsed sample must score below 30."""
        default_parsed = load_resource_document(
            'parsing/pdf_malformat_parsed_default.txt', 'utf-8')
        quality = ParsedTextQualityEstimator()
        default_report = quality.estimate_text(default_parsed)
        self.assertGreater(default_report.extra_line_breaks_prob, 50)

        # The same estimator instance is reused for the second sample.
        stripper_parsed = load_resource_document(
            'parsing/pdf_malformat_parsed_stripper.txt', 'utf-8')
        stripper_report = quality.estimate_text(stripper_parsed)
        self.assertLess(stripper_report.extra_line_breaks_prob, 30)

Esempio n. 5

Mostra file

 def correct_if_corrupted(self,
                          text: str,
                          transformations: Optional[List[Tuple[Tuple[int, int], Tuple[int, int]]]] = None
                          ) -> str:
     """Return ``text`` with line breaks corrected, but only when the
     quality estimate flags it as corrupted with extra line breaks.

     The text is returned untouched when ``corrupted_prob`` is below 50
     or when ``extra_line_breaks_prob`` does not exceed 50.
     ``transformations`` is forwarded unchanged to ``correct_line_breaks``.
     """
     quality = ParsedTextQualityEstimator()
     report = quality.estimate_text(text)
     if report.corrupted_prob < 50 or not report.extra_line_breaks_prob > 50:
         return text
     # Reuse the estimator that already analysed this text.
     return self.correct_line_breaks(text, quality, transformations=transformations)

Esempio n. 6

Mostra file

File: test_parsed_text_quality_estimator.py Progetto: tx-anin/lexpredict-contraxsuite

    def test_estimate_fishy_header(self):
        text = """
Notwithstanding anything in this Section (B) of Article IV to the contrary, in the event any such disruption to Shmenant's operations and use of the demised premises is attributable to Landlord's negligence, or that of its agents, contractors, servants or employees, or is attributable to a breach by Landlord of its obligations under this lease, and if such disruption shall materially impair Shmenant's use of the demised premises for a period in excess of five (5) business days in duration, then a just proportion of the Rent, according to the nature and extent of the impairment to Shmenant's operation and use of the demised premises shall abate for any such period of time from the date of disruption which is in excess of said five (5) business days in duration.



ARTICLE V


RENT"""
        estimator = ParsedTextQualityEstimator()
        estim = estimator.estimate_text(text)
        self.assertLess(estim.extra_line_breaks_prob, 50)

Esempio n. 7

Mostra file

    def correct_line_breaks(
            self,
            text: str,
            estimator: ParsedTextQualityEstimator = None) -> str:
        """Rebuild ``text`` from the estimator's lines, normalizing the
        endings of lines that are followed by an unnecessary break.

        ``text`` is only split into lines here when no ``estimator`` is
        given; a supplied estimator is used as-is. Returns the
        concatenation of every line's text and (possibly normalized) ending.
        """
        if estimator is None:
            estimator = ParsedTextQualityEstimator()
            estimator.split_text_on_lines(text)

        fragments = []
        for position, entry in enumerate(estimator.lines):
            # Normalize first: the ending collected below must be the
            # updated one.
            if estimator.check_line_followed_by_unnecessary_break(position):
                self.normalize_line_ending(entry)
            fragments.append(entry.text)
            fragments.append(entry.ending)
        return ''.join(fragments)