def redact_words(self, refs: List[str]):
        """Mask the text of every word entry addressed by ``refs``.

        Args:
            refs: Reference strings. The first character of each (expected to
                be '#') is dropped and the remainder is used as the path
                passed to ``resolve_pointer`` / ``set_pointer`` against
                ``self.ocr_result``.

        The resolved entry's ``"text"`` value is replaced by
        ``first_char(text)`` and the entry is then stored back at the same
        path.
        """
        for ref in refs:
            # Strip the leading '#' so that only the path part remains.
            pointer = ref[1:]
            entry = resolve_pointer(self.ocr_result, pointer)
            entry["text"] = first_char(entry["text"])
            set_pointer(self.ocr_result, pointer, entry)
    def redact_lines(self, refs: List[str]):
        """Mask, inside the parent line's text, the word each ref points at.

        For every ref, the part between the leading '#' and the first
        "/word" occurrence is used as the path of the line entry in
        ``self.ocr_result``. The last '/'-separated segment of the ref is the
        index of the word within that line. The line's ``"text"`` is split on
        single spaces, the token at that index is replaced by
        ``first_char(token)``, and the re-joined text is stored back.

        Args:
            refs: Word references that contain a "/word" segment and end in
                an integer word index.

        Raises:
            ValueError: If a ref contains no "/word" segment, or its last
                segment is not an integer.
            IndexError: If the word index is outside the line's tokens.
        """
        def line_path(ref: str) -> str:
            end = ref.find("/word")
            if end == -1:
                # str.find signals "not found" with -1; slicing with it would
                # silently chop the last character and yield a wrong path.
                raise ValueError(
                    f"Reference {ref!r} does not contain a '/word' segment"
                )
            # Remove leading '#' and trailing word path.
            return ref[1:end]

        for ref in refs:
            r = line_path(ref)
            line = resolve_pointer(self.ocr_result, r)

            tokens = line["text"].split(' ')
            # The final path segment is the word's position within the line.
            word_id = int(ref.split('/')[-1])
            tokens[word_id] = first_char(tokens[word_id])
            line["text"] = ' '.join(tokens)

            set_pointer(self.ocr_result, r, line)
    def redact_page_results(self, refs: Set[str]):
        """Mask table-cell tokens whose element reference is in ``refs``.

        Walks ``self.ocr_result["analyzeResult"]["pageResults"]`` and, for
        every page, each table's cells. A cell's ``"elements"`` entries are
        compared against ``refs`` after '#/' is rewritten to
        '#/analyzeResult/'. On a match, the space-separated token of the
        cell's ``"text"`` at the same position as the element is replaced by
        ``first_char(token)``.

        Args:
            refs: Full references (including the "analyzeResult" layer) of
                the elements to mask.
        """
        analyze_result = self.ocr_result["analyzeResult"]
        for page in analyze_result["pageResults"]:
            for table in page["tables"]:
                for cell in table["cells"]:
                    for position, pointer in enumerate(cell["elements"]):
                        # Cell elements omit the "analyzeResult" layer that
                        # the entries in refs carry, so add it for the lookup.
                        qualified = pointer.replace('#/', '#/analyzeResult/')
                        if qualified not in refs:
                            continue
                        parts = cell["text"].split(' ')
                        parts[position] = first_char(parts[position])
                        cell["text"] = ' '.join(parts)
Example #4
0
 def redact(self):
     """Mask the text of the entities under the selected labels.

     A label from ``self.fott_label.labels`` is selected when
     ``self.labels_to_redact`` is empty (every label is then selected) or
     when its ``label`` attribute is contained in ``self.labels_to_redact``.
     Each entity in a selected label's ``value`` gets its ``text`` replaced
     by ``first_char(text)``.
     """
     for label in self.fott_label.labels:
         selected = (
             len(self.labels_to_redact) == 0
             or label.label in self.labels_to_redact
         )
         if not selected:
             continue
         for entity in label.value:
             entity.text = first_char(entity.text)
 def test_first_char_empty(self) -> None:
     """An empty string must come back from first_char as an empty string."""
     masked = first_char("")
     assert masked == ""
 def test_first_char_diacritics(self) -> None:
     """Names with accented letters are masked like plain letters, keeping punctuation."""
     names = "Anaïs, Noël, Sørina, François, Mátyás, Agnès, Fañch, Reiß"
     expected = "Aaaaa, Aaaa, Aaaaaa, Aaaaaaaa, Aaaaaa, Aaaaa, Aaaaa, Aaaa"
     assert first_char(names) == expected
 def test_first_char_price(self) -> None:
     """A price keeps its currency sign and decimal point; digits become zeros."""
     price = "$3000.00"
     expected = "$0000.00"
     assert first_char(price) == expected
 def test_first_char_date(self) -> None:
     """A slash-separated date keeps its separators; digits become zeros."""
     date_text = "1900/01/01"
     expected = "0000/00/00"
     assert first_char(date_text) == expected
 def test_first_char_Apple(self) -> None:
     """A capitalised word is masked to 'A' followed by lowercase 'a's."""
     word = "Apple"
     expected = "Aaaaa"
     assert first_char(word) == expected