def test_merging_regions(self):
        """Check that the paragraphs merging filter collapses a region's content.

        The region initially holds one body with two divs of two paragraphs
        each. After the filter runs, a single div with a single paragraph is
        expected, whose children alternate between text-bearing elements and
        ``Br`` elements in the original text order.
        """
        merging_filter = ParagraphsMergingFilter()

        isd = ISD(None)

        region = ISD.Region("r1", isd)
        region.push_child(
            self._get_filled_body(isd, ["Hello", "world"],
                                  ["Is there", "anyone here?"])
        )
        isd.put_region(region)

        # Structure before filtering: 1 region > 1 body > 2 divs > 2 paragraphs
        regions_before = list(isd.iter_regions())
        self.assertEqual(1, len(regions_before))

        bodies_before = list(regions_before[0])
        self.assertEqual(1, len(bodies_before))

        divs_before = list(bodies_before[0])
        self.assertEqual(2, len(divs_before))

        for div in divs_before[:2]:
            self.assertEqual(2, len(list(div)))

        merging_filter.process(isd)

        # Structure after filtering: 1 region > 1 body > 1 div > 1 paragraph
        regions_after = list(isd.iter_regions())
        self.assertEqual(1, len(regions_after))

        bodies_after = list(regions_after[0])
        self.assertEqual(1, len(bodies_after))

        divs_after = list(bodies_after[0])
        self.assertEqual(1, len(divs_after))

        merged_paragraphs = list(divs_after[0])
        self.assertEqual(1, len(merged_paragraphs))

        children = list(merged_paragraphs[0])

        # Text elements sit at even indices, separated by a Br at odd indices
        expected_lines = ("Hello", "world", "Is there", "anyone here?")
        for position, expected_text in enumerate(expected_lines):
            if position > 0:
                self.assertIsInstance(children[2 * position - 1], Br)

            actual_text = self._get_text_from_children(children[2 * position])
            self.assertEqual(expected_text, actual_text)
Esempio n. 2
0
    def test_merging_regions(self):
        """Check that the regions merging filter folds two regions into one.

        Two regions ("r1" and "r2"), each holding one filled body, are put
        into the ISD. After the filter runs, a single region named "r1_r2" is
        expected, with one body whose two divs carry the original texts in
        order.
        """
        merging_filter = RegionsMergingFilter()

        isd = ISD(None)

        contents = (
            ("r1", "Hello world"),
            ("r2", "Is there anyone here?"),
        )

        # Build every region first, then register them, as two separate steps
        source_regions = []
        for region_id, text in contents:
            region = ISD.Region(region_id, isd)
            region.push_child(self._get_filled_body(isd, text))
            source_regions.append(region)

        for region in source_regions:
            isd.put_region(region)

        self.assertEqual(2, len(list(isd.iter_regions())))

        merging_filter.process(isd)

        self.assertEqual(1, len(list(isd.iter_regions())))

        merged_region = isd.get_region("r1_r2")
        self.assertIsNotNone(merged_region)

        bodies = list(merged_region)
        self.assertEqual(1, len(bodies))

        divs = list(bodies[0])
        self.assertEqual(2, len(divs))

        # One div per original region, in the original region order
        for div, (_, expected_text) in zip(divs, contents):
            self.assertEqual(expected_text, self._get_text_from_children(div))
Esempio n. 3
0
    def process(self, isd: ISD):
        """Merges the ISD document regions.

        Every child of every body of every region is moved, in iteration
        order, into one new body. Each original region is removed from
        ``isd``, and a single new region holding the new body is put in their
        place. The new region id is the original region ids joined with "_".

        The document is left untouched when it has at most one region, or
        when the summed ``len()`` of its regions is at most one.

        :param isd: the ISD document, modified in place
        """
        LOGGER.debug("Apply regions merging filter to ISD.")

        original_regions = list(isd.iter_regions())

        # Sum of len(region) over all regions (presumably the number of
        # children, i.e. bodies, each region holds -- confirm against the model)
        region_children_count = sum(len(region) for region in original_regions)

        if len(original_regions) <= 1 or region_children_count <= 1:
            return

        LOGGER.warning("Merging ISD regions.")

        target_body = Body(isd)
        region_ids = []

        for region in original_regions:
            region_id = region.get_id()

            for body in region:
                # Iterate over a snapshot: each child is detached from `body`
                # inside the loop, and detaching while iterating the live
                # container could end the iteration early and drop the
                # remaining children.
                for child in list(body):
                    # Remove child from its parent body
                    child.remove()

                    # Add it to the target body
                    target_body.push_child(child)

            region_ids.append(region_id)
            isd.remove_region(region_id)

        target_region = ISD.Region("_".join(region_ids), isd)
        target_region.push_child(target_body)

        isd.put_region(target_region)