Ejemplo n.º 1
0
 def test_annotation_merge_mixed(self):
     # Two touching "bold" spans and two touching "italic" spans are fed in
     # together; one span per annotation name is expected back.
     spans = [
         Annotation(start=begin, end=finish, name=kind, value="True")
         for begin, finish, kind in (
             (0, 5, "bold"),
             (5, 15, "bold"),
             (4, 6, "italic"),
             (6, 66, "italic"),
         )
     ]
     expected = {(0, 15, "bold", "True"), (4, 66, "italic", "True")}
     self.assertSetEqual(expected, self.merge(spans))
Ejemplo n.º 2
0
 def test_annotation_merge_same_value_no_spaces(self):
     # Two "size" spans that touch at offset 5 must come back as one span.
     spans = [
         Annotation(start=begin, end=finish, name="size", value="1")
         for begin, finish in ((0, 5), (5, 15))
     ]
     text = "hellomyfriend"
     expected = {(0, 15, "size", "1")}
     self.assertSetEqual(expected, self.merge(spans, text))
Ejemplo n.º 3
0
 def test_annotation_merge_same_value_separating_by_newline(self):
     # The only character between the two spans (offset 5) is a newline;
     # the spans are still expected to be merged into one.
     spans = [
         Annotation(start=begin, end=finish, name="size", value="1")
         for begin, finish in ((0, 5), (6, 15))
     ]
     text = "hello\nmy friend"
     expected = {(0, 15, "size", "1")}
     self.assertSetEqual(expected, self.merge(spans, text))
Ejemplo n.º 4
0
 def test_annotation_merge_same_value_separating_by_many_space(self):
     # Offsets 5..19 of the text are all spaces; the spans on both sides of
     # that gap are expected to be merged into one.
     spans = [
         Annotation(start=begin, end=finish, name="size", value="1")
         for begin, finish in ((0, 5), (20, 25))
     ]
     text = "hello               my friend"
     expected = {(0, 25, "size", "1")}
     self.assertSetEqual(expected, self.merge(spans, text))
Ejemplo n.º 5
0
 def test_annotation_merge_different_value(self):
     # Touching spans that carry different annotation names ("bold" and
     # "italic") must be returned unchanged.
     spans = [
         Annotation(start=begin, end=finish, name=kind, value="True")
         for begin, finish, kind in ((0, 5, "bold"), (5, 15, "italic"))
     ]
     text = "hello my friend"
     expected = {
         (0, 5, "bold", "True"),
         (5, 15, "italic", "True"),
     }
     self.assertSetEqual(expected, self.merge(spans, text))
Ejemplo n.º 6
0
 def test_annotation_merge_three_nested_annotations(self):
     # The first span (0-15) contains the other two; only the enclosing span
     # is expected in the result.
     spans = [
         Annotation(start=begin, end=finish, name="size", value="1")
         for begin, finish in ((0, 15), (6, 10), (3, 8))
     ]
     text = "hello my friend"
     expected = {(0, 15, "size", "1")}
     self.assertSetEqual(expected, self.merge(spans, text))
Ejemplo n.º 7
0
 def test_annotation_merge_three_one_intersected_annotations(self):
     # The first two spans touch at offset 3 and should be merged; the third
     # span starts at 8, with non-space text ("my") in between, and is
     # expected to stay separate.
     spans = [
         Annotation(start=begin, end=finish, name="size", value="1")
         for begin, finish in ((0, 3), (3, 6), (8, 15))
     ]
     text = "hello my friend"
     expected = {(0, 6, "size", "1"), (8, 15, "size", "1")}
     self.assertSetEqual(expected, self.merge(spans, text))
Ejemplo n.º 8
0
    def test_merge_1000_pair_annotations(self):
        # For every offset a "bold" and a "size" one-character span is created
        # (bold first, then size); merging must finish within the timeout and
        # yield one full-length span per annotation name.
        timeout = 10
        n = 1000
        kinds = (("bold", "True"), ("size", "1"))
        annotations = [
            Annotation(start=offset, end=offset + 1, name=kind, value=flag)
            for offset in range(n)
            for kind, flag in kinds
        ]
        text = "x" * n

        with TestTimeout(timeout):
            result = self.merge(annotations, text)

        expected = {(0, n, "bold", "True"), (0, n, "size", "1")}
        self.assertSetEqual(expected, result)
Ejemplo n.º 9
0
    def _merge_annotations(annotations: List[Annotation]) -> List[Annotation]:
        """
        Merge annotations of the same group that touch end-to-start.

        The annotations are split into groups by ``TreeNode._group_annotations``
        (presumably one group per (name, value) pair - confirm against that
        helper).  Inside a group, annotations are linked into chains in which
        every annotation starts exactly at the offset where the previous one
        ends.  Each chain of two or more annotations is replaced by one
        annotation running from the start of its first link to the end of its
        last link, so 0-5, 5-9 and 9-12 become a single 0-12 annotation instead
        of the two overlapping pairwise results 0-9 and 5-12.

        An annotation joins at most one chain; when several annotations start at
        the end of the same chain, the one sorted first by (start, end) extends it.
        Used with add_text.

        :param annotations: annotations to merge, the list itself is not modified
        :return: new list of the annotations that were not merged plus the merged
            ones, sorted by start
        """
        annotations_group_by_name_value = TreeNode._group_annotations(
            annotations)

        merged = []
        # Keys of every annotation that became part of a merged chain; the field
        # order (end, start, name, value) is only an internal lookup key.
        merged_set = set()
        for annotation_group in annotations_group_by_name_value.values():
            chains = []
            # Chains that can still be extended, indexed by the end offset of
            # their last link.
            open_chains = {}
            # Sorting by (start, end) guarantees that a possible predecessor
            # (whose end equals our start) has been processed before us.
            for annotation in sorted(annotation_group,
                                     key=lambda a: (a.start, a.end)):
                waiting = open_chains.get(annotation.start)
                if waiting:
                    chain = waiting.pop()
                else:
                    chain = []
                    chains.append(chain)
                chain.append(annotation)
                open_chains.setdefault(annotation.end, []).append(chain)

            for chain in chains:
                if len(chain) < 2:
                    # A lone annotation is kept as is (unless an identical
                    # duplicate of it was merged, see the filter below).
                    continue
                first_link, last_link = chain[0], chain[-1]
                merged.append(
                    Annotation(start=first_link.start,
                               end=last_link.end,
                               name=first_link.name,
                               value=first_link.value))
                merged_set.update((link.end, link.start, link.name, link.value)
                                  for link in chain)

        other_annotations = [
            annotation for annotation in annotations
            if (annotation.end, annotation.start, annotation.name,
                annotation.value) not in merged_set
        ]
        return sorted(other_annotations + merged, key=lambda a: a.start)
Ejemplo n.º 10
0
 def __shift_annotations(line: LineWithMeta,
                         text_length: int) -> List[Annotation]:
     """
     Build copies of the line's annotations moved right by ``text_length``.

     :param line: line whose ``annotations`` are copied; it is not modified
     :param text_length: value added to both ``start`` and ``end`` of every annotation
     :return: list of new annotations with the same names and values, in the original order
     """
     return [
         Annotation(start=source.start + text_length,
                    end=source.end + text_length,
                    name=source.name,
                    value=source.value)
         for source in line.annotations
     ]
Ejemplo n.º 11
0
 def test_merge_1000_annotations(self):
     # n touching one-character "bold" spans must be merged into a single
     # full-length span before the timeout expires.
     timeout = 10
     n = 1000
     text = "x" * n
     annotations = []
     for offset in range(n):
         annotations.append(
             Annotation(start=offset, end=offset + 1, name="bold", value="True"))

     with TestTimeout(timeout):
         result = self.merge(annotations, text)

     expected = {(0, n, "bold", "True")}
     self.assertSetEqual(expected, result)
Ejemplo n.º 12
0
 def merge(self) -> Optional[Annotation]:
     """
     Collapse ``self.annotations`` into one annotation covering all of them.

     :return: None when there are no annotations; otherwise a new annotation
         spanning from the smallest start to the largest end, with the name
         and value taken from the first stored annotation
     """
     if len(self.annotations) == 0:
         return None

     template = self.annotations[0]
     leftmost = min(item.start for item in self.annotations)
     rightmost = max(item.end for item in self.annotations)
     return Annotation(start=leftmost,
                       end=rightmost,
                       value=template.value,
                       name=template.name)
Ejemplo n.º 13
0
    def test_merge_1000_no_intersection(self):
        # One-character "bold" spans are placed on every even offset below n,
        # so neighbouring spans never touch; the text contains no whitespace.
        # The result must equal the input and be produced within the timeout.
        timeout = 10
        n = 1000
        annotations = [
            Annotation(start=offset, end=offset + 1, name="bold", value="True")
            for offset in range(0, n, 2)
        ]
        text = "x" * (2 * n)

        with TestTimeout(timeout):
            result = self.merge(annotations, text)

        expected = {(item.start, item.end, item.name, item.value)
                    for item in annotations}
        self.assertSetEqual(expected, result)
Ejemplo n.º 14
0
 def add_text(self, line: LineWithMeta):
     """
     Append the text of the given line to this node and take over its annotations.

     Every annotation of the line is re-created with offsets moved by the length
     of the text accumulated so far, added to ``self.annotations``, and the whole
     list is then passed through ``_merge_annotations``. The line text is
     appended as is; no separator is inserted here.

     :param line: line with text to add
     :return:
     """
     offset = len(self.text)
     # NOTE(review): start is moved by offset - 1 while end is moved by offset.
     # Presumably this makes the new annotation touch an annotation that ends
     # just before a trailing separator of the existing text - confirm; with
     # empty text it yields start - 1.
     shifted = [
         Annotation(start=item.start + offset - 1,
                    end=item.end + offset,
                    name=item.name,
                    value=item.value)
         for item in line.annotations
     ]
     self.text += line.line
     self.annotations.extend(shifted)
     self.annotations = self._merge_annotations(self.annotations)
Ejemplo n.º 15
0
 def get_api_dict(api: Api,
                  depth: int = 0,
                  name: str = 'TreeNode') -> Model:
     """
     Register and return the API model that describes a tree node.

     The 'subparagraphs' field is described recursively: while ``depth`` differs
     from the configured 'recursion_deep_subparagraphs' value, the nested model
     is built by calling this function again with ``depth + 1`` and the name
     'refTreeNode<depth>'; once the configured depth is reached, an empty
     'others_TreeNode' model is nested instead.

     :param api: object whose ``model`` method registers the models
     :param depth: current nesting level of the description
     :param name: name under which the model is registered
     :return: result of ``api.model`` for the assembled field mapping
     """
     # Fields are created in the same order as the keys of the resulting mapping.
     schema = {}
     schema['node_id'] = fields.String(
         description="Document element identifier. It is unique within one tree (i.e. "
         "there will be no other such node_id in this tree, but in attachment "
         "it may occur) The identifier has the form 0.2.1 where each number "
         "means a serial number at the corresponding level of the hierarchy.",
         required=True,
         example="0.2.1")
     schema['text'] = fields.String(description="text of node",
                                    required=True,
                                    example="Закон")
     annotation_model = Annotation.get_api_dict(api)
     schema['annotations'] = fields.List(
         fields.Nested(annotation_model,
                       description="Text annotations "
                       "(font, size, bold, italic and etc)"))
     metadata_model = ParagraphMetadata.get_api_dict(api)
     schema['metadata'] = fields.Nested(metadata_model,
                                        skip_none=True,
                                        allow_null=False,
                                        description="Paragraph meta information")

     # Stop the recursive description once the configured depth is reached.
     if depth == get_config()['recursion_deep_subparagraphs']:
         child_model = api.model('others_TreeNode', {})
     else:
         child_model = TreeNode.get_api_dict(api,
                                             depth=depth + 1,
                                             name='refTreeNode' + str(depth))
     schema['subparagraphs'] = fields.List(
         fields.Nested(child_model),
         description="Node childes (with type 'TreeNode') of structure tree")

     return api.model(name, schema)
Ejemplo n.º 16
0
 def test_annotation_unmerge_same_value(self):
     # The spans end at 4 and start at 5, so they do not touch; both are
     # expected back unchanged.
     spans = [
         Annotation(start=begin, end=finish, name="size", value="1")
         for begin, finish in ((0, 4), (5, 15))
     ]
     expected = {(0, 4, "size", "1"), (5, 15, "size", "1")}
     self.assertSetEqual(expected, self.merge(spans))
Ejemplo n.º 17
0
 def test_annotation_merge_same_value2(self):
     # Touching spans whose first start is not 0 must still merge into one.
     spans = [
         Annotation(start=begin, end=finish, name="size", value="1")
         for begin, finish in ((4, 5), (5, 15))
     ]
     expected = {(4, 15, "size", "1")}
     self.assertSetEqual(expected, self.merge(spans))
Ejemplo n.º 18
0
 def test_annotation_merge_one_near_space(self):
     # A single span that ends right before a space has nothing to merge
     # with and must be returned unchanged.
     text = "hello my friend"
     only_span = Annotation(start=0, end=5, name="size", value="1")
     expected = {(0, 5, "size", "1")}
     self.assertSetEqual(expected, self.merge([only_span], text))