Python align_tags Examples

Programming Language: Python

Namespace/Package Name: unbabel_text_utils.tags_align.align_wa.wa_tag_align

Method/Function: align_tags

Examples at hotexamples.com: 2

Python align_tags - 2 examples found. These are the top-rated real-world Python examples of unbabel_text_utils.tags_align.align_wa.wa_tag_align.align_tags, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: test_wa_tag_align.py Project: Unbabel/unbabel-text-utils

    def atest_canonical(self):
        """
        Run align_tags over hand-written canonical cases.

        Every case holds eight items, in this order:

        text      - the source string as written in the case
        s_token   - list of source words
        tok_src   - char positions of the source tokens
        tok_trg   - char positions of the tokenized target
        s_tags    - dict of source markup tags
        t_token   - list of target words
        wrd_align - alignment between source and target tokens
        truth     - the tag dict align_tags is expected to return

        NOTE(review): the ``atest_`` prefix presumably keeps this method out
        of default unittest collection - confirm that this is intentional.
        """

        def italic_pair(open_key, close_key):
            # Build a fresh dict holding one <i> open tag and its matching
            # </i> close tag under the two given keys.
            return {
                open_key: [{'tag_type': 'open',
                            'close_tid': 1,
                            'text': u'<i>',
                            'tid': 0}],
                close_key: [{'tag_type': 'close',
                             'open_tid': 0,
                             'text': u'</i>',
                             'tid': 1}],
            }

        cases = (
            # One tag wrapping both words, order kept:
            # <i>a b</i> --> <i>1 2</i>
            (
                "<i>a b</i>",
                ["a", "b"],
                [[0, 0], [2, 2]],
                [[0, 0], [2, 2]],
                italic_pair(0, 3),
                ["1", "2"],
                [(0, 0), (1, 1)],
                italic_pair(0, 3),
            ),
            # One tag on the first word, order kept:
            # <i>a</i> b --> <i>1</i> 2
            (
                "<i>a</i> b",
                ["a", "b"],
                [[0, 0], [2, 2]],
                [[0, 0], [2, 2]],
                italic_pair(0, 1),
                ["1", "2"],
                [(0, 0), (1, 1)],
                italic_pair(0, 1),
            ),
            # One tag on the first word, words swapped:
            # <i>a</i> b --> 1 <i>2</i>
            (
                "<i>a</i> b",
                ["a", "b"],
                [[0, 0], [2, 2]],
                [[0, 0], [2, 2]],
                italic_pair(0, 1),
                ["1", "2"],
                [(0, 1), (1, 0)],
                italic_pair(2, 3),
            ),
            # One tag on the first of four words, with a reordering alignment.
            (
                "<i>a</i> c d b",
                ["a", "c", "d", "b"],
                [[0, 0], [2, 2], [4, 4], [6, 6]],
                [[0, 0], [2, 2], [4, 4], [6, 6]],
                italic_pair(0, 1),
                ["1", "3", "4", "2"],
                [(0, 2), (1, 3), (2, 0), (3, 1)],
                italic_pair(4, 5),
            ),
        )

        for index, case in enumerate(cases):
            (text, s_token, tok_src, tok_trg,
             s_tags, t_token, wrd_align, truth) = case
            with self.subTest(i=index):
                t_tags = align_tags(text, s_token, tok_src, tok_trg,
                                    s_tags, t_token, wrd_align=wrd_align)
                failure_msg = "\n%s\nTruth:\n%s\nGot:\n%s" % (
                    text, truth, t_tags)
                self.assertEqual(t_tags, truth, failure_msg)

Example #2

Show file

File: test_wa_tag_align.py Project: Unbabel/unbabel-text-utils

    def test_neste_examples(self):
        """Project nested tags through align_tags and compare tagged output.

        Assumptions carried by these cases: the text can always be tokenized
        on a single space, and the target has the same characters as the
        source.
        """

        # Each case: (tagged source text, word alignment, expected tagged
        # target text).
        cases = (
            ("<a><i>a b</i></a>",
             [[0, 0], [1, 1]],
             "<a><i>a b</i></a>"),
            ("<a><i>a b</i></a>",
             [[0, 1], [1, 0]],
             "<a><i>b a</i></a>"),
            ("<a>c<i>a b</i></a>",
             [[0, 0], [1, 2], [2, 1]],
             "<a>c<i>b a</i></a>"),
            ("<a>c<i>a b</i> d</a>",
             [[0, 0], [1, 2], [2, 1], [3, 3]],
             "<a>c<i>b a</i> d</a>"),
            ("<a>c<i>a b</i> e d</a>",
             [[0, 0], [1, 2], [2, 1], [3, 4], [4, 3]],
             "<a>c<i>b a</i> d e</a>"),
            ("<a>a<i> b<b> c</b> d</i> e</a>",
             [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]],
             "<a>a<i> b<b> c</b> d</i> e</a>"),
            ("<a>a<i> b</i></a>",
             [[0, 0], [1, 1]],
             "<a>a<i> b</i></a>"),
        )

        # Nested-tag shapes noted alongside the cases:
        #   "<a><i>a</i></a> --> <a><i>1</i></a>"
        #   "<a>a<i>b</i></a> --> <a>a<i>b</i></a>"
        #   "<a>a<i>b</i>c</a> --> <a>a<i>b</i>c</a>"

        for index, case in enumerate(cases):
            tagged_text, wrd_align, truth = case
            with self.subTest(i=index):
                # Strip the markup, then tokenize the bare text on spaces.
                plain, source_tags = remove_tags(tagged_text)
                source_words = plain.split(" ")
                source_spans = split_with_indices(plain, " ")
                # The target reuses the source spans unchanged.
                target_spans = source_spans
                target_words = get_target_tokens(source_words, wrd_align)

                target_tags = align_tags(tagged_text, plain, source_words,
                                         source_spans, target_spans,
                                         source_tags, target_words,
                                         wrd_align=wrd_align)

                # Rebuild the tagged target string and compare with truth.
                target_tagged_text = insert_tags(" ".join(target_words),
                                                 target_tags)
                failure_msg = "\n%s\nTruth:\n%s\nGot:\n%s" % (
                    tagged_text, truth, target_tagged_text)
                self.assertEqual(target_tagged_text, truth, failure_msg)