예제 #1
0
    def test_multiple_intersections_1(self):
        """Chain three substitutions where later patterns hit earlier output.

        The second and third patterns are written against replacement text
        produced by the preceding step.  The test pins the final processed
        text, the span map and both decorated strings.
        """
        source = ' C AAA C D '

        # Ordered (pattern, {group index: replacement}) steps.
        steps = (
            (r'(C) (AAA) (C) (D) ', {1: 'GG', 2: 'BB', 3: 'GG', 4: 'DD'}),
            (r'( GG BB G)G', {1: 'HJK'}),
            (r'(HJK)G', {1: 'FF'}),
        )
        expected_spans = [((0, 8), (0, 3)), ((9, 10), (4, 6))]

        with Processor(source) as processor:
            for regex, group_replacements in steps:
                processor.process(regex, group_replacements)

        # Results are inspected only after the context manager has exited.
        self.assertEqual(processor.processed_text, 'FFG DD ')
        self.assertEqual(processor.span_map, expected_spans)

        marked_source, marked_result = processor.decorate()

        self.assertEqual(marked_source, '00000000 1 ')
        self.assertEqual(marked_result, '000 11 ')
예제 #2
0
    def test_matching_2(self):
        """Re-replace earlier replacements of differing lengths.

        Three groups are replaced first; an alternation then rewrites two of
        those replacements, and a third pattern rewrites that result again.
        The expected span map still has one entry per original word.
        """
        source = ' AAA BBB CCC DDD '

        # Ordered (pattern, {group index: replacement}) steps.
        steps = (
            (r'(AAA) (BBB) (CCC)', {1: 'ZZZZ', 2: 'YYYYY', 3: 'XXXXXX'}),
            (r'((YYYYY)|(ZZZZ))', {1: 'WWWWWW'}),
            (r'(WWWWWW)', {1: 'QQQQQQQ'}),
        )

        expected_spans = [
            ((1, 4), (1, 8)),
            ((5, 8), (9, 16)),
            ((9, 12), (17, 23)),
        ]

        with Processor(source) as processor:
            for regex, group_replacements in steps:
                processor.process(regex, group_replacements)

        self.assertEqual(processor.processed_text,
                         ' QQQQQQQ QQQQQQQ XXXXXX DDD ')
        self.assertEqual(processor.span_map, expected_spans)

        marked_source, marked_result = processor.decorate()

        # The processed side is checked first, as in the original ordering.
        self.assertEqual(marked_result, ' 0000000 1111111 222222 DDD ')
        self.assertEqual(marked_source, ' 000 111 222 DDD ')
예제 #3
0
    def test_7(self):
        """Expect ValueError when an outer group and its inner groups are
        all given replacements.

        Group 1 of the pattern encloses groups 2 and 3; presumably the
        overlap is what gets rejected -- confirm against Processor.
        """
        text = 'ab'
        pattern = r'((a)(b))'
        replacement_map = {1: 'c', 2: 'd', 3: 'e'}

        # The error may surface in process() or when the context exits;
        # both are inside the assertRaises scope.
        with self.assertRaises(ValueError), Processor(text) as processor:
            processor.process(pattern, replacement_map)
예제 #4
0
    def test_intersection_5(self):
        """Apply three patterns that each straddle a previous replacement.

        The expected span map contains a single entry covering the whole
        affected region of the original text.
        """
        source = ' C AAA C '

        # Ordered (pattern, {group index: replacement}) steps.
        steps = (
            (r' (AAA) C', {1: 'BBEBB'}),
            (r'C (BBEBB)', {1: 'DD'}),
            (r'(C D)D', {1: 'FF'}),
        )

        with Processor(source) as processor:
            for regex, group_replacements in steps:
                processor.process(regex, group_replacements)

        self.assertEqual(processor.processed_text, ' FFD C ')
        self.assertEqual(processor.span_map, [((1, 6), (1, 4))])

        marked_source, marked_result = processor.decorate()

        self.assertEqual(marked_source, ' 00000 C ')
        self.assertEqual(marked_result, ' 000 C ')
예제 #5
0
    def test_matching_1(self):
        """Replace 'AAA' with 'BBB', then every 'BBB' with 'YYY'.

        All replacements keep the original length, so each of the four
        expected span pairs maps a range onto itself.
        """
        source = ' BBB AAA AAA BBB '

        # Ordered (pattern, {group index: replacement}) steps.
        steps = (
            (r'(AAA)', {1: 'BBB'}),
            (r'(BBB)', {1: 'YYY'}),
        )

        expected_spans = [
            ((1, 4), (1, 4)),
            ((5, 8), (5, 8)),
            ((9, 12), (9, 12)),
            ((13, 16), (13, 16)),
        ]

        with Processor(source) as processor:
            for regex, group_replacements in steps:
                processor.process(regex, group_replacements)

        self.assertEqual(processor.processed_text, ' YYY YYY YYY YYY ')
        self.assertEqual(processor.span_map, expected_spans)

        marked_source, marked_result = processor.decorate()

        self.assertEqual(marked_source, ' 000 111 222 333 ')
        self.assertEqual(marked_result, ' 000 111 222 333 ')
예제 #6
0
    def test_matching_6(self):
        """Replace two groups per match, then rewrite each replacement.

        The pattern matches twice in the text, so four span pairs are
        expected, with replacements both shorter and longer than the source.
        """
        source = ' AAA D AAA D '

        # Ordered (pattern, {group index: replacement}) steps.
        steps = (
            (r'(AAA) (D)', {1: 'BBBBB', 2: 'EE'}),
            (r'(BBBBB)', {1: 'CC'}),
            (r'(EE)', {1: 'FFFF'}),
        )

        expected_spans = [
            ((1, 4), (1, 3)),
            ((5, 6), (4, 8)),
            ((7, 10), (9, 11)),
            ((11, 12), (12, 16)),
        ]

        with Processor(source) as processor:
            for regex, group_replacements in steps:
                processor.process(regex, group_replacements)

        self.assertEqual(processor.processed_text, ' CC FFFF CC FFFF ')
        self.assertEqual(processor.span_map, expected_spans)

        marked_source, marked_result = processor.decorate()

        self.assertEqual(marked_source, ' 000 1 222 3 ')
        self.assertEqual(marked_result, ' 00 1111 22 3333 ')
예제 #7
0
    def test_2(self):
        """Run the ``modifiers_1`` fixture over ``text_1`` and check results.

        The fixtures (``text_1``, ``modifiers_1``, ``span_map_1_1``) are
        attributes of the test case defined outside this method.
        """
        with Processor(self.text_1) as processor:
            for regex, group_replacements in self.modifiers_1:
                processor.process(regex, group_replacements)

        self.assertEqual(processor.processed_text, ' BBB BBB BBB BBB ')
        self.assertEqual(processor.span_map, self.span_map_1_1)

        marked_source, marked_result = processor.decorate()

        self.assertEqual(marked_source, ' 000 BBB 111 BBB ')
        self.assertEqual(marked_result, ' 000 BBB 111 BBB ')
예제 #8
0
    def test_5(self):
        """Run the ``modifiers_3`` fixture over ``text_0`` and check results.

        The fixtures (``text_0``, ``modifiers_3``, ``span_map_2``) are
        attributes of the test case defined outside this method.
        """
        with Processor(self.text_0) as processor:
            for regex, group_replacements in self.modifiers_3:
                processor.process(regex, group_replacements)

        self.assertEqual(processor.processed_text, ' CCC CCC CCC CCC ')
        self.assertEqual(processor.span_map, self.span_map_2)

        marked_source, marked_result = processor.decorate()

        self.assertEqual(marked_source, ' 000 111 CCC 222 ')
        self.assertEqual(marked_result, ' 000 111 CCC 222 ')
예제 #9
0
    def test_1(self):
        """Run the ``modifiers_0`` fixture over ``text_1`` and check results.

        The fixtures (``text_1``, ``modifiers_0``, ``span_map``) are
        attributes of the test case defined outside this method.
        """
        with Processor(self.text_1) as processor:
            for regex, group_replacements in self.modifiers_0:
                processor.process(regex, group_replacements)

        self.assertEqual(processor.processed_text, ' ZZZ YYY ZZZ YYY ')
        self.assertEqual(processor.span_map, self.span_map)

        marked_source, marked_result = processor.decorate()

        self.assertEqual(marked_source, ' 000 111 222 333 ')
        self.assertEqual(marked_result, ' 000 111 222 333 ')
예제 #10
0
    def test_single_1(self):
        """Replace two adjacent groups with a single pattern.

        The pattern matches twice in 'ABAB'; four span pairs are expected,
        one per replaced character of the original text.
        """
        source = 'ABAB'
        regex = r'(A)(B)'
        group_replacements = {1: 'CC', 2: 'D'}

        expected_spans = [
            ((0, 1), (0, 2)),
            ((1, 2), (2, 3)),
            ((2, 3), (3, 5)),
            ((3, 4), (5, 6)),
        ]

        with Processor(source) as processor:
            processor.process(regex, group_replacements)

        self.assertEqual(processor.processed_text, 'CCDCCD')
        self.assertEqual(processor.span_map, expected_spans)

        marked_source, marked_result = processor.decorate()

        self.assertEqual(marked_source, '0123')
        self.assertEqual(marked_result, '001223')
예제 #11
0
    def test_matching_3(self):
        """Replace 'A' with 'BB', then 'BB' with 'DD', around an untouched 'Z'.

        Two span pairs are expected, one for each original 'A'.
        """
        source = 'AZA'

        # Ordered (pattern, {group index: replacement}) steps.
        steps = (
            (r'(A)', {1: 'BB'}),
            (r'(BB)', {1: 'DD'}),
        )

        with Processor(source) as processor:
            for regex, group_replacements in steps:
                processor.process(regex, group_replacements)

        self.assertEqual(processor.processed_text, 'DDZDD')
        self.assertEqual(processor.span_map,
                         [((0, 1), (0, 2)), ((2, 3), (3, 5))])

        marked_source, marked_result = processor.decorate()

        self.assertEqual(marked_source, '0Z1')
        self.assertEqual(marked_result, '00Z11')
예제 #12
0
    def test_bundled_1(self):
        """Apply two independent single-group patterns one after the other.

        The expected text, span map and decorations are the same values that
        test_single_1 pins for the combined pattern.
        """
        source = 'ABAB'

        # Ordered (pattern, {group index: replacement}) steps.
        steps = (
            (r'(A)', {1: 'CC'}),
            (r'(B)', {1: 'D'}),
        )

        expected_spans = [
            ((0, 1), (0, 2)),
            ((1, 2), (2, 3)),
            ((2, 3), (3, 5)),
            ((3, 4), (5, 6)),
        ]

        with Processor(source) as processor:
            for regex, group_replacements in steps:
                processor.process(regex, group_replacements)

        self.assertEqual(processor.processed_text, 'CCDCCD')
        self.assertEqual(processor.span_map, expected_spans)

        marked_source, marked_result = processor.decorate()

        self.assertEqual(marked_source, '0123')
        self.assertEqual(marked_result, '001223')
예제 #13
0
    def test_bundled_4(self):
        """Replace the middle of the text and delete both surrounding 'B's.

        Replacing with the empty string is expected to yield zero-width
        spans on the processed side of the span map.
        """
        source = 'BAAB'

        # Ordered (pattern, {group index: replacement}) steps; the second
        # one removes its match by substituting an empty string.
        steps = (
            (r'(AA)', {1: 'CC'}),
            (r'(B)', {1: ''}),
        )

        expected_spans = [
            ((0, 1), (0, 0)),
            ((1, 3), (0, 2)),
            ((3, 4), (2, 2)),
        ]

        with Processor(source) as processor:
            for regex, group_replacements in steps:
                processor.process(regex, group_replacements)

        self.assertEqual(processor.processed_text, 'CC')
        self.assertEqual(processor.span_map, expected_spans)

        marked_source, marked_result = processor.decorate()

        self.assertEqual(marked_source, '0112')
        self.assertEqual(marked_result, '11')
예제 #14
0
    def test_9(self):
        """Process with a callable replacement, swap(), then process again.

        The expected values suggest swap() exchanges the roles of ``text``
        and ``processed_text`` -- confirm against Processor.swap.
        """
        source = 'AA BBBB& CC&CCCC'

        # First step: a callable replacement that strips '&' from each word.
        strip_regex = r'([A-Za-z&]+)'
        strip_groups = {1: lambda x: x.replace('&', '')}

        # Second step, applied after swap(): a plain string replacement.
        expand_regex = r'(AA) '
        expand_groups = {1: 'DDD DDD'}

        expected_spans = [
            ((0, 2), (0, 7)),
            ((3, 7), (8, 13)),
            ((8, 14), (14, 21)),
        ]

        with Processor(source) as processor:
            processor.process(strip_regex, strip_groups)
            processor.swap()
            processor.process(expand_regex, expand_groups)

        self.assertEqual(processor.text, 'AA BBBB CCCCCC')
        self.assertEqual(processor.processed_text, 'DDD DDD BBBB& CC&CCCC')
        self.assertEqual(processor.span_map, expected_spans)

        marked_source, marked_result = processor.decorate()

        self.assertEqual(marked_source, '00 1111 222222')
        self.assertEqual(marked_result, '0000000 11111 2222222')
예제 #15
0
    def test_intersection_6(self):
        """Follow a replacement with an identity callable over its right edge.

        The second pattern covers the tail of the first replacement plus the
        trailing 'B'; the text is unchanged by it, and a single span pair
        covering 'AAAB' is expected.
        """
        source = ' AAAB'

        # Ordered (pattern, {group index: replacement}) steps; the second
        # replacement returns its input unchanged.
        steps = (
            (r' (AAA)B', {1: 'CCC CCC'}),
            (r'(CCCB)', {1: lambda x: x}),
        )

        with Processor(source) as processor:
            for regex, group_replacements in steps:
                processor.process(regex, group_replacements)

        self.assertEqual(processor.processed_text, ' CCC CCCB')
        self.assertEqual(processor.span_map, [((1, 5), (1, 9))])

        marked_source, marked_result = processor.decorate()

        self.assertEqual(marked_source, ' 0000')
        self.assertEqual(marked_result, ' 00000000')
예제 #16
0
    def test_matching_4(self):
        """Grow a word with the first replacement, then shrink it again.

        A single span pair is expected, mapping the original three
        characters onto the final two.
        """
        source = ' AAA '

        # Ordered (pattern, {group index: replacement}) steps.
        steps = (
            (r'(AAA)', {1: 'BBBBB'}),
            (r'(BBBBB)', {1: 'CC'}),
        )

        with Processor(source) as processor:
            for regex, group_replacements in steps:
                processor.process(regex, group_replacements)

        self.assertEqual(processor.processed_text, ' CC ')
        self.assertEqual(processor.span_map, [((1, 4), (1, 3))])

        marked_source, marked_result = processor.decorate()

        self.assertEqual(marked_source, ' 000 ')
        self.assertEqual(marked_result, ' 00 ')
예제 #17
0
    def test_intersection_7(self):
        """Follow a replacement with an identity callable over its left edge.

        The second pattern covers the leading 'B' plus the head of the first
        replacement; the text is unchanged by it, and a single span pair
        covering 'BAAA' is expected.
        """
        source = 'BAAA '

        # Ordered (pattern, {group index: replacement}) steps; the second
        # replacement returns its input unchanged.
        steps = (
            (r'B(AAA) ', {1: 'CCC CCC'}),
            (r'(BCCC)', {1: lambda x: x}),
        )

        with Processor(source) as processor:
            for regex, group_replacements in steps:
                processor.process(regex, group_replacements)

        self.assertEqual(processor.processed_text, 'BCCC CCC ')
        self.assertEqual(processor.span_map, [((0, 4), (0, 8))])

        marked_source, marked_result = processor.decorate()

        self.assertEqual(marked_source, '0000 ')
        self.assertEqual(marked_result, '00000000 ')
예제 #18
0
    def test_new(self):
        """Expand 'etc' and then run an identity callable over each token.

        The second pattern matches runs of non-space characters, including
        the trailing '.'; a single span pair covering 'etc.' is expected.
        """
        source = ' etc.'

        # Ordered (pattern, {group index: replacement}) steps; the second
        # replacement returns its input unchanged.
        steps = (
            (r' (etc)\.', {1: 'et cetera'}),
            (r'([^ ]+)', {1: lambda x: x}),
        )

        with Processor(source) as processor:
            for regex, group_replacements in steps:
                processor.process(regex, group_replacements)

        self.assertEqual(processor.processed_text, ' et cetera.')
        self.assertEqual(processor.span_map, [((1, 5), (1, 11))])

        marked_source, marked_result = processor.decorate()

        self.assertEqual(marked_source, ' 0000')
        self.assertEqual(marked_result, ' 0000000000')
예제 #19
0
    def test_intersection_1(self):
        """Swallow an earlier replacement inside a wider second match.

        The second pattern spans the whole text, so a single span pair
        mapping all seven original characters onto 'DD' is expected.
        """
        source = 'C AAA C'

        # Ordered (pattern, {group index: replacement}) steps.
        steps = (
            (r'(AAA)', {1: 'BBBBB'}),
            (r'(C BBBBB C)', {1: 'DD'}),
        )

        with Processor(source) as processor:
            for regex, group_replacements in steps:
                processor.process(regex, group_replacements)

        self.assertEqual(processor.processed_text, 'DD')
        self.assertEqual(processor.span_map, [((0, 7), (0, 2))])

        marked_source, marked_result = processor.decorate()

        self.assertEqual(marked_source, '0000000')
        self.assertEqual(marked_result, '00')