Example 1
    def test_keyterm_in_begin(self):
        self.assertEqual(list(tokenize('pre-foo-suf', ('pre', 'pre-foo-suf'))),
                         [Token('pre-foo-suf')])

        self.assertEqual(
            list(tokenize('d-pre-foo-suf', ('pre', 'pre-foo-suf'))),
            [Token('d-'), Token('pre-foo-suf')])
Example 2
    def test_tokenize(self):
        self.assertEqual(list(tokenize('the end', ' ')),
                         [Token('the'), Token(' '),
                          Token('end')])

        self.assertEqual(list(tokenize('the end is', (' ', 'the end',))),
                         [Token('the end'), Token(' '),
                          Token('is')])
Example 3
    def test_keyterm_in_begin(self):
        self.assertEqual(
            list(tokenize('pre-foo-suf', ('pre', 'pre-foo-suf'))),
            [Token('pre-foo-suf')])

        self.assertEqual(
            list(tokenize('d-pre-foo-suf', ('pre', 'pre-foo-suf'))),
            [Token('d-'), Token('pre-foo-suf')])
Example 4
    def test_keyterm_in_end(self):
        self.assertEqual(list(tokenize('pre-foo-suf', ('pre-foo-suf', 'suf'))),
                         [Token('pre-foo-suf')])

        self.assertEqual(
            list(tokenize('n.º 2', (' ', 'n.º', '.º'))),
            [Token('n.º'), Token(' '), Token('2')])

        self.assertEqual(list(tokenize('foo-bar', ('foo-bar', 'bar tal'))),
                         [Token('foo-bar')])
Example 5
    def test_tokenize(self):
        self.assertEqual(
            list(tokenize('the end', ' ')),
            [Token('the'), Token(' '), Token('end')])

        self.assertEqual(list(tokenize('the end is', (
            ' ',
            'the end',
        ))), [Token('the end'), Token(' '),
              Token('is')])
Example 6
    def test_keyterm_in_end(self):
        self.assertEqual(
            list(tokenize('pre-foo-suf', ('pre-foo-suf', 'suf'))),
            [Token('pre-foo-suf')])

        self.assertEqual(
            list(tokenize('n.º 2', (' ', 'n.º', '.º'))),
            [Token('n.º'), Token(' '), Token('2')])

        self.assertEqual(
            list(tokenize('foo-bar', ('foo-bar', 'bar tal'))),
            [Token('foo-bar')])
Example 7
    def test_keyterm_in_word(self):
        """
        262.º with keyterm `.º` must return 262 and .º
        """
        self.assertEqual(
            list(tokenize('262.º', (' ', ',', '.º',))),
            [Token('262'), Token('.º')]
        )

        self.assertEqual(
            list(tokenize('262.º-A', (' ', ',', '.º',))),
            [Token('262'), Token('.º'), Token('-A')]
        )
Example 8
    def test_keyterm_subset_of_keyterm(self):
        """
        When one keyterm is a subset of another, return the other (longer) one.
        """
        self.assertEqual(list(tokenize('Decreto-Lei', {'Decreto'})),
                         [Token('Decreto'), Token('-Lei')])

        self.assertEqual(
            list(tokenize('Decreto-Lei', {'Decreto', 'Decreto-Lei'})),
            [Token('Decreto-Lei')])

        self.assertEqual(
            list(tokenize('Decreto-Barro', {'Decreto', 'Decreto-Lei'})),
            [Token('Decreto'), Token('-Barro')])
Example 9
    def test_keyterm_subset_of_keyterm(self):
        """
        When one keyterm is a subset of another, return the other (longer) one.
        """
        self.assertEqual(
            list(tokenize('Decreto-Lei', {'Decreto'})),
            [Token('Decreto'), Token('-Lei')])

        self.assertEqual(
            list(tokenize('Decreto-Lei', {'Decreto', 'Decreto-Lei'})),
            [Token('Decreto-Lei')])

        self.assertEqual(
            list(tokenize('Decreto-Barro', {'Decreto', 'Decreto-Lei'})),
            [Token('Decreto'), Token('-Barro')])
Example 10
 def test_real(self):
     self.assertEqual(
         list(
             tokenize('no n.º 2 do artigo 26.º do Decreto-Lei 2/2013,',
                      (' ', '.', ',', 'Decreto-Lei', 'Decretos-Leis', 'n.º',
                       '.º', 'n.os'))), [
                           Token('no'),
                           Token(' '),
                           Token('n.º'),
                           Token(' '),
                           Token('2'),
                           Token(' '),
                           Token('do'),
                           Token(' '),
                           Token('artigo'),
                           Token(' '),
                           Token('26'),
                           Token('.º'),
                           Token(' '),
                           Token('do'),
                           Token(' '),
                           Token('Decreto-Lei'),
                           Token(' '),
                           Token('2/2013'),
                           Token(',')
                       ])
Example 11
 def test_multiple_keyterms(self):
     self.assertEqual(
         list(
             tokenize('Decreto-Barros',
                      {'Decreto', 'Decreto-Lei', '-Barro'})),
         [Token('Decreto'), Token('-Barro'),
          Token('s')])
Example 12
 def test_shifted_keyterms(self):
     self.assertEqual(
         list(
             tokenize('the foo is bad',
                      (' ', 'the foo stay', 'foo is bad'))),
         [Token('the'), Token(' '),
          Token('foo is bad')])
Example 13
 def test_keyterm_found(self):
     self.assertEqual(
         list(tokenize('this is the end of', (' ', 'the end',))),
         [Token('this'), Token(' '),
          Token('is'), Token(' '),
          Token('the end'),
          Token(' '),
          Token('of')])
Example 14
    def test_keyterm_in_word(self):
        """
        262.º with keyterm `.º` must return 262 and .º
        """
        self.assertEqual(list(tokenize('262.º', (
            ' ',
            ',',
            '.º',
        ))), [Token('262'), Token('.º')])

        self.assertEqual(
            list(tokenize('262.º-A', (
                ' ',
                ',',
                '.º',
            ))),
            [Token('262'), Token('.º'), Token('-A')])
Example 15
    def test_similar_keyterms(self):
        expected = [Token('this'),
                    Token(' '),
                    Token('is'),
                    Token(' '),
                    Token('the'),
                    Token(' '),
                    Token('foo'),
                    Token(' '),
                    Token('of')]

        self.assertEqual(list(tokenize('this is the foo of',
                                       (' ', 'the end', 'the bar'))),
                         expected)

        self.assertEqual(list(tokenize('this is the foo of',
                                       (' ', 'the foo is', 'foo is bad'))),
                         expected)
Example 16
    def test_similar_keyterms(self):
        expected = [
            Token('this'),
            Token(' '),
            Token('is'),
            Token(' '),
            Token('the'),
            Token(' '),
            Token('foo'),
            Token(' '),
            Token('of')
        ]

        self.assertEqual(
            list(tokenize('this is the foo of', (' ', 'the end', 'the bar'))),
            expected)

        self.assertEqual(
            list(
                tokenize('this is the foo of',
                         (' ', 'the foo is', 'foo is bad'))), expected)
Example 17
 def test_real(self):
     self.assertEqual(
         list(tokenize(
             'no n.º 2 do artigo 26.º do Decreto-Lei 2/2013,',
             (' ', '.', ',', 'Decreto-Lei', 'Decretos-Leis',
              'n.º', '.º', 'n.os'))),
         [Token('no'), Token(' '), Token('n.º'), Token(' '),
          Token('2'), Token(' '), Token('do'), Token(' '),
          Token('artigo'), Token(' '), Token('26'),
          Token('.º'), Token(' '), Token('do'),
          Token(' '), Token('Decreto-Lei'), Token(' '),
          Token('2/2013'), Token(',')]
     )
Example 18
 def test_keyterm_found(self):
     self.assertEqual(
         list(tokenize('this is the end of', (
             ' ',
             'the end',
         ))), [
             Token('this'),
             Token(' '),
             Token('is'),
             Token(' '),
             Token('the end'),
             Token(' '),
             Token('of')
         ])
Example 19
def parse(string, managers, terms=None):
    """
    Parses a string into a list of expressions. Uses managers to replace
    `Token`s with other elements.
    """
    # avoid a mutable default argument: `terms |= ...` below updates the set
    # in place, so a shared `set()` default would leak terms across calls
    terms = set(terms) if terms is not None else set()

    result = []  # the end result

    for manager in managers:
        terms |= manager.terms

    for index, token in enumerate(tokenize(string, terms)):
        result.append(token)

        caught = False
        for manager in managers:
            manager.generate(index, token)
            caught = manager.observe(index, token, caught) or caught
            manager.replace_in(result)

    for manager in managers:
        manager.finish(result)

    return result
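
The sketch below shows a minimal manager that parse() could accept. It is a guess inferred only from the five members parse() touches (a `terms` set plus generate(), observe(), replace_in() and finish()); the class name, the import path and the upper-casing behaviour are illustrative assumptions, not part of the project.

# Hypothetical import path; in the real project Token and parse live wherever
# the test snippets above import them from.
from tokenizer import Token, parse


class UpperCaseManager:
    """Illustrative manager: replaces one chosen keyterm with its upper-cased
    form. The interface (terms / generate / observe / replace_in / finish) is
    inferred from how parse() calls its managers."""

    def __init__(self, keyterm):
        self.terms = {keyterm}   # parse() merges this into the tokenizer terms
        self._keyterm = keyterm
        self._pending = []       # result indexes we still have to rewrite

    def generate(self, index, token):
        # nothing to pre-compute in this sketch
        pass

    def observe(self, index, token, caught):
        # claim the token only if no earlier manager already caught it
        if not caught and token == Token(self._keyterm):
            self._pending.append(index)
            return True
        return False

    def replace_in(self, result):
        # token i of the stream sits at result[i], since parse() appends every
        # token in order (valid as long as no manager changes the list length)
        for index in self._pending:
            result[index] = Token(self._keyterm.upper())
        self._pending = []

    def finish(self, result):
        # no end-of-string work needed in this sketch
        pass


# Under these assumptions, this call would be expected to yield:
# parse('foo bar', [UpperCaseManager('bar')], {' '})
# -> [Token('foo'), Token(' '), Token('BAR')]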
Example 20
 def test_keyterms_in_string_end(self):
     self.assertEqual(
         list(tokenize('the is', (' ', 'is solid', 'is black'))),
         [Token('the'), Token(' '), Token('is')])
Example 21
 def test_basic(self):
     self.assertEqual(
         list(tokenize('the the', ' ')),
         [Token('the'), Token(' '), Token('the')])
Example 22
 def test_multiple_keyterms(self):
     self.assertEqual(
         list(tokenize('Decreto-Barros', {'Decreto', 'Decreto-Lei', '-Barro'})),
         [Token('Decreto'), Token('-Barro'), Token('s')])
Example 23
 def test_basic(self):
     self.assertEqual(list(tokenize('the the', ' ')),
                      [Token('the'), Token(' '), Token('the')])
Example 24
 def test_keyterm_in_middle(self):
     self.assertEqual(list(tokenize('pre-foo-suf', ('foo', 'pre-foo-suf'))),
                      [Token('pre-foo-suf')])
Example 25
 def test_shifted_keyterms(self):
     self.assertEqual(list(tokenize('the foo is bad',
                                    (' ', 'the foo stay', 'foo is bad'))),
                      [Token('the'),
                       Token(' '),
                       Token('foo is bad')])
Example 26
 def test_keyterm_in_middle(self):
     self.assertEqual(
         list(tokenize('pre-foo-suf', ('foo', 'pre-foo-suf'))),
         [Token('pre-foo-suf')])