def parse_tree(tree_string, node_factory=CCGNodeFactory):
    """Parse a CCG derivation string into a derivation tree.

    Raises if any tokens remain unconsumed after the derivation is read.
    """
    token_stream = preserving_split(tree_string, "()<>", suppressors='<>')
    derivation = CCGParser(node_factory).read_paren(token_stream)
    ensure_stream_exhausted(token_stream, 'ccg.parse_tree')
    return derivation
def parse_tree(tree_string, node_factory=CCGNodeFactory):
    """Build a CCG derivation from *tree_string*, consuming every token."""
    ccg_parser = CCGParser(node_factory)
    tokens = preserving_split(tree_string, "()<>", suppressors='<>')
    derivation = ccg_parser.read_paren(tokens)
    # Leftover tokens indicate a malformed derivation string.
    ensure_stream_exhausted(tokens, 'ccg.parse_tree')
    return derivation
def parse_category(cat_string):
    """Parse a category string into a category object.

    Throws DocParseException if unconsumed tokens remain.
    """
    # Mode symbols would also come back as tokens if appended below, so
    # avoid using mode symbols in atomic category labels.
    tokens = preserving_split(cat_string, "(\\/|)[]")  # + ComplexCategory.mode_symbols
    category = parse_compound(tokens)
    ensure_stream_exhausted(tokens, 'cats.parse_category')
    return category
def parse_category(cat_string):
    """Turn a category string into a category object.

    Throws DocParseException if unconsumed tokens remain.
    """
    # NOTE: each mode symbol would be emitted as its own token if appended
    # to the splitter set below; keep mode symbols out of atomic labels.
    stream = preserving_split(cat_string, "(\\/|)[]")  # + ComplexCategory.mode_symbols
    parsed = parse_compound(stream)
    ensure_stream_exhausted(stream, 'cats.parse_category')
    return parsed
def parse_category(cat_string):
    """Parse a category string, attaching an alias when a '~' suffix is present."""
    # Each mode symbol would also come back as a token if appended below;
    # avoid using mode symbols in atomic category labels.
    tokens = preserving_split(cat_string, "(\\/)[]{}~")  # + ComplexCategory.mode_symbols
    category = parse_compound(tokens, {})
    if tokens.peek() == '~':
        category.alias = parse_alias(tokens)
    ensure_stream_exhausted(tokens, 'cats.parse_category')
    return category
def parse_category(cat_string):
    """Parse a category string into a category object (with optional ~alias)."""
    # Mode symbols would be returned as tokens too if appended to the
    # splitter set below; keep them out of atomic category labels.
    stream = preserving_split(cat_string, "(\\/)[]{}~")  # + ComplexCategory.mode_symbols
    result = parse_compound(stream, {})
    # A trailing '~' introduces an alias for the whole category.
    if stream.peek() == '~':
        result.alias = parse_alias(stream)
    ensure_stream_exhausted(stream, 'cats.parse_category')
    return result
def testPennSplit(self):
    """Splitting on parens tokenises a Penn Treebank tree, parens preserved."""
    tree = ''' ( (S (NP-SBJ (NNP Mr.) (NNP Vinken) ) (VP (VBZ is) (NP-PRD (NP (NN chairman) ) (PP (IN of) (NP (NP (NNP Elsevier) (NNP N.V.) ) (, ,) (NP (DT the) (NNP Dutch) (VBG publishing) (NN group) ))))) (. .) ))'''
    tokens = list(preserving_split(tree, r'()'))
    expected = '''( ( S ( NP-SBJ ( NNP Mr. ) ( NNP Vinken ) ) ( VP ( VBZ is ) ( NP-PRD ( NP ( NN chairman ) ) ( PP ( IN of ) ( NP ( NP ( NNP Elsevier ) ( NNP N.V. ) ) ( , , ) ( NP ( DT the ) ( NNP Dutch ) ( VBG publishing ) ( NN group ) ) ) ) ) ) ( . . ) ) )'''
    self.assertEqual(tokens, expected.split(" "))
def testPennSplit(self):
    """Parens act as splitters and are kept as their own tokens in a PTB tree."""
    source = ''' ( (S (NP-SBJ (NNP Mr.) (NNP Vinken) ) (VP (VBZ is) (NP-PRD (NP (NN chairman) ) (PP (IN of) (NP (NP (NNP Elsevier) (NNP N.V.) ) (, ,) (NP (DT the) (NNP Dutch) (VBG publishing) (NN group) ))))) (. .) ))'''
    got = [tok for tok in preserving_split(source, r'()')]
    want = '''( ( S ( NP-SBJ ( NNP Mr. ) ( NNP Vinken ) ) ( VP ( VBZ is ) ( NP-PRD ( NP ( NN chairman ) ) ( PP ( IN of ) ( NP ( NP ( NNP Elsevier ) ( NNP N.V. ) ) ( , , ) ( NP ( DT the ) ( NNP Dutch ) ( VBG publishing ) ( NN group ) ) ) ) ) ) ( . . ) ) )'''.split(" ")
    self.assertEqual(got, want)
def testOnlySplitOnWhitespace(self):
    """With no matching split chars, tokens are just whitespace-separated words."""
    markup = r'<a href="index.html">Text</a>'
    tokens = list(preserving_split(markup, r'@#$%'))
    self.assertEqual(tokens, r'<a href="index.html">Text</a>'.split(" "))
def testPreserves(self):
    """Splitter characters are kept in the output as standalone tokens."""
    markup = r'<a href="index.html">Text</a>'
    tokens = list(preserving_split(markup, r'<>="/'))
    self.assertEqual(
        tokens, r'< a href = " index.html " > Text < / a >'.split(" "))
def testEmptyPeek(self):
    """An exhausted (empty) stream raises on next() but peeks as falsy."""
    empty_stream = preserving_split('', '@#$')
    self.assertRaises(StopIteration, empty_stream.next)
    # peek must yield None rather than raising.
    self.failIf(empty_stream.peek())
def testPeek(self):
    """peek() returns the upcoming token without consuming it."""
    toks = preserving_split('abc/def.ghi', './')
    for want in ('abc', '/', 'def', '.', 'ghi'):
        self.assertEqual(toks.peek(), want)
        toks.next()
def testEmptyPeek(self):
    """Empty input: next() raises StopIteration and peek() is falsy (None)."""
    stream = preserving_split('', '@#$')
    self.assertRaises(StopIteration, stream.next)
    self.failIf(stream.peek())  # peek must yield None, never raise
def testPeek(self):
    """Each peek() matches the token that the following next() consumes."""
    stream = preserving_split('abc/def.ghi', './')
    expected_sequence = ('abc', '/', 'def', '.', 'ghi')
    for expected_tok in expected_sequence:
        self.assertEqual(stream.peek(), expected_tok)
        stream.next()
def testEmptyInput(self):
    """An empty input string yields no tokens at all."""
    tokens = list(preserving_split('', ''))
    self.failIf(tokens)
def testSplitOnNothing(self):
    """With no split chars and no skip chars, the input comes back whole."""
    markup = r'<a href="index.html">Text</a>'
    tokens = list(preserving_split(markup, '', skip_chars=''))
    self.assertEqual(len(tokens), 1)
    self.assertEqual(tokens[0], markup)
def testOnlySplitOnWhitespace(self):
    """When no splitter appears in the input, only whitespace separates tokens."""
    text = r'<a href="index.html">Text</a>'
    got = [tok for tok in preserving_split(text, r'@#$%')]
    want = r'<a href="index.html">Text</a>'.split(" ")
    self.assertEqual(got, want)
def testSplitOnNothing(self):
    """No splitters and no skip chars: the whole input is a single token."""
    text = r'<a href="index.html">Text</a>'
    got = [tok for tok in preserving_split(text, '', skip_chars='')]
    self.assertEqual(got, [text])
def tokenise(self, tree_string, split_chars, suppressors):
    """Tokenise *tree_string* purely on whitespace (space and newline).

    NOTE(review): the *split_chars* and *suppressors* arguments are ignored
    and hard-coded to '' below — presumably deliberate for this reader's
    whitespace-only tokenisation; confirm against callers.
    """
    return preserving_split(
        tree_string, split_chars='', skip_chars=' \n', suppressors='')
def testEmptyInput(self):
    """Tokenising the empty string produces an empty token list."""
    got = [tok for tok in preserving_split('', '')]
    self.failIf(got)
def testAdjacentSplitters(self):
    """Consecutive splitter characters each become their own token."""
    tokens = list(preserving_split(r'a.b.cd.ef..g', '.'))
    self.assertEqual(tokens, r'a . b . cd . ef . . g'.split(" "))
def testAdjacentSplitters(self):
    """Adjacent splitters ('..') yield two splitter tokens with nothing between."""
    got = [tok for tok in preserving_split(r'a.b.cd.ef..g', '.')]
    want = r'a . b . cd . ef . . g'.split(" ")
    self.assertEqual(got, want)
def testPreserves(self):
    """Every splitter character appears in the output as its own token."""
    text = r'<a href="index.html">Text</a>'
    got = [tok for tok in preserving_split(text, r'<>="/')]
    want = r'< a href = " index.html " > Text < / a >'.split(" ")
    self.assertEqual(got, want)