Esempio n. 1
0
def parse_tree(tree_string, node_factory=CCGNodeFactory):
    """Read a single parenthesised derivation out of ``tree_string``.

    The string is tokenised with ``preserving_split`` (splitting on the
    characters ``()<>``, with ``<>`` passed as suppressors), handed to
    ``CCGParser.read_paren``, and the token stream is then checked for
    leftovers via ``ensure_stream_exhausted``.

    ``node_factory`` is forwarded to the ``CCGParser`` constructor.
    Returns whatever ``read_paren`` produced.
    """
    ccg_parser = CCGParser(node_factory)
    token_stream = preserving_split(tree_string, "()<>", suppressors='<>')

    derivation = ccg_parser.read_paren(token_stream)
    # NOTE(review): presumably this raises when tokens are left over --
    # confirm against ensure_stream_exhausted.
    ensure_stream_exhausted(token_stream, 'ccg.parse_tree')
    return derivation
Esempio n. 2
0
def parse_tree(tree_string, node_factory=CCGNodeFactory):
    """Parse ``tree_string`` into a derivation using a ``CCGParser``.

    A parser is built around ``node_factory``; the input is split into a
    token stream by ``preserving_split`` on ``()<>`` (with ``<>`` given as
    suppressors); ``read_paren`` consumes the stream; finally
    ``ensure_stream_exhausted`` is called on the stream with the label
    ``'ccg.parse_tree'``.

    Returns the value produced by ``read_paren``.
    """
    the_parser = CCGParser(node_factory)
    tokens = preserving_split(tree_string, "()<>", suppressors='<>')

    parsed = the_parser.read_paren(tokens)
    # Runs after parsing so that any trailing, unconsumed input is noticed
    # (assumed from the helper's name -- TODO confirm its failure mode).
    ensure_stream_exhausted(tokens, 'ccg.parse_tree')
    return parsed
Esempio n. 3
0
def parse_category(cat_string):
    '''Turn a category string into a category object.

    The string is tokenised by preserving_split on the characters
    ( \\ / | ) [ ] and the resulting stream is handed to parse_compound.
    Throws DocParseException if unconsumed tokens remain.'''
    # Mode symbols are not part of the split characters at present (the
    # concatenation with ComplexCategory.mode_symbols is disabled).
    # Important: avoid using mode symbols in atomic category labels.
    split_on = "(\\/|)[]"
    token_stream = preserving_split(cat_string, split_on)

    category = parse_compound(token_stream)
    ensure_stream_exhausted(token_stream, 'cats.parse_category')
    return category
Esempio n. 4
0
def parse_category(cat_string):
    '''Parse cat_string and return the category object it denotes.

    Tokens come from preserving_split, which is asked to split on the
    characters ( \\ / | ) [ ]; parse_compound consumes them.
    Throws DocParseException if unconsumed tokens remain.'''
    # The split set does not currently include
    # ComplexCategory.mode_symbols (that addition is disabled).
    # Important: avoid using mode symbols in atomic category labels.
    tokens = preserving_split(cat_string, "(\\/|)[]")

    parsed_category = parse_compound(tokens)
    ensure_stream_exhausted(tokens, 'cats.parse_category')
    return parsed_category
Esempio n. 5
0
def parse_category(cat_string):
    """Parse ``cat_string`` into a category, with an optional alias.

    The string is tokenised by ``preserving_split`` on the characters
    ``( \\ / ) [ ] { } ~``.  ``parse_compound`` is called with the stream
    and a fresh empty dict.  If the next token is ``~``, ``parse_alias`` is
    invoked and its result stored on the category's ``alias`` attribute.
    ``ensure_stream_exhausted`` is then called on the stream.

    Returns the object produced by ``parse_compound``.
    """
    # Mode symbols are not part of the split characters at present (the
    # concatenation with ComplexCategory.mode_symbols is disabled).
    # Important: avoid using mode symbols in atomic category labels.
    split_on = "(\\/)[]{}~"
    token_stream = preserving_split(cat_string, split_on)

    category = parse_compound(token_stream, {})

    # peek() inspects the upcoming token; parse_alias receives the stream
    # with the '~' still pending.
    if token_stream.peek() == '~':
        category.alias = parse_alias(token_stream)

    # NOTE(review): presumably raises on leftover tokens -- confirm.
    ensure_stream_exhausted(token_stream, 'cats.parse_category')
    return category
Esempio n. 6
0
def parse_category(cat_string):
    """Build a category object from ``cat_string``.

    Steps: tokenise with ``preserving_split`` on ``( \\ / ) [ ] { } ~``;
    run ``parse_compound`` on the stream together with a new empty dict;
    when the stream's next token is ``~``, set the result's ``alias``
    attribute from ``parse_alias``; call ``ensure_stream_exhausted``.

    Returns the category produced by ``parse_compound``.
    """
    # ComplexCategory.mode_symbols is not added to the split set (that
    # addition is disabled in this version).
    # Important: avoid using mode symbols in atomic category labels.
    tokens = preserving_split(cat_string, "(\\/)[]{}~")

    parsed_category = parse_compound(tokens, {})

    has_alias = tokens.peek() == '~'
    if has_alias:
        parsed_category.alias = parse_alias(tokens)

    # Assumed to reject trailing input -- TODO confirm with the helper.
    ensure_stream_exhausted(tokens, 'cats.parse_category')
    return parsed_category
Esempio n. 7
0
    def testPennSplit(self):
        s = ''' 
( (S 
    (NP-SBJ (NNP Mr.) (NNP Vinken) )
    (VP (VBZ is) 
      (NP-PRD 
        (NP (NN chairman) )
        (PP (IN of) 
          (NP 
            (NP (NNP Elsevier) (NNP N.V.) )
            (, ,) 
            (NP (DT the) (NNP Dutch) (VBG publishing) (NN group) )))))
    (. .) ))'''
        
        result = [tok for tok in preserving_split(s, r'()')]
        self.assertEqual(result, '''( ( S ( NP-SBJ ( NNP Mr. ) ( NNP Vinken ) ) ( VP ( VBZ is ) ( NP-PRD ( NP ( NN chairman ) ) ( PP ( IN of ) ( NP ( NP ( NNP Elsevier ) ( NNP N.V. ) ) ( , , ) ( NP ( DT the ) ( NNP Dutch ) ( VBG publishing ) ( NN group ) ) ) ) ) ) ( . . ) ) )'''.split(" "))
Esempio n. 8
0
    def testPennSplit(self):
        s = ''' 
( (S 
    (NP-SBJ (NNP Mr.) (NNP Vinken) )
    (VP (VBZ is) 
      (NP-PRD 
        (NP (NN chairman) )
        (PP (IN of) 
          (NP 
            (NP (NNP Elsevier) (NNP N.V.) )
            (, ,) 
            (NP (DT the) (NNP Dutch) (VBG publishing) (NN group) )))))
    (. .) ))'''

        result = [tok for tok in preserving_split(s, r'()')]
        self.assertEqual(
            result,
            '''( ( S ( NP-SBJ ( NNP Mr. ) ( NNP Vinken ) ) ( VP ( VBZ is ) ( NP-PRD ( NP ( NN chairman ) ) ( PP ( IN of ) ( NP ( NP ( NNP Elsevier ) ( NNP N.V. ) ) ( , , ) ( NP ( DT the ) ( NNP Dutch ) ( VBG publishing ) ( NN group ) ) ) ) ) ) ( . . ) ) )'''
            .split(" "))
Esempio n. 9
0
 def testOnlySplitOnWhitespace(self):
     # None of the split characters (@ # $ %) occur in the input, so the
     # only token boundary expected is the single space.
     markup = r'<a href="index.html">Text</a>'
     expected_tokens = markup.split(" ")
     actual_tokens = list(preserving_split(markup, r'@#$%'))
     self.assertEqual(actual_tokens, expected_tokens)
Esempio n. 10
0
 def testPreserves(self):
     # Each split character (< > = " /) must come back as a token of its
     # own, in between the text fragments it separated.
     markup = r'<a href="index.html">Text</a>'
     expected_tokens = r'< a href = " index.html " > Text < / a >'.split(" ")
     actual_tokens = list(preserving_split(markup, r'<>="/'))
     self.assertEqual(actual_tokens, expected_tokens)
Esempio n. 11
0
 def testEmptyPeek(self):
     # A stream built from the empty string has no tokens at all.
     stream = preserving_split('', '@#$')
     # Advancing it must signal exhaustion straight away.
     self.assertRaises(StopIteration, stream.next)
     # peek must yield None.  assertFalse is used in place of failIf, a
     # deprecated unittest alias that was removed in Python 3.12.
     self.assertFalse(stream.peek())
Esempio n. 12
0
 def testPeek(self):
     # peek() must report the upcoming token without consuming it; the
     # following next() call is what moves the stream forward.
     stream = preserving_split('abc/def.ghi', './')
     expected_sequence = ('abc', '/', 'def', '.', 'ghi')
     for upcoming in expected_sequence:
         self.assertEqual(stream.peek(), upcoming)
         stream.next()
Esempio n. 13
0
 def testEmptyPeek(self):
     # Tokenising the empty string gives a stream with nothing in it.
     stream = preserving_split('', '@#$')
     # next() on such a stream must raise StopIteration.
     self.assertRaises(StopIteration, stream.next)
     # peek must yield None.  failIf was a deprecated alias of assertFalse
     # (removed in Python 3.12), so the canonical name is used here.
     self.assertFalse(stream.peek())
Esempio n. 14
0
 def testPeek(self):
     # Walk the stream one token at a time, checking that peek() always
     # shows the token that the subsequent next() call will consume.
     token_stream = preserving_split('abc/def.ghi', './')
     for wanted in ('abc', '/', 'def', '.', 'ghi'):
         peeked = token_stream.peek()
         self.assertEqual(peeked, wanted)
         token_stream.next()
Esempio n. 15
0
 def testEmptyInput(self):
     # Empty input with no split characters must produce no tokens.
     result = list(preserving_split('', ''))
     # assertFalse replaces failIf, a deprecated unittest alias that was
     # removed in Python 3.12.
     self.assertFalse(result)
Esempio n. 16
0
 def testSplitOnNothing(self):
     # With both split_chars and skip_chars empty there is nothing to
     # break the input on, so it must come back as one unchanged token.
     markup = r'<a href="index.html">Text</a>'
     tokens = list(preserving_split(markup, '', skip_chars=''))
     self.assertEqual(len(tokens), 1)
     self.assertEqual(tokens[0], markup)
Esempio n. 17
0
 def testOnlySplitOnWhitespace(self):
     # The split characters given (@ # $ %) never appear in the text, so
     # the result should match a plain split on the single space.
     text = r'<a href="index.html">Text</a>'
     tokens = list(preserving_split(text, r'@#$%'))
     self.assertEqual(tokens, text.split(" "))
Esempio n. 18
0
 def testSplitOnNothing(self):
     # No split characters and no skip characters: the whole input is
     # expected back as a single token, byte for byte.
     text = r'<a href="index.html">Text</a>'
     produced = list(preserving_split(text, '', skip_chars=''))
     self.assertEqual(len(produced), 1)
     only_token = produced[0]
     self.assertEqual(only_token, text)
Esempio n. 19
0
 def tokenise(self, tree_string, split_chars, suppressors):
     """Tokenise ``tree_string`` with a fixed preserving_split configuration.

     Delegates to ``preserving_split`` with empty ``split_chars``, empty
     ``suppressors`` and ``skip_chars`` set to space and newline.  The
     ``split_chars`` and ``suppressors`` arguments passed by the caller
     are not used.
     """
     # NOTE(review): the incoming split_chars/suppressors are ignored --
     # this looks like a deliberate override; confirm with the caller.
     fixed_options = dict(split_chars='', skip_chars=' \n', suppressors='')
     return preserving_split(tree_string, **fixed_options)
Esempio n. 20
0
 def testEmptyInput(self):
     # Tokenising the empty string (no split characters) yields nothing.
     tokens = list(preserving_split('', ''))
     # failIf is a deprecated alias removed from unittest in Python 3.12;
     # assertFalse is the supported equivalent.
     self.assertFalse(tokens)
Esempio n. 21
0
 def testAdjacentSplitters(self):
     # Two split characters in a row ('..') must come out as two separate
     # '.' tokens, with no empty token in between.
     expected_tokens = r'a . b . cd . ef . . g'.split(" ")
     actual_tokens = list(preserving_split(r'a.b.cd.ef..g', '.'))
     self.assertEqual(actual_tokens, expected_tokens)
Esempio n. 22
0
 def testAdjacentSplitters(self):
     # Consecutive '.' characters are each expected as their own token;
     # nothing empty should appear between them.
     dotted = r'a.b.cd.ef..g'
     tokens = list(preserving_split(dotted, '.'))
     self.assertEqual(tokens, r'a . b . cd . ef . . g'.split(" "))
Esempio n. 23
0
 def testPreserves(self):
     # Split characters (< > = " /) are kept in the output as one-character
     # tokens rather than being discarded.
     text = r'<a href="index.html">Text</a>'
     tokens = list(preserving_split(text, r'<>="/'))
     wanted = r'< a href = " index.html " > Text < / a >'.split(" ")
     self.assertEqual(tokens, wanted)