def processing(parser=False):
    """Parse a construct delimited by ``<?`` and ``?>`` and return its body.

    Args:
        parser: Zero-argument callable that is run between the delimiters to
            consume the body. Any falsy value (the default is ``False``)
            selects the built-in body parser
            ``compose(build_string, partial(many, partial(not_one_of, "?")))``.

    Returns:
        Whatever the body parser returns.
    """
    if not parser:
        # Default body parser, built before any input is consumed.
        # NOTE(review): presumably this collects every token other than "?"
        # into a string -- confirm against the parsing library.
        parser = compose(build_string, partial(many, partial(not_one_of, "?")))

    string("<?")
    # NOTE(review): commit() is assumed to stop backtracking past the opening
    # delimiter -- confirm against the parsing library.
    commit()
    body = parser()
    whitespace()
    string("?>")
    return body
def processing(parser=False):
    """Consume ``'<?' body '?>'`` from the input and return the parsed body.

    Args:
        parser: Zero-argument callable used to parse the text between the
            two delimiters. When it is falsy (the default, ``False``) a
            fallback parser is built from ``compose``, ``build_string``,
            ``many`` and ``not_one_of('?')``.

    Returns:
        The value produced by the body parser.
    """
    # The fallback is only constructed when no parser was supplied.
    # NOTE(review): the fallback presumably gathers all non-'?' tokens into a
    # string -- confirm against the parsing library.
    body_parser = parser or compose(
        build_string,
        partial(many, partial(not_one_of, '?')),
    )

    # Opening delimiter, then commit.
    # NOTE(review): commit() is assumed to disable backtracking from this
    # point on -- confirm against the parsing library.
    string('<?')
    commit()

    content = body_parser()

    # Whitespace is consumed before the closing delimiter is matched.
    whitespace()
    string('?>')
    return content
# # To be clear, this piece of code creates a parser that runs when the module is loaded, not when # parsing the XML itself. # # First we are going to create parsers for the individual characters that may appear in the spec, # then we are going to define parsers for a character range. # In the XML character spec, a hexadecimal digit begins with '#x'; cue is a parser combinator. # It takes two parsers, runs the first, and then if that accepts, it runs the second and returns # the result. You can see that we are defining a specialisation of cue with a specialisation of # string (which only accepts if the input matches the iterable it is given), and hex_value above. char_spec_hex = partial(cue, partial(string, "#x"), hex_value) # The next two parsers use a function 'compose' as well as nested partials. This is creating # a parser that returns a parser. compose(f, g) is equivalent to f(g()). char_spec_single_char = compose(partial(partial, one_of), quoted) char_spec_single_hex_char = compose(partial(partial, one_of), char_spec_hex) # Now that we have parsers for the different notations for characters, we need to create a parser # that can choose the correct parser to use. For this we are going to specialise 'choice'. This # combinator is given a set of parsers to try in order; if a parser fails, it backtracks and tries # the next, until one succeeds. If none succeed, then the choice fails. char_spec_range_char = partial(choice, char_spec_hex, any_token) # The second part of the character spec is a range. This is more complex than the previous parsers # and we are using a def for it. This parser takes advantage of the previous definition of # the char_spec_range_char to find either literal characters or hexadecimal codepoints. # It returns a new parser specialising 'satisfies'. satisfies takes a function that is called # against the input. In this case we are creating a parser that checks that a character is within # the given range def char_spec_range():
# # To be clear, this piece of code creates a parser that runs when the module is loaded, not when # parsing the XML itself. # # First we are going to create parsers for the individual characters that may appear in the spec, # then we are going to define parsers for a character range. # In the XML character spec, a hexadecimal digit begins with '#x'; cue is a parser combinator. # It takes two parsers, runs the first, and then if that accepts, it runs the second and returns # the result. You can see that we are defining a specialisation of cue with a specialisation of # string (which only accepts if the input matches the iterable it is given), and hex_value above. char_spec_hex = partial(cue, partial(string, '#x'), hex_value) # The next two parsers use a function 'compose' as well as nested partials. This is creating # a parser that returns a parser. compose(f, g) is equivalent to f(g()). char_spec_single_char = compose(partial(partial, one_of), quoted) char_spec_single_hex_char = compose(partial(partial, one_of), char_spec_hex) # Now that we have parsers for the different notations for characters, we need to create a parser # that can choose the correct parser to use. For this we are going to specialise 'choice'. This # combinator is given a set of parsers to try in order; if a parser fails, it backtracks and tries # the next, until one succeeds. If none succeed, then the choice fails. char_spec_range_char = partial(choice, char_spec_hex, any_token) # The second part of the character spec is a range. This is more complex than the previous parsers # and we are using a def for it. This parser takes advantage of the previous definition of # the char_spec_range_char to find either literal characters or hexadecimal codepoints. # It returns a new parser specialising 'satisfies'. satisfies takes a function that is called # against the input. In this case we are creating a parser that checks that a character is within # the given range def char_spec_range():