def name():
    """Parse whitespace-separated alphabetic words into one space-joined name.

    Each word is a run of characters for which ``isalpha()`` is true; the
    words are joined with a single space and stripped. An empty result is
    rejected with ``fail()``.

    >>> parse(name, 'John')
    'John'
    >>> parse(name, 'John Smith')
    'John Smith'
    >>> parse(name, 'John, Smith')
    'John'
    >>> parse(name, 'John Smith ')
    'John Smith'
    """
    def is_letter(char):
        # A falsy token is rejected before ``isalpha`` is consulted.
        return char and char.isalpha()

    word = partial(many, partial(satisfies, is_letter))
    words = ["".join(chars) for chars in sep1(word, whitespace1)]
    full_name = " ".join(words).strip()
    if not full_name:
        fail()
    return full_name
def validate_address(text):
    """Return True when ``text`` parses as ``address`` followed by a newline.

    The parser is run with ``run_text_parser``. Any exception raised while
    parsing is printed and reported as ``False`` rather than propagated.
    """
    try:
        run_text_parser(partial(seq, address, partial(one_of, '\n')), text)
        return True
    except Exception as e:  # ``as`` form works on Python 2.6+ and Python 3
        print(e)
        return False
def identifier(first=partial(one_of, ascii_letters),
               consecutive=partial(one_of, ascii_letters + digit_chars + '_'),
               must_contain=set(digit_chars)):
    """Parse an identifier and return its characters as a list.

    ``first`` (when not ``None``) parses the leading character and
    ``consecutive`` parses each following character.

    If ``must_contain`` is ``None`` the tail is parsed with ``many`` and no
    further requirement applies. Otherwise the tail is parsed with ``many1``
    and must include at least one character from ``must_contain``; if it does
    not, the parser fails. ``must_contain`` may be any iterable of
    characters (set, string, list, ...).

    The default is a letter followed by letters, digits or underscores with
    at least one digit, which allows an easy distinction between names and
    identifiers.

    >>> parse(identifier, 'abc123')
    'abc123'

    >>> parse(identifier, 'abc') # doctest: +ELLIPSIS
    Traceback (most recent call last):
     ...
    NoMatch: ...

    >>> parse(partial(identifier, must_contain=None), 'abc')
    'abc'
    """
    result = []
    if first is not None:
        result.append(first())

    if must_contain is None:
        chars = many(consecutive)
    else:
        # Coerce once: the intersection below needs a real set, and callers
        # may reasonably pass a string or list of required characters.
        required = set(must_contain)
        chars = many1(partial(choice, consecutive, partial(one_of, required)))
        if not required & set(chars):
            fail()

    result.extend(chars)
    return result
def domain_literal():
    """Consume a bracketed domain literal; nothing is returned.

    Sequence: optional ``CFWS``, ``square``, any number of
    (optional ``FWS`` + ``dtext``), optional ``FWS``, ``unsquare``,
    optional ``CFWS``.

    NOTE(review): the shape looks like the RFC 5322 ``domain-literal``
    production -- confirm against the grammar the module targets.
    """
    folded_dtext = partial(seq, partial(optional, FWS), dtext)

    optional(CFWS)
    square()
    many(folded_dtext)
    optional(FWS)
    unsquare()
    optional(CFWS)
def quoted_string():
    """Consume a double-quoted string; nothing is returned.

    Sequence: optional ``CFWS``, ``DQUOTE``, any number of
    (optional ``FWS`` + ``qcontent``), optional ``FWS``, ``DQUOTE``,
    optional ``CFWS``.

    NOTE(review): the shape looks like the RFC 5322 ``quoted-string``
    production -- confirm against the grammar the module targets.
    """
    folded_content = partial(seq, partial(optional, FWS), qcontent)

    optional(CFWS)
    DQUOTE()
    many(folded_content)
    optional(FWS)
    DQUOTE()
    optional(CFWS)
def xmldecl():
    """Parse the inside of an XML declaration.

    Matches ``xml`` (any case) and following whitespace, then the optional
    ``version`` and ``standalone`` attributes in that order.

    Returns a 3-tuple ``("xml", version, standalone)``; the attribute slots
    fall back to ``"1.0"`` and ``"yes"`` respectively.
    """
    caseless_string("xml")
    whitespace()

    version_parser = partial(xmldecl_attr, "version", version_num)
    standalone_parser = partial(xmldecl_attr, "standalone", standalone)

    version = optional(version_parser, "1.0")
    standalone_value = optional(standalone_parser, "yes")
    return ("xml", version, standalone_value)
def processing(parser=False):
    """Parse a ``<? ... ?>`` construct and return what ``parser`` produced.

    ``parser`` handles the content between the delimiters. When it is falsy,
    a default is used that collects every character other than ``?`` into a
    string.
    """
    if not parser:
        parser = compose(build_string, partial(many, partial(not_one_of, "?")))

    string("<?")
    commit()
    content = parser()
    whitespace()
    string("?>")
    return content
def processing(parser=False):
    """Parse a ``<? ... ?>`` construct and return the inner parser's result.

    When no ``parser`` is supplied (or it is falsy), the content is read as
    a string of characters up to the first ``?``.
    """
    if not parser:
        not_question_mark = partial(not_one_of, '?')
        parser = compose(build_string, partial(many, not_question_mark))

    string('<?')
    commit()
    payload = parser()
    whitespace()
    string('?>')
    return payload
def tags():
    """Parse tags separated by runs of spaces and/or commas.

    Returns a list with one string per tag.

    >>> run_parser(tags, 'abc, def')[0]
    ['abc', 'def']

    >>> run_parser(tags, '#abc #def')[0]
    ['abc', 'def']
    """
    delimiter = partial(many1, partial(one_of, ' ,'))
    return ["".join(chars) for chars in sep(tag, delimiter)]
def identifiers(**kwargs):
    """Parse identifiers separated by runs of spaces and/or commas.

    Keyword arguments are forwarded to ``identifier`` for every term.
    Returns a list with one string per identifier.

    >>> run_parser(identifiers, 'abc123 def456')[0]
    ['abc123', 'def456']
    """
    term = partial(identifier, **kwargs)
    delimiter = partial(many1, partial(one_of, ' ,'))
    return ["".join(chars) for chars in sep(term, delimiter)]
def char_spec_range():
    """Parse ``[low-high]`` and return a parser for characters in that range.

    The returned parser is ``satisfies`` bound to an inclusive
    ``low <= c <= high`` comparison.
    """
    one_of("[")
    lower = char_spec_range_char()
    one_of("-")
    upper = char_spec_range_char()
    one_of("]")

    def in_range(c):
        return lower <= c <= upper

    return partial(satisfies, in_range)
def parse(cls):
    """Parse a ``+register`` / ``+reg`` message.

    After the keyword, one of the following may follow (each introduced by
    at least one whitespace character):

    * a hash mark followed by the remaining tokens, or a run of digits and
      the characters `` -+()``; either is stored under ``'ident'`` with the
      characters `` -+()`` stripped out;
    * otherwise a name as parsed by ``pico.name``, stored under ``'name'``.

    Returns a dict that may contain ``'ident'`` or ``'name'`` (or neither).
    """
    one_of('+')
    pico.one_of_strings('register', 'reg')

    result = {}

    @tri
    def hash_ident():
        whitespace1()
        pico.hash()
        commit()
        return many1(any_token)

    @tri
    def phone_number():
        whitespace1()
        return many1(partial(one_of, string.digits + ' -+()'))

    @tri
    def spaced_name():
        whitespace1()
        return pico.name()

    # Keep the parsers and their results under distinct names so that the
    # parser functions are not rebound to the values they produced.
    ident_chars = optional(partial(choice, hash_ident, phone_number), None)
    if ident_chars is not None:
        # Raw string: ``\-`` is a regex escape, not a Python string escape.
        result['ident'] = re.sub(r'[ \-+()]', '', "".join(ident_chars))
    else:
        parsed_name = optional(spaced_name, None)
        if parsed_name is not None:
            result['name'] = parsed_name

    return result
def parse(cls):
    """Parse an optional identifier or name from the input.

    Tries, in order: a hash mark followed by the remaining tokens, a run of
    digits and the characters `` -+()``, and finally ``pico.name``. An
    identifier is stored under ``'ident'`` with the characters `` -+()``
    stripped out; a name is stored under ``'name'``.

    Raises ``FormatError`` when nothing was recognised but input remains
    after skipping whitespace.

    Returns a dict that may contain ``'ident'`` or ``'name'`` (or neither).
    """
    result = {}

    @tri
    def hash_ident():
        pico.hash()
        commit()
        return many1(any_token)

    @tri
    def phone_number():
        return many1(partial(one_of, string.digits + ' -+()'))

    # Keep the parsers and their results under distinct names so that the
    # parser functions are not rebound to the values they produced.
    ident_chars = optional(partial(choice, hash_ident, phone_number), None)
    if ident_chars is not None:
        # Raw string: ``\-`` is a regex escape, not a Python string escape.
        result['ident'] = re.sub(r'[ \-+()]', '', "".join(ident_chars))
    else:
        parsed_name = optional(tri(pico.name), None)
        if parsed_name is not None:
            result['name'] = parsed_name

    whitespace()
    if peek() and not result:
        raise FormatError(
            "We did not understand: %s." % "".join(remaining()))

    return result
def date(formats=("%m/%d/%Y",)):
    """Parse a date, trying each of the supplied formats in order.

    Every entry of ``formats`` is handed to ``_parse_date_format``; the
    alternatives are wrapped in ``tri`` and combined with ``choice``.

    To integrate with Django's date format settings, pass in the
    ``DATE_INPUT_FORMATS`` setting. The default settings is defined in
    :mod:`django.conf.global_settings` as::

      DATE_INPUT_FORMATS = (
          '%Y-%m-%d', '%m/%d/%Y', '%m/%d/%y', # '2006-10-25', '10/25/2006', '10/25/06'
          '%b %d %Y', '%b %d, %Y',            # 'Oct 25 2006', 'Oct 25, 2006'
          '%d %b %Y', '%d %b, %Y',            # '25 Oct 2006', '25 Oct, 2006'
          '%B %d %Y', '%B %d, %Y',            # 'October 25 2006', 'October 25, 2006'
          '%d %B %Y', '%d %B, %Y',            # '25 October 2006', '25 October, 2006'
      )

    To use this setting, wrap the function like this:

    >>> from django.conf import settings
    >>> date = partial(date, formats=settings.DATE_INPUT_FORMATS)

    The standard settings enables a wide set of input formats; we
    demonstrate some of them here:

    >>> run_parser(date, '12/31/1999')[0].isoformat()
    '1999-12-31T00:00:00'
    >>> run_parser(date, 'December 31, 1999')[0].isoformat()
    '1999-12-31T00:00:00'
    >>> run_parser(date, '12/31/99')[0].isoformat()
    '1999-12-31T00:00:00'
    """
    alternatives = [tri(partial(_parse_date_format, fmt)) for fmt in formats]
    return choice(*alternatives)
def tag():
    """Parse one tag: an optional hash mark (``'#'``) then ASCII letters.

    Returns the matched letters (at least one); the hash mark is dropped.
    """
    optional(hash, None)
    letter = partial(one_of, ascii_letters)
    return many1(letter)
def char_spec_range():
    """Parse a bracketed ``[a-b]`` range and build a matching parser.

    Returns ``satisfies`` bound to a predicate that accepts any character
    ``c`` with ``low <= c <= high`` (both ends inclusive).
    """
    one_of("[")
    first_char = char_spec_range_char()
    one_of('-')
    last_char = char_spec_range_char()
    one_of("]")

    def within_bounds(c):
        return first_char <= c <= last_char

    return partial(satisfies, within_bounds)
def name():
    """Parse alphabetic words separated by whitespace; join with one space.

    >>> parse(name, 'John')
    'John'
    >>> parse(name, 'John Smith')
    'John Smith'
    >>> parse(name, 'John, Smith')
    'John'
    """
    word = partial(many1, partial(satisfies, lambda ch: ch.isalpha()))
    words = ["".join(chars) for chars in sep(word, whitespace)]
    return " ".join(words)
def quoted(parser=any_token):
    """Parse a quoted value and return its contents as a string.

    The opening quote is read with ``quote``; ``parser`` (``any_token`` by
    default, or a more specific parser) is then applied repeatedly until
    the same quote character is encountered again.
    """
    opening = quote()
    closing = partial(one_of, opening)
    contents, _ = many_until(parser, closing)
    return build_string(contents)
def parse(cls):
    """Parse a ``+epi`` message into indicator/value aggregates.

    After ``+epi`` and whitespace, the remaining input is read as repeated
    ``CODE VALUE`` pairs, where ``CODE`` is one of ``cls.TOKENS`` or
    ``cls.ALIAS`` (matched without regard to case; aliases are rewritten via
    ``cls.ALIAS``) and ``VALUE`` is an integer. Pairs may be followed by any
    of the characters `` ,;.``.

    Raises ``FormatError`` for an unknown indicator, a duplicated indicator,
    a missing value or a negative value.

    Returns ``{'aggregates': {code: value, ...}}``.
    """
    one_of('+')
    caseless_string('epi')

    aggregates = {}

    if whitespace():
        while peek():
            try:
                code = "".join(pico.one_of_strings(*(
                    tuple(cls.TOKENS) + tuple(cls.ALIAS))))
                code = code.upper()
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt / SystemExit are not swallowed.
                raise FormatError(
                    "Expected an epidemiological indicator "
                    "such as TB or MA (got: %s)." %
                    "".join(remaining()))

            # rewrite alias
            code = cls.ALIAS.get(code, code)

            if code in aggregates:
                raise FormatError("Duplicate value for %s." % code)

            whitespace1()

            try:
                minus = optional(partial(one_of, '-'), '')
                value = int("".join([minus] + pico.digits()))
            except Exception:
                raise FormatError("Expected a value for %s." % code)

            if value < 0:
                raise FormatError("Got %d for %s. You must "
                                  "report a positive value." % (
                                      value, cls.TOKENS[code].lower()))

            aggregates[code] = value
            many(partial(one_of, ' ,;.'))

    return {
        'aggregates': aggregates
    }
def one_of_strings(*strings):
    """Parse one of the given strings, ignoring case.

    Each candidate becomes a ``tri``-wrapped ``caseless_string`` parser and
    the candidates are tried in the order given.

    >>> parse(partial(one_of_strings, 'abc', 'def'), 'abc')
    'abc'

    >>> parse(partial(one_of_strings, 'abc', 'def'), 'def')
    'def'
    """
    alternatives = [tri(partial(caseless_string, s)) for s in strings]
    return choice(*alternatives)
def unit():
    """Parse a time-unit word and return the first element of the match.

    Leading whitespace is skipped. Longer spellings are listed before their
    one-letter forms so they are tried first. A single trailing ``s`` or
    ``S`` is consumed if present.
    """
    whitespace()

    spellings = (
        'day', 'week', 'wk', 'month', 'mo', 'year', 'yr',
        'd', 'w', 'm', 'y',
    )
    initial = one_of_strings(*spellings)[0]

    optional(partial(one_of, 'sS'), None)
    return initial
def parse(cls):
    """Parse ``<name> <sex> <place>`` into a dict.

    * ``'name'``  -- as parsed by ``pico.name``.
    * ``'sex'``   -- first character, upper-cased, of one of
      ``male``/``female``/``m``/``f``.
    * ``'place'`` -- the closest match (via ``difflib.get_close_matches``)
      of any word in the location text against ``home``, ``clinic`` and
      ``facility``, upper-cased.

    Fields are separated by one or more of the characters `` ,;``.

    Raises ``FormatError`` when any field is missing or no location word
    has a close match.
    """
    result = {}

    try:
        result['name'] = pico.name()
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        raise FormatError(
            "Expected name (got: %s)." % "".join(remaining()))

    try:
        many1(partial(one_of, ' ,;'))
        result['sex'] = pico.one_of_strings(
            'male', 'female', 'm', 'f')[0].upper()
    except Exception:
        raise FormatError(
            "Expected the infant's gender "
            "(\"male\", \"female\", or simply \"m\" or \"f\"), "
            "but received instead: %s." % "".join(remaining()))

    try:
        many1(partial(one_of, ' ,;'))
        words = pico.name().lower()
    except Exception:
        raise FormatError(
            "Expected a location; "
            "either \"home\", \"clinic\" or \"facility\" "
            "(got: %s)." % "".join(remaining()))

    for word in words.split():
        matches = difflib.get_close_matches(
            word, ('home', 'clinic', 'facility'))
        if matches:
            result['place'] = matches[0].upper()
            break
    else:
        # Reached only when no word produced a close match.
        raise FormatError(
            "Did not understand the location: %s." % words)

    return result
def parse(*args):
    """Run ``parser`` over the last positional argument.

    The final argument is the text to parse; any preceding arguments are
    bound to ``parser``. Empty text is replaced by ``("", )``.

    Returns ``(None, "")`` when the parser raises ``NoMatch`` or an exception
    whose message contains ``'Commit / cut called'``; other exceptions
    propagate.

    NOTE(review): ``parser`` is not defined in this block -- presumably it is
    supplied by an enclosing scope. The visible code does not use ``result``
    or ``remaining`` after a successful parse; confirm whether more of the
    function follows.
    """
    args = list(args)
    text = args.pop()
    text = tuple(text) or ("", )

    try:
        result, remaining = run_parser(partial(parser, *args), text)
    except NoMatch:
        return None, ""
    except Exception as exc:  # pragma: NOCOVER
        # backwards compatible with older version of
        # picoparse; this is equivalent to not
        # matching
        if 'Commit / cut called' in str(exc):
            return None, ""
        raise
def parse(*args, **kwargs):
    """Run ``parser`` over the text found under the ``name`` keyword.

    The text is popped from ``kwargs`` (falling back to ``("\\n", )`` when it
    is falsy); the remaining positional and keyword arguments are bound to
    ``parser``.

    Raises ``KeyError`` when the expected keyword is absent. Returns ``None``
    when the parser raises ``NoMatch`` or an exception whose message contains
    ``'Commit / cut called'``; other exceptions propagate.

    NOTE(review): ``name`` and ``parser`` are not defined in this block --
    presumably they are supplied by an enclosing scope. The visible code
    does not use ``result`` or ``remaining`` after a successful parse;
    confirm whether more of the function follows.
    """
    try:
        text = kwargs.pop(name) or ("\n", )
    except KeyError:
        raise KeyError(
            "Expected key: '%s' in arguments (got: %s)." % (
                name, repr(kwargs)))

    try:
        result, remaining = run_parser(partial(parser, *args, **kwargs), text)
    except NoMatch:
        return
    except Exception as exc:  # pragma: NOCOVER
        # backwards compatible with older version of
        # picoparse; this is equivalent to not
        # matching
        if 'Commit / cut called' in unicode(exc):
            return
        raise
def parse():
    """Parse ``+register`` / ``+reg`` with an optional identifier or name.

    After the keyword and at least one whitespace character, either
    ``#<text>`` (stored under ``'ident'``) or plain text (stored under
    ``'name'``) may follow; in both cases the text runs up to the first
    comma. Returns the resulting dict, which is empty when neither is given.
    """
    one_of('+')
    one_of_strings('register', 'reg')

    result = {}
    until_comma = partial(many1, partial(not_one_of, ','))

    @tri
    def ident():
        whitespace1()
        one_of('#')
        commit()
        result['ident'] = "".join(until_comma())

    @tri
    def name():
        whitespace1()
        result['name'] = "".join(until_comma())

    # The alternatives record their match in ``result`` as a side effect.
    optional(partial(choice, ident, name), None)
    return result
def floating():
    """Parse a floating point number and return its characters.

    Digits before and after the separator are both optional. The separator
    may be a comma or a dot and is always emitted as ``"."``.

    >>> parse(floating, '123')
    '123'
    >>> parse(floating, '123.0')
    '123.0'
    >>> parse(floating, '123,0')
    '123.0'
    >>> parse(floating, '.123')
    '.123'
    >>> parse(floating, '123.')
    '123.'
    """
    number = optional(digits, [])

    separator = optional(partial(choice, comma, dot), None)
    if separator:
        # Normalise either separator to a dot before the fractional digits.
        number.extend(".")
        number.extend(optional(digits, []))

    return number
def parse(cls):
    """Parse either a list of patient IDs or a name/sex/age triple.

    If ``pico.ids`` matches, the upper-cased identifiers are returned under
    ``'ids'``. Otherwise ``pico.name`` is parsed into ``'name'`` and must be
    followed by:

    * ``'sex'`` -- first character, upper-cased, of one of
      ``male``/``female``/``m``/``f`` (after one or more of `` ,;``);
    * ``'age'`` -- the result of ``pico.date`` or ``pico.timedelta`` (after
      ``pico.separator``).

    Raises ``FormatError`` when an expected part is missing.
    """
    result = {}

    try:
        identifiers = optional(tri(pico.ids), None)
        if identifiers:
            result['ids'] = [ident.upper() for ident in identifiers]
        else:
            result['name'] = pico.name()
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        raise FormatError(
            "Expected a name, or a patient's health or tracking ID "
            "(got: %s)." % "".join(remaining()))

    if 'name' in result:
        try:
            many1(partial(one_of, ' ,;'))
            result['sex'] = pico.one_of_strings(
                'male', 'female', 'm', 'f')[0].upper()
        except Exception:
            raise FormatError(
                "Expected the infant's gender "
                "(\"male\", \"female\", or simply \"m\" or \"f\"), "
                "but received instead: %s." % "".join(remaining()))

        try:
            pico.separator()
        except Exception:
            raise FormatError("Expected age or birthdate of patient.")

        try:
            result['age'] = choice(*map(tri, (pico.date, pico.timedelta)))
        except Exception:
            raise FormatError("Expected age or birthdate of patient, but "
                              "received %s." % "".join(remaining()))

    return result
def standalone():
    """Parse the literal ``yes`` or ``no`` (tried in that order)."""
    yes = partial(string, 'yes')
    no = partial(string, 'no')
    return choice(yes, no)
def comment():
    """Parse ``<!-- ... -->`` and return ``("COMMENT", text)``.

    ``text`` is everything between the opening marker and the first
    ``-->``, built into a string.
    """
    string("<!--")
    commit()
    terminator = tri(partial(string, "-->"))
    body, _ = many_until(any_token, terminator)
    return "COMMENT", build_string(body)
from picoparse import one_of
from picoparse import not_one_of
from picoparse import optional
from picoparse import partial
from picoparse import run_parser
from picoparse import satisfies
from picoparse import sep
from picoparse import sep1
from picoparse import tri
from picoparse import NoMatch
from picoparse.text import caseless_string
from picoparse.text import lexeme
from picoparse.text import whitespace
from picoparse.text import whitespace1

# Single-character parsers built by binding ``one_of`` / ``not_one_of`` to a
# fixed character set.
comma = partial(one_of, ',')
dot = partial(one_of, '.')
# NOTE(review): this module-level name shadows the ``hash`` builtin.
hash = partial(one_of, '#')
not_comma = partial(not_one_of, ',')

# ``digit`` matches one character from ``digit_chars``; ``digits`` matches
# one or more of them.
digit = partial(one_of, digit_chars)
digits = partial(many1, digit)

# Number of days per unit key; a month counts as 30 days and a year as 365.
# presumably the keys are the initials of day/week/month/year -- verify
# against the code that looks them up.
_unit_days_multiplier = {
    'd': 1,
    'w': 7,
    'm': 30,
    'y': 365,
    }

# Month names for months 1..12 as rendered by ``strftime('%b')`` and
# ``strftime('%B')`` respectively.
_short_months = [datetime.date(1900, i, 1).strftime('%b') for i in range(1, 13)]
_long_months = [datetime.date(1900, i, 1).strftime('%B') for i in range(1, 13)]
def make_caseless_literal(s):
    """Return a case-independent literal parser for the string ``s``.

    NOTE(review): ``s`` is also passed as the first argument to ``partial``;
    presumably picoparse's ``partial`` accepts a leading name -- confirm.
    """
    matcher = tri(caseless_string)
    return partial(s, matcher, s)
from picoparse import one_of, many, many1, not_one_of, run_parser, tri, commit, optional, fail
from picoparse import choice, string, peek, string, eof, many_until, any_token, satisfies
from picoparse import sep, sep1, compose, cue
from picoparse.text import build_string, caseless_string, quoted, quote, whitespace, whitespace1
from picoparse.text import lexeme, run_text_parser
from picoparse import partial

# We define common primitive parsers by partial application. This is similar to the lexical
# analysis stage of a more traditional parser tool.
#
# Note that these simple parsers are just specialisations of the general purpose
# 'one_of' parser, which accepts any item that is in the provided iterable.
# partial(one_of, '<') is equivalent to lambda: one_of('<')
# This is an important idea with picoparse, as it lets you express the specialisation more
# succinctly and more precisely than defining a whole new function.
open_angle = partial(one_of, '<')
close_angle = partial(one_of, '>')
equals = partial(one_of, '=')
decimal_digit = partial(one_of, '0123456789')
hex_decimal_digit = partial(one_of, '0123456789AaBbCcDdEeFf')

# hex_value is a simple parser that knows how to parse out a set of hex digits and return them
# as an integer. build_string wraps up u''.join(iterable) for us.
def hex_value():
    # NOTE(review): if ``many`` can match zero digits, ``int('', 16)`` would
    # raise ValueError here -- confirm whether ``many1`` was intended.
    return int(build_string(many(hex_decimal_digit)), 16)

# The next primitives we need are for the XML name type. The specification for this is reasonably
# involved; instead of manually implementing it, we are going to create a new parser for the
# grammar that the spec itself uses. This parser will generate a new parser for us.
#
# To be clear, this piece of code creates a parser that runs when the module is loaded, not when
def xml_char_spec(spec, extra_choices=None):
    """Build a ``choice`` parser from a character specification string.

    ``spec`` is stripped and parsed with ``xml_char_spec_parser``, which
    yields a sequence of parsers. ``extra_choices`` (any iterable of
    parsers, optional) are tried before the parsers derived from ``spec``.

    Returns ``partial(choice, ...)`` over all of those parsers.
    """
    parsers, _ = run_parser(xml_char_spec_parser, spec.strip())
    # Copy into fresh lists so a tuple (or any iterable) of extra choices
    # works, and so no shared default list is involved.
    choices = list(extra_choices or ()) + list(parsers)
    return partial(choice, *choices)
def element():
    """Parse an element and return ``("NODE", name, attributes, rest)``.

    Reads the opening angle bracket and the element name, then the
    whitespace-separated attributes. ``rest`` is whatever ``closed_element``
    or ``open_element(name)`` returns (tried in that order).
    """
    open_angle()
    tag_name = xml_name()
    commit()
    attrs = lexeme(partial(sep, attribute, whitespace1))
    rest = choice(closed_element, partial(open_element, tag_name))
    return "NODE", tag_name, attrs, rest
return right else: self.right = right return self def __repr__(self): return "%s(%r, %r, %r)" % (self.__class__.__name__, self.left, self.op, self.right) def evaluate(self): return operator_functions[self.op](self.left.evaluate(), self.right.evaluate()) # parser digits = partial(lexeme, as_string(partial(many1, partial(one_of, digit_chars)))) operator = partial(lexeme, partial(one_of, operators)) @tri def bin_op(): left = term() op = operator() commit() right = expression() whitespace() n = BinaryNode(left, op) return n.merge(right)
def value():
    """Parse an optionally negated number and wrap it in a ``ValueNode``.

    A leading ``-`` flips the sign of whatever ``float_value`` or
    ``int_value`` (tried in that order) returns.
    """
    negative = optional(partial(one_of, '-'), False)
    magnitude = choice(float_value, int_value)
    sign = -1 if negative else 1
    return ValueNode(magnitude * sign)
# POSSIBILITY OF SUCH DAMAGE. from string import whitespace as _whitespace_chars from picoparse import p as partial from picoparse import string, one_of, many, many1, many_until, any_token, run_parser from picoparse import NoMatch, fail, tri, EndOfFile, optional, compose def build_string(iterable): """A utility function to wrap up the converting a list of characters back into a string. """ return u''.join(iterable) as_string = partial(compose, build_string) quote = partial(one_of, "\"'") whitespace_char = partial(one_of, _whitespace_chars) whitespace = as_string(partial(many, whitespace_char)) whitespace1 = as_string(partial(many1, whitespace_char)) newline = partial(one_of, "\n") def caseless_string(s): """Attempts to match input to the letters in the string, without regard for case. """ return string(zip(s.lower(), s.upper())) def lexeme(parser):
def make_literal(s):
    """Return a literal parser for the string ``s``.

    NOTE(review): ``s`` is also passed as the first argument to ``partial``;
    presumably picoparse's ``partial`` accepts a leading name -- confirm.
    """
    matcher = tri(string)
    return partial(s, matcher, s)
def xmldecl():
    """Parse the inside of an XML declaration.

    Matches ``xml`` (any case) and following whitespace, then the optional
    ``version`` attribute followed by the optional ``standalone`` attribute.

    Returns ``('xml', version, standalone)`` where missing attributes
    default to ``"1.0"`` and ``"yes"``.
    """
    caseless_string('xml')
    whitespace()

    version = optional(partial(xmldecl_attr, 'version', version_num), "1.0")
    standalone_value = optional(
        partial(xmldecl_attr, 'standalone', standalone), "yes")
    return ('xml', version, standalone_value)
def prolog():
    """Consume the document prolog; nothing is returned.

    Sequence: leading whitespace, an optional ``processing`` construct
    parsed with ``xmldecl``, any number of processing
    instructions/comments/whitespace, an optional ``doctype``, and again any
    number of processing instructions/comments/whitespace.
    """
    misc = partial(choice, processing, comment, whitespace1)

    whitespace()
    optional(tri(partial(processing, xmldecl)), None)
    many(misc)
    optional(doctype, None)
    many(misc)
from picoparse.text import caseless_string
from router.parser import comma
from router.parser import date
from router.parser import digits
from router.parser import identifier
from router.parser import name
from router.parser import one_of_strings
from router.parser import separator
from router.parser import tags
from router.parser import timedelta
from router.parser import FormatError
from router.models import Incoming
from router.models import User

# Rebind ``date`` so it tries the formats from ``settings.DATE_INPUT_FORMATS``.
date = partial(date, formats=settings.DATE_INPUT_FORMATS)

# Patient record; every field is nullable.
class Patient(Model):
    health_id = models.CharField(max_length=30, null=True)
    name = models.CharField(max_length=50, null=True)
    sex = models.CharField(max_length=1, null=True)
    birthdate = models.DateTimeField(null=True)

    @property
    def age(self):
        """Return ``datetime.now() - birthdate``, or ``None`` if no birthdate is set."""
        if self.birthdate is not None:
            return datetime.now() - self.birthdate

class Report(Model):
    """Health report."""
def named_entity():
    """Parse an entity name and return its entry from ``named_entities``.

    The name is one or more characters other than ``;`` and ``#``. The
    parser fails when the name is not a key of ``named_entities``.
    """
    name_chars = many1(partial(not_one_of, ';#'))
    entity_name = build_string(name_chars)
    is_known = entity_name in named_entities
    if not is_known:
        fail()
    return named_entities[entity_name]
def parse(cls):
    """Parse a ``+muac`` message into a dict.

    The patient is given either as an identifier *before* ``+muac`` or,
    after it, as an identifier or a name. A name must be followed by the sex
    (one of ``MmFf``, stored upper-cased under ``'sex'``) and an age or
    birthdate (``date`` or ``timedelta``, stored under ``'age'``).

    The reading follows and is stored under ``'reading'``:

    * a colour word (``red``/``green``/``yellow``/``r``/``g``/``y``) is
      stored as its upper-cased first character;
    * a number is converted with ``cls.get_reading_in_mm`` when no unit is
      given, multiplied by 10 for ``cm`` and kept as-is for ``mm``.

    Optional tags after a further separator are stored under ``'tags'``.

    Raises ``FormatError`` when an expected part is missing or malformed.
    """
    result = {}

    prefix = optional(tri(identifier), None)
    if prefix is not None:
        result['patient_id'] = "".join(prefix)
        whitespace()

    one_of('+')
    caseless_string('muac')

    if prefix is None:
        try:
            whitespace1()
            part = optional(tri(identifier), None)
            if part is not None:
                result['patient_id'] = "".join(part)
            else:
                result['name'] = name()
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            raise FormatError("Expected a patient id or name.")

    if 'name' in result:
        try:
            separator()
            result['sex'] = one_of('MmFf').upper()
        except Exception:
            raise FormatError("Expected either M or F to indicate the patient's gender.")

        try:
            separator()
        except Exception:
            raise FormatError("Expected age or birthdate of patient.")

        try:
            result['age'] = choice(*map(tri, (date, timedelta)))
        except Exception:
            received, stop = many_until(any_token, comma)
            raise FormatError("Expected age or birthdate of patient, but "
                              "received %s." % "".join(received))

    try:
        if prefix is None:
            separator()
        else:
            whitespace1()

        reading = choice(
            partial(one_of_strings, 'red', 'green', 'yellow', 'r', 'g', 'y'),
            digits)

        try:
            reading = int("".join(reading))
        except (TypeError, ValueError):
            # Not numeric: keep the colour's initial letter.
            reading = reading[0].upper()
        else:
            whitespace()
            unit = optional(partial(one_of_strings, 'mm', 'cm'), None)
            if unit is None:
                reading = cls.get_reading_in_mm(reading)
            elif "".join(unit) == 'cm':
                reading = reading * 10
        result['reading'] = reading
    except Exception:
        raise FormatError(
            "Expected MUAC reading (either green, yellow or red), but "
            "received %s." % "".join(remaining()))

    if optional(separator, None):
        result['tags'] = tags()

    return result
def xml_char_spec_parser():
    """Parse a full character specification and return its items.

    One or more items -- a range, a single character or a single hex
    character, tried in that order -- separated by
    ``char_spec_seperator``, followed by end of input.
    """
    item = partial(choice, char_spec_range, char_spec_single_char,
                   char_spec_single_hex_char)
    items = sep1(item, char_spec_seperator)
    eof()
    return items
else: self.right = right return self def __repr__(self): return "%s(%r, %r, %r)" % (self.__class__.__name__, self.left, self.op, self.right) def evaluate(self): return operator_functions[self.op](self.left.evaluate(), self.right.evaluate()) # parser digits = partial(lexeme, as_string(partial(many1, partial(one_of, digit_chars)))) operator = partial(lexeme, partial(one_of, operators)) @tri def bin_op(): left = term() op = operator() commit() right = expression() whitespace() n = BinaryNode(left, op) return n.merge(right) @tri def parenthetical():
def assertNoMatch(self, parser, input, *args):
    """Assert that running ``parser`` on ``input`` raises ``NoMatch``.

    Extra positional arguments are forwarded through ``assertRaises``.
    """
    attempt = partial(self.run_parser, parser, input)
    self.assertRaises(NoMatch, attempt, *args)