Ejemplo n.º 1
0
def hwdb_grammar():
    """Build and return the pyparsing grammar for a hwdb file.

    Default whitespace skipping is switched off first, so every space and
    line ending has to be matched explicitly by the expressions below.

    The returned expression accepts one or more groups (collected under the
    ``GROUPS`` results name) or comment-only groups, followed by end of input.
    Inside a group, match lines are collected under ``MATCHES`` and property
    lines under ``PROPERTIES``.
    """
    ParserElement.setDefaultWhitespaceChars('')

    # One "<category>:<connection>:" alternative per entry of TYPES.
    prefixes = [category + ':' + Or(conn) + ':'
                for category, conn in TYPES.items()]
    match_text = Combine(Or(prefixes) + Word(printables + ' ' + '®'))
    matchline = match_text + EOL

    # Exactly one leading space (dropped), then "TAG=value" with an optional
    # trailing comment, all merged into a single token.
    assignment = Combine(UDEV_TAG
                         - '='
                         - Word(alphanums + '_=:@*.! ')
                         - Optional(pythonStyleComment))
    propertyline = White(' ', exact=1).suppress() + assignment + EOL
    propertycomment = White(' ', exact=1) + pythonStyleComment + EOL

    # A group is: match lines, then property lines, then a blank line or the
    # end of the input.  Comment lines may be interleaved and are discarded.
    matches = OneOrMore(matchline('MATCHES*') ^ COMMENTLINE.suppress())
    properties = OneOrMore(propertyline('PROPERTIES*')
                           ^ propertycomment.suppress())
    terminator = (EMPTYLINE ^ stringEnd()).suppress()
    group = matches - properties - terminator

    commentgroup = OneOrMore(COMMENTLINE).suppress() - EMPTYLINE.suppress()

    return OneOrMore(group('GROUPS*') ^ commentgroup) + stringEnd()
Ejemplo n.º 2
0
    def _generate_members(self, template_file):
        """Populate ``self.members`` from the lines of ``template_file``.

        Every line is tried against the marker grammar: a run of spaces/tabs,
        then ``mapping``, ``[``, a ``Fragment.IDENTIFIER`` and ``]``.  A line
        that matches is stored as a ``TemplateModel.Marker`` built from the
        target, the indentation and an empty list; any other line is stored
        unchanged so it can be copied as is to the output.
        """
        template_lines = template_file.readlines()

        marker_reference = (Suppress("mapping") + Suppress("[")
                            + Fragment.IDENTIFIER.setResultsName("target")
                            + Suppress("]"))
        marker_line = White(" \t").setResultsName("indent") + marker_reference

        for text in template_lines:
            try:
                match = marker_line.parseString(text)
                member = TemplateModel.Marker(match.target, match.indent, [])
            except ParseException:
                # Not a marker: keep the line as a literal
                member = text
            self.members.append(member)
Ejemplo n.º 3
0
class NginxParser(object):
    # pylint: disable=expression-not-assigned
    """A class that parses nginx configuration with pyparsing.

    The grammar is built once, as class attributes, and whitespace is matched
    explicitly (the composite expressions call ``leaveWhitespace()``), so
    whitespace tokens appear in the parse results rather than being skipped.

    Construct with the configuration text, then call ``parse()`` for the
    pyparsing result or ``as_list()`` for plain nested lists.
    """

    # constants
    # Optional run of whitespace; kept as a token when present.
    space = Optional(White())
    nonspace = Regex(r"\S+")
    # Braces and the terminating semicolon are matched but dropped from the
    # results.
    left_bracket = Literal("{").suppress()
    right_bracket = space.leaveWhitespace() + Literal("}").suppress()
    semicolon = Literal(";").suppress()
    key = Word(alphanums + "_/+-.")
    # "$" followed by anything up to a brace, semicolon, comma or whitespace,
    # merged into one token.
    dollar_var = Combine(Literal('$') + Regex(r"[^\{\};,\s]+"))
    # A parenthesised condition; ".+" is greedy, so it extends to the last
    # ")" on the line.
    condition = Regex(r"\(.+\)")
    # Matches anything that is not a special character, and ${SHELL_VARS}, AND
    # any chars in single or double quotes
    # All of these COULD be upgraded to something like
    # https://stackoverflow.com/a/16130746
    dquoted = Regex(r'(\".*\")')
    squoted = Regex(r"(\'.*\')")
    nonspecial = Regex(r"[^\{\};,]")
    varsub = Regex(r"(\$\{\w+\})")
    # nonspecial nibbles one character at a time, but the other objects take
    # precedence.  We use ZeroOrMore to allow entries like "break ;" to be
    # parsed as assignments
    value = Combine(ZeroOrMore(dquoted | squoted | varsub | nonspecial))

    location = CharsNotIn("{};," + string.whitespace)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    # "~*" is listed before "~" so the longer literal is tried first.
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    comment = space + Literal('#') + restOfLine

    # "key [value];" -- the value part is optional and defaults to None.
    assignment = space + key + Optional(space + value,
                                        default=None) + semicolon
    location_statement = space + Optional(modifier) + Optional(space +
                                                               location +
                                                               space)
    if_statement = space + Literal("if") + space + condition + space
    charset_map_statement = space + Literal(
        "charset_map") + space + value + space + value

    map_statement = space + Literal(
        "map") + space + nonspace + space + dollar_var + space
    # This is NOT an accurate way to parse nginx map entries; it's almost
    # certainly too permissive and may be wrong in other ways, but it should
    # preserve things correctly in mmmmost or all cases.
    #
    #    - I can neither prove nor disprove that it is correct wrt all escaped
    #      semicolon situations
    # Addresses https://github.com/fatiherikli/nginxparser/issues/19
    map_pattern = Regex(r'".*"') | Regex(r"'.*'") | nonspace
    map_entry = space + map_pattern + space + value + space + semicolon
    # A map block is grouped as [header, [entries...]], where each entry is a
    # comment or a "pattern value;" line.
    map_block = Group(
        Group(map_statement).leaveWhitespace() + left_bracket +
        Group(ZeroOrMore(Group(comment | map_entry)) +
              space).leaveWhitespace() + right_bracket)

    # Forward declaration: blocks nest, so block_innards refers to block
    # before its definition is filled in below.
    block = Forward()

    # key could for instance be "server" or "http", or "location" (in which case
    # location_statement needs to have a non-empty location)

    # "^" (Or) tries every alternative and keeps the longest match.
    block_begin = (Group(space + key + location_statement)
                   ^ Group(if_statement)
                   ^ Group(charset_map_statement)).leaveWhitespace()

    block_innards = Group(
        ZeroOrMore(Group(comment | assignment) | block | map_block) +
        space).leaveWhitespace()

    # Fill in the forward declaration: [header, [contents...]].
    block << Group(block_begin + left_bracket + block_innards + right_bracket)

    # A whole file: one or more top-level statements or blocks, optional
    # trailing whitespace, then end of input.
    script = OneOrMore(Group(comment | assignment) ^ block
                       ^ map_block) + space + stringEnd
    # Keep tab characters as they are and match whitespace explicitly.
    script.parseWithTabs().leaveWhitespace()

    def __init__(self, source):
        # source: the configuration text handed to parseString() by parse()
        self.source = source

    def parse(self):
        """Returns the parsed tree."""
        return self.script.parseString(self.source)

    def as_list(self):
        """Returns the parsed tree as a list."""
        return self.parse().asList()
Ejemplo n.º 4
0
def usfmTokenNumber(key):
    """Return a grouped parser for a marker ``key`` followed by a number.

    The expression matches ``backslash`` (dropped), the literal ``key``,
    whitespace (dropped), a word made of digits, ``-``, ``(`` and ``)``, and
    trailing whitespace (dropped).  The group therefore holds the key and the
    number word.
    """
    marker = Suppress(backslash) + Literal(key)
    number = Word(nums + '-()')
    return Group(marker + Suppress(White()) + number + Suppress(White()))
Ejemplo n.º 5
0
# Style delimiters; they are matched but dropped from the results.
Bold = Suppress(Literal('**'))
Italic = Suppress(Literal('__'))
Striked = Suppress(Literal('~~'))
# Plain text: one or more whitespace-separated words of printable characters.
Text = OneOrMore(Word(printables))

# Styled text is recursive (styles can nest), hence the forward declaration.
StyledText = Forward()
BoldText = (Bold + StyledText + Bold)('is_bold')
ItalicText = (Italic + StyledText + Italic)('is_italic')
StrikedText = (Striked + StyledText + Striked)('is_striked')
# The last alternative is StopOnSuffix, given the delimiters that open a
# style, an icon, an XML comment or a "see" reference.
# NOTE(review): StopOnSuffix is defined outside this snippet; presumably it
# consumes text up to one of those suffixes -- confirm.
StyledText << (BoldText | ItalicText | StrikedText
               | StopOnSuffix(['**', '__', '~~', '!icon=', '<!--', '(see:']))
# The results name is set through attributes rather than setResultsName().
StyledText.resultsName = 'text'
StyledText.saveAsList = True  # must be done at this point, not before
TextGrammar = StyledText | Text.setResultsName('text', listAllMatches=True)

# "[x]" or "[ ]" (any whitespace between the brackets); 'is_checked' is only
# set for the "x" form.
Checkbox = (Literal('[') + (Literal('x')('is_checked') | White()) +
            Literal(']'))('has_checkbox')

# "!icon=<name>"; every icon name is accumulated under 'icons'.
Icon = Literal('!icon=') + Word(printables).setResultsName('icons',
                                                           listAllMatches=True)

# (see: "node a", "node b") -- double-quoted names, backslash as escape char.
DestNodeText = QuotedString('"', escChar='\\')
See = Keyword('(see:') + delimitedList(
    DestNodeText, delim=',').setResultsName('see') + Literal(')')

# "<!-- ... -->"; the part handled by StopOnSuffix is stored under 'attrs'.
XMLAttrs = Literal('<!--') + StopOnSuffix(
    ['-->']).setResultsName('attrs') + Literal('-->')

# A URL is any run of characters other than ")" and space.
Url = CharsNotIn(') ')('url')
# "<width>x<height>", both as digit strings.
ImgDimensions = Word(nums)('img_width') + Literal('x') + Word(nums)(
    'img_height')
Ejemplo n.º 6
0
class White(White):
    """Whitespace matcher with defaults chosen to match the CSS spec values.

    The default ``ws`` is space, tab, carriage return, line feed and form
    feed; the remaining arguments are passed through to the parent ``White``.
    """

    def __init__(self, ws=" \t\r\n\f", min=1, max=0, exact=0):
        super(White, self).__init__(ws, min=min, max=max, exact=exact)


# A backslash (dropped from the results) followed by one escaped character.
escaped = (
    Literal("\\").suppress() +
    # Any single character in U+0020-U+007E or U+0080-U+FFFF, i.e. printable
    # ASCII plus everything above U+007F that fits in four hex digits.
    Regex(u"[\u0020-\u007e\u0080-\uffff]", re.IGNORECASE))


def convertToUnicode(t):
    """Parse action: convert a matched hexadecimal escape to its character.

    ``t[0]`` is the string of hex digits; the return value is the character
    with that code point.  Works on Python 2 (``unichr``) and on Python 3,
    where ``unichr`` no longer exists and ``chr`` returns text.

    Raises ``ValueError`` if the digits name a code point the interpreter
    cannot represent.
    """
    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # Python 3
    return to_char(int(t[0], 16))


# A backslash, one to six hex digits, and at most one trailing whitespace
# character; the backslash and the whitespace are dropped and the digits are
# turned into the corresponding character by convertToUnicode.
hex_unicode = (
    Literal("\\").suppress() + Regex("[0-9a-f]{1,6}", re.IGNORECASE) +
    Optional(White(exact=1)).suppress()).setParseAction(convertToUnicode)

# Hex escapes are tried first so "\41" is read as a code point rather than as
# an escaped "4".
escape = hex_unicode | escaped

#any unicode literal outside the 0-127 ascii range
nonascii = Regex(u"[^\u0000-\u007f]")

#single character for starting an identifier.
# NOTE(review): "_" is not accepted here or in nmchar, although CSS 2.1
# allows it in identifiers -- confirm which CSS level is intended.
nmstart = Regex(u"[A-Z]", re.IGNORECASE) | nonascii | escape

#single character allowed after the first one: letters, digits and "-".
nmchar = Regex(u"[0-9A-Z-]", re.IGNORECASE) | nonascii | escape

#an identifier is one start character plus any number of name characters,
#merged into a single token.
identifier = Combine(nmstart + ZeroOrMore(nmchar))
Ejemplo n.º 7
0
def _tdb_grammar():  #pylint: disable=R0914
    """
    Assemble and return the pyparsing expression matching one TDB command.

    The result is a first-match alternation over the supported commands, in
    this order: ELEMENT, TYPE_DEFINITION, FUNCTION, ASSESSED_SYSTEMS,
    DEFINE_SYSTEM_DEFAULT, DEFAULT_COMMAND, LIST_OF_REFERENCES, PHASE,
    CONSTITUENT, PARAMETER.
    """
    def _free_text_command(keyword):
        """Command keyword followed by everything up to the end of line."""
        return TCCommand(keyword) + SkipTo(LineEnd())

    # --- terminals -------------------------------------------------------
    integer = Word(nums).setParseAction(lambda t: [int(t[0])])
    # a regex is an ugly way to match floats, but pyparsing recommends it
    real = Regex(r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?')
    real.setParseAction(lambda t: [float(t[0])])
    # names of phases, functions, ...
    name = Word(alphanums + '_:', min=1)
    # species such as CO2, AL, FE3+; a trailing '%' is accepted and dropped
    species = Word(alphanums + '+-*', min=1) + Optional(Suppress('%'))

    # --- composite pieces ------------------------------------------------
    # sublattices are separated by ':'; within one, species may be
    # separated by commas or by whitespace
    sublattice = Group(delimitedList(species, ',') & ZeroOrMore(species))
    constituents = Group(delimitedList(sublattice, ':'))
    parameter_kind = MatchFirst(
        [TCCommand(kind) for kind in TDB_PARAM_TYPES])
    # sympy does the arithmetic parsing later; _make_piecewise_ast deals
    # with the piecewise layout of the matched tokens
    interval = (SkipTo(';') + Suppress(';') + ZeroOrMore(Suppress(','))
                + Optional(real) + Suppress(Word('YNyn', exact=1)))
    expression = Optional(real) + OneOrMore(interval)
    expression.setParseAction(_make_piecewise_ast)

    # --- commands with their own structure -------------------------------
    element = TCCommand('ELEMENT') + Word(alphas + '/-', min=1, max=2)
    type_definition = (TCCommand('TYPE_DEFINITION') + Suppress(White())
                       + CharsNotIn(' !', exact=1) + SkipTo(LineEnd()))
    function = TCCommand('FUNCTION') + name + expression
    phase = (TCCommand('PHASE') + name + Suppress(White())
             + CharsNotIn(' !', min=1) + Suppress(White())
             + Suppress(integer) + Group(OneOrMore(real)) + LineEnd())
    constituent = (TCCommand('CONSTITUENT') + name + Suppress(White())
                   + Suppress(':') + constituents + Suppress(':')
                   + LineEnd())
    parameter = (TCCommand('PARAMETER') + parameter_kind + Suppress('(')
                 + name + Suppress(',') + constituents
                 + Optional(Suppress(';') + integer, default=0)
                 + Suppress(')') + expression)

    # --- the full alternation, first match wins ---------------------------
    return (element
            | type_definition
            | function
            | _free_text_command('ASSESSED_SYSTEMS')
            | _free_text_command('DEFINE_SYSTEM_DEFAULT')
            | _free_text_command('DEFAULT_COMMAND')
            | _free_text_command('LIST_OF_REFERENCES')
            | phase
            | constituent
            | parameter)
Ejemplo n.º 8
0
# Redis PyParsing grammar

# An optional single or double quote character.
# NOTE(review): not referenced by the grammar below -- confirm it is used
# elsewhere before removing.
quot = Optional(oneOf(('"', "'")))
# The command names the grammar recognises, matched case-insensitively and
# stored under the 'command' results name.
command = oneOf(
    ('CONFIG', 'DBSIZE', 'DECR', 'DECRBY', 'DEL', 'DUMP', 'ECHO', 'EXISTS',
     'EXPIRE', 'EXPIREAT', 'FLUSHDB', 'GET', 'HDEL', 'HEXISTS', 'HGET',
     'HGETALL', 'HINCRBY', 'HKEYS', 'HLEN', 'HSETNX', 'HVALS', 'INCR',
     'INCRBY', 'INFO', 'KEYS', 'LLEN', 'LPOP', 'LPUSH', 'LPUSHX', 'LRANGE',
     'LREM', 'LSET', 'LTRIM', 'MGET', 'MSET', 'MSETNX', 'OBJECT', 'PERSIST',
     'PEXPIRE', 'PEXPIREAT', 'PING', 'PSETEX', 'PTTL', 'RANDOMKEY', 'RENAME',
     'RENAMENX', 'RESTORE', 'RPOP', 'SADD', 'SET', 'SISMEMBER', 'SMEMBERS',
     'SREM', 'TIME', 'TTL', 'TYPE', 'ZADD', 'ZRANGE', 'ZREM'),
    caseless=True).setResultsName('command')
# One or more words made of alphanumerics, '-' and punctuation characters,
# stored under 'parameters'.
parameters = (OneOrMore(Word(alphanums + '-' +
                             punctuation))).setResultsName('parameters')
# A command, optionally followed by whitespace (dropped) and its parameters.
redis_grammar = command + Optional(White().suppress() + parameters)

# ################################################################################################################################


class LuaContainer(object):
    """ A class which knows how to add and execute Lua scripts against Redis.

    kvdb - stored as given on the instance
    initial_programs - optional programs to register right away, either
                       a mapping of name -> program or an iterable of
                       (name, program) pairs
    """
    def __init__(self, kvdb=None, initial_programs=None):
        self.kvdb = kvdb
        self.lua_programs = {}
        self.add_initial_lua_programs(initial_programs or {})

    def add_initial_lua_programs(self, programs):
        """ Registers each program through self.add_lua_program(name, program).

        Accepts a mapping of name -> program as well as an iterable
        of (name, program) pairs.
        """
        # Iterating a mapping directly yields only its keys, so unpacking into
        # (name, program) would fail - go through .items() when it is available.
        pairs = programs.items() if hasattr(programs, 'items') else programs
        for name, program in pairs:
            self.add_lua_program(name, program)
Ejemplo n.º 9
0
import pyparsing
from pyparsing import Optional, White, Word, Regex, alphas, CaselessLiteral, CaselessKeyword, oneOf, delimitedList, Forward, ZeroOrMore, NotAny, Keyword, Literal

# transformString doesn't play nice with suppressed anything, including
# whitespace. So it seems like the only effective way to use it is with
# explicit whitespace.
pyparsing.ParserElement.setDefaultWhitespaceChars('')

# Mandatory and optional whitespace, to be placed explicitly between tokens.
W = White()
OW = Optional(White())

CKeyword = CaselessKeyword
# x, x, x ... with optional whitespace around each comma
comma_list = lambda x: x + ZeroOrMore(OW + ',' + OW + x)

unary_op = oneOf('- + ~', caseless=True)
unary_op |= CKeyword('NOT')

# TODO this does not encode precedence
binary_op = oneOf("|| * / % + - << >> & | < <= > >= = == != <>", caseless=True)
# MatchFirst is used instead of folding with reduce(): reduce is not a builtin
# on Python 3, and the alternatives are tried in the same listed order.
# NOTE(review): 'IS' is listed before 'IS NOT', so the 'IS NOT' alternative
# can never be the one that matches -- confirm whether that is intended.
binary_op |= pyparsing.MatchFirst([CKeyword(x) for x in 'IS,IS NOT,IN,LIKE,GLOB,MATCH,REGEXP,AND,OR'.split(',')])

# these direct from the SQLite docs
KEYWORDS = 'ABORT ACTION ADD AFTER ALL ALTER ANALYZE AND AS ASC ATTACH AUTOINCREMENT BEFORE BEGIN BETWEEN BY CASCADE CASE CAST CHECK COLLATE COLUMN COMMIT CONFLICT CONSTRAINT CREATE CROSS CURRENT_DATE CURRENT_TIME CURRENT_TIMESTAMP DATABASE DEFAULT DEFERRABLE DEFERRED DELETE DESC DETACH DISTINCT DROP EACH ELSE END ESCAPE EXCEPT EXCLUSIVE EXISTS EXPLAIN FAIL FOR FOREIGN FROM FULL GLOB GROUP HAVING IF IGNORE IMMEDIATE IN INDEX INDEXED INITIALLY INNER INSERT INSTEAD INTERSECT INTO IS ISNULL JOIN KEY LEFT LIKE LIMIT MATCH NATURAL NO NOT NOTNULL NULL OF OFFSET ON OR ORDER OUTER PLAN PRAGMA PRIMARY QUERY RAISE REFERENCES REGEXP REINDEX RELEASE RENAME REPLACE RESTRICT RIGHT ROLLBACK ROW SAVEPOINT SELECT SET TABLE TEMP TEMPORARY THEN TO TRANSACTION TRIGGER UNION UNIQUE UPDATE USING VACUUM VALUES VIEW VIRTUAL WHEN WHERE'

# TODO probably not right charset & does not account for escaping identifiers
# https://www.sqlite.org/lang_keywords.html
# An identifier is a name that is not one of the reserved keywords above.
identifier = NotAny(
    pyparsing.MatchFirst([CKeyword(x) for x in KEYWORDS.split(' ')])
) + Regex('[a-zA-Z_][a-zA-Z0-9_]*')

# for the purposes of attaching parse actions to these
Ejemplo n.º 10
0
def get_logical_operator():
    """Return a parser for the operator that joins two terms.

    ``AND`` and ``OR`` are accepted in any letter case and are tried in that
    order; failing both, a run of whitespace is matched and dropped from the
    results.
    """
    conjunction = CaselessLiteral('AND')
    disjunction = CaselessLiteral('OR')
    implicit = White().suppress()
    return conjunction | disjunction | implicit
Ejemplo n.º 11
0
def formula_grammar(table):
    """
    Construct a parser for molecular formulas.

    :Parameters:

        *table* : PeriodicTable
             Periodic table used to look up element symbols; every matched
             symbol is resolved through ``table.symbol()``.

    :Returns:
        *parser* : pyparsing.ParserElement.
            The ``parser.parseString()`` method returns a list of
            pairs (*count, fragment*), where fragment is an *isotope*,
            an *element* or a list of pairs (*count, fragment*).

    """

    # Recursive
    composite = Forward()
    mixture = Forward()

    # whitespace and separators
    # Both are matched but dropped from the results.
    space = Optional(White().suppress())
    separator = space + Literal('+').suppress() + space

    # Lookup the element in the element table
    # A symbol is one capital letter followed by any number of lowercase ones.
    symbol = Regex("[A-Z][a-z]*")
    symbol = symbol.setParseAction(lambda s, l, t: table.symbol(t[0]))

    # Translate isotope
    # "[n]" directly after the symbol (no whitespace allowed); yields the
    # integer n, or 0 when no isotope is given.
    openiso = Literal('[').suppress()
    closeiso = Literal(']').suppress()
    isotope = Optional(~White() + openiso + Regex("[1-9][0-9]*") + closeiso,
                       default='0')
    isotope = isotope.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 0)

    # Translate ion
    # "{n+}", "{n-}", "{+}" or "{-}" directly after the isotope; the text is
    # rearranged to sign + magnitude (magnitude 1 when omitted) and converted
    # to a signed int.  The default '0+' therefore becomes 0.
    openion = Literal('{').suppress()
    closeion = Literal('}').suppress()
    ion = Optional(~White() + openion + Regex("([1-9][0-9]*)?[+-]") + closeion,
                   default='0+')
    ion = ion.setParseAction(
        lambda s, l, t: int(t[0][-1] + (t[0][:-1] if len(t[0]) > 1 else '1')))

    # Translate counts
    # fract requires a decimal point and gives a float; whole gives an int.
    # fract is tried first so "1.5" is not read as the whole number 1.
    fract = Regex("(0|[1-9][0-9]*|)([.][0-9]*)")
    fract = fract.setParseAction(lambda s, l, t: float(t[0]) if t[0] else 1)
    whole = Regex("[1-9][0-9]*")
    whole = whole.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 1)
    # A missing count defaults to 1; no whitespace may precede the number.
    count = Optional(~White() + (fract | whole), default=1)

    # Convert symbol, isotope, ion, count to (count, isotope)
    element = symbol + isotope + ion + count

    def convert_element(string, location, tokens):
        """interpret string as element"""
        #print "convert_element received", tokens
        symbol, isotope, ion, count = tokens[0:4]
        # Narrow the table entry to the isotope and/or ion when one was given
        # (0 means "not specified" for both).
        if isotope != 0:
            symbol = symbol[isotope]
        if ion != 0:
            symbol = symbol.ion[ion]
        return (count, symbol)

    element = element.setParseAction(convert_element)

    # Convert "count elements" to a pair
    implicit_group = count + OneOrMore(element)

    def convert_implicit(string, location, tokens):
        """convert count followed by fragment"""
        #print "implicit", tokens
        count = tokens[0]
        fragment = tokens[1:]
        # A count of 1 is not wrapped; the bare fragment is returned instead.
        return fragment if count == 1 else (count, fragment)

    implicit_group = implicit_group.setParseAction(convert_implicit)

    # Convert "(composite) count" to a pair
    opengrp = space + Literal('(').suppress() + space
    closegrp = space + Literal(')').suppress() + space
    explicit_group = opengrp + composite + closegrp + count

    def convert_explicit(string, location, tokens):
        """convert (fragment)count"""
        #print "explicit", tokens
        # Here the count trails the group, so it is the last token.
        count = tokens[-1]
        fragment = tokens[:-1]
        return fragment if count == 1 else (count, fragment)

    explicit_group = explicit_group.setParseAction(convert_explicit)

    # Build composite from a set of groups
    group = implicit_group | explicit_group
    # Groups may be joined by "+" or simply by (optional) whitespace.
    implicit_separator = separator | space
    composite << group + ZeroOrMore(implicit_separator + group)

    # "@<number>" with an optional trailing "n" or "i" flag, defaulting to 'i'.
    density = Literal('@').suppress() + count + Optional(Regex("[ni]"),
                                                         default='i')
    compound = composite + Optional(density, default=None)

    def convert_compound(string, location, tokens):
        """convert material @ density"""
        #print "compound", tokens
        # Without a density the last token is the None default; otherwise the
        # last two tokens are the density value and its "n"/"i" flag.
        if tokens[-1] is None:
            return Formula(structure=_immutable(tokens[:-1]))
        elif tokens[-1] == 'n':
            return Formula(structure=_immutable(tokens[:-2]),
                           natural_density=tokens[-2])
        else:
            return Formula(structure=_immutable(tokens[:-2]),
                           density=tokens[-2])

    compound = compound.setParseAction(convert_compound)

    # "//" separates the parts of a mixture.
    partsep = space + Literal('//').suppress() + space
    percent = Literal('%').suppress()

    # Mixture by weight: "<n>%wt A // <m>% B // C".  The first part must carry
    # the full weight marker, later parts may use a bare "%", and the final
    # part has no percentage.
    weight_percent = Regex("%(w((eigh)?t)?|m(ass)?)").suppress() + space
    by_weight = (count + weight_percent + mixture +
                 ZeroOrMore(partsep + count +
                            (weight_percent | percent) + mixture) + partsep +
                 mixture)

    def convert_by_weight(string, location, tokens):
        """convert mixture by %wt or %mass"""
        #print "by weight", tokens
        # Tokens alternate percentage, material, ..., ending with the base
        # material, which receives whatever is left of 100%.
        piece = tokens[1:-1:2] + [tokens[-1]]
        fract = [float(v) for v in tokens[:-1:2]]
        fract.append(100 - sum(fract))
        #print piece, fract
        if len(piece) != len(fract):
            raise ValueError("Missing base component of mixture")
        if fract[-1] < 0:
            raise ValueError("Formula percentages must sum to less than 100%")
        return _mix_by_weight_pairs(zip(piece, fract))

    mixture_by_weight = by_weight.setParseAction(convert_by_weight)

    # Mixture by volume: same layout as by weight, with a "%v"/"%vol"/"%volume"
    # marker on the first part.
    volume_percent = Regex("%v(ol(ume)?)?").suppress() + space
    by_volume = (count + volume_percent + mixture +
                 ZeroOrMore(partsep + count +
                            (volume_percent | percent) + mixture) + partsep +
                 mixture)

    def convert_by_volume(string, location, tokens):
        """convert mixture by %vol"""
        #print "by volume", tokens
        # Same token layout as convert_by_weight.
        piece = tokens[1:-1:2] + [tokens[-1]]
        fract = [float(v) for v in tokens[:-1:2]]
        fract.append(100 - sum(fract))
        #print piece, fract
        if len(piece) != len(fract):
            raise ValueError("Missing base component of mixture " + string)
        if fract[-1] < 0:
            raise ValueError("Formula percentages must sum to less than 100%")
        return _mix_by_volume_pairs(zip(piece, fract))

    mixture_by_volume = by_volume.setParseAction(convert_by_volume)

    # Mixture by layer thickness: "<thickness> <unit> material" parts joined
    # by "//", or a parenthesised layer stack followed by a repeat count.
    mixture_by_layer = Forward()
    layer_thick = Group(count + Regex(LENGTH_RE) + space)
    layer_part = (layer_thick + mixture) | (opengrp + mixture_by_layer +
                                            closegrp + count)
    mixture_by_layer << layer_part + ZeroOrMore(partsep + layer_part)

    def convert_by_layer(string, location, tokens):
        """convert layer thickness '# nm material'"""
        if len(tokens) < 2:
            return tokens
        piece = []
        fract = []
        # Tokens come in pairs: either (nested Formula, repeat count) or
        # ([value, unit], material).
        for p1, p2 in zip(tokens[0::2], tokens[1::2]):
            if isinstance(p1, Formula):
                f = p1.absthick * float(p2)
                p = p1
            else:
                f = float(p1[0]) * LENGTH_UNITS[p1[1]]
                p = p2
            piece.append(p)
            fract.append(f)
        # Thicknesses are turned into volume percentages of the whole stack.
        total = sum(fract)
        vfract = [(v / total) * 100 for v in fract]
        result = _mix_by_volume_pairs(zip(piece, vfract))
        result.thickness = total
        return result

    mixture_by_layer = mixture_by_layer.setParseAction(convert_by_layer)

    # Mixture by absolute amount: "<value> <mass or volume unit> material"
    # parts joined by "//", or a parenthesised mixture followed by a count.
    mixture_by_absmass = Forward()
    absmass_mass = Group(count + Regex(MASS_VOLUME_RE) + space)
    absmass_part = (absmass_mass + mixture) | (opengrp + mixture_by_absmass +
                                               closegrp + count)
    mixture_by_absmass << absmass_part + ZeroOrMore(partsep + absmass_part)

    def convert_by_absmass(string, location, tokens):
        """convert mass '# mg material'"""
        if len(tokens) < 2:
            return tokens
        piece = []
        fract = []
        # Tokens come in pairs: either (nested Formula, repeat count) or
        # ([value, unit], material).
        for p1, p2 in zip(tokens[0::2], tokens[1::2]):
            if isinstance(p1, Formula):
                p = p1
                f = p1.total_mass * float(p2)
            else:
                p = p2
                value = float(p1[0])
                if p1[1] in VOLUME_UNITS:
                    # convert to volume in liters to mass in grams before mixing
                    if p.density is None:
                        raise ValueError("Need the mass density of " + str(p))
                    f = value * VOLUME_UNITS[p1[1]] * 1000. * p.density
                else:
                    f = value * MASS_UNITS[p1[1]]
            piece.append(p)
            fract.append(f)

        # Amounts are turned into weight percentages of the total.
        total = sum(fract)
        mfract = [(m / total) * 100 for m in fract]
        result = _mix_by_weight_pairs(zip(piece, mfract))
        result.total_mass = total
        return result

    mixture_by_absmass = mixture_by_absmass.setParseAction(convert_by_absmass)

    # Alternatives are tried in this order; the first one that matches wins.
    ungrouped_mixture = (mixture_by_weight | mixture_by_volume
                         | mixture_by_layer | mixture_by_absmass)
    grouped_mixture = opengrp + ungrouped_mixture + closegrp + Optional(
        density, default=None)

    def convert_mixture(string, location, tokens):
        """convert (mixture) @ density"""
        formula = tokens[0]
        # An explicit density overrides whatever the mixing step computed.
        if tokens[-1] == 'n':
            formula.natural_density = tokens[-2]
        elif tokens[-1] == 'i':
            formula.density = tokens[-2]
        # elif tokens[-1] is None
        return formula

    grouped_mixture = grouped_mixture.setParseAction(convert_mixture)

    # A mixture component is a plain compound or a parenthesised mixture.
    mixture << (compound | grouped_mixture)
    formula = (compound | ungrouped_mixture | grouped_mixture)
    # Empty input is accepted and yields the Formula() default.
    grammar = Optional(formula, default=Formula()) + StringEnd()

    grammar.setName('Chemical Formula')
    return grammar
Ejemplo n.º 12
0
def _tdb_grammar(): #pylint: disable=R0914
    """
    Convenience function for getting the pyparsing grammar of a TDB file.

    Returns a first-match alternation over every supported command
    expression, tried in the order they are combined at the end of this
    function.

    ``float_number`` and ``chemical_formula`` are not defined here; they are
    taken from the enclosing module scope.
    """
    int_number = Word(nums).setParseAction(lambda t: [int(t[0])])
    # symbol name, e.g., phase name, function name; the same expression is
    # also used for the reference phase name of an ELEMENT command
    ref_phase_name = symbol_name = Word(alphanums+'_-:()/', min=1)
    # species name, e.g., CO2, AL, FE3+
    species_name = Word(alphanums+'+-*/_.', min=1) + Optional(Suppress('%'))
    # constituent arrays are colon-delimited
    # each subarray can be comma- or space-delimited
    constituent_array = Group(delimitedList(Group(OneOrMore(Optional(Suppress(',')) + species_name)), ':'))
    param_types = MatchFirst([TCCommand(param_type) for param_type in TDB_PARAM_TYPES])
    # Let sympy do heavy arithmetic / algebra parsing for us
    # a convenience function will handle the piecewise details
    # When no leading number is present (only optional commas), 0.01 is
    # substituted in its place.
    func_expr = (float_number | ZeroOrMore(',').setParseAction(lambda t: 0.01)) + OneOrMore(SkipTo(';') \
        + Suppress(';') + ZeroOrMore(Suppress(',')) + Optional(float_number) + \
        Suppress(Word('YNyn', exact=1) | White()))
    # ELEMENT
    cmd_element = TCCommand('ELEMENT') + Word(alphas+'/-', min=1, max=2) + ref_phase_name + \
        float_number + float_number + float_number + LineEnd()
    # SPECIES
    cmd_species = TCCommand('SPECIES') + species_name + chemical_formula + LineEnd()
    # TYPE_DEFINITION
    cmd_typedef = TCCommand('TYPE_DEFINITION') + \
        Suppress(White()) + CharsNotIn(' !', exact=1) + SkipTo(LineEnd())
    # FUNCTION
    cmd_function = TCCommand('FUNCTION') + symbol_name + \
        func_expr.setParseAction(_make_piecewise_ast)
    # ASSESSED_SYSTEMS
    cmd_ass_sys = TCCommand('ASSESSED_SYSTEMS') + SkipTo(LineEnd())
    # DEFINE_SYSTEM_DEFAULT
    cmd_defsysdef = TCCommand('DEFINE_SYSTEM_DEFAULT') + SkipTo(LineEnd())
    # DEFAULT_COMMAND
    cmd_defcmd = TCCommand('DEFAULT_COMMAND') + SkipTo(LineEnd())
    # DATABASE_INFO
    cmd_database_info = TCCommand('DATABASE_INFO') + SkipTo(LineEnd())
    # VERSION_DATE
    cmd_version_date = TCCommand('VERSION_DATE') + SkipTo(LineEnd())
    # REFERENCE_FILE
    cmd_reference_file = TCCommand('REFERENCE_FILE') + SkipTo(LineEnd())
    # ADD_REFERENCES
    cmd_add_ref = TCCommand('ADD_REFERENCES') + SkipTo(LineEnd())
    # LIST_OF_REFERENCES
    cmd_lor = TCCommand('LIST_OF_REFERENCES') + SkipTo(LineEnd())
    # TEMPERATURE_LIMITS
    cmd_templim = TCCommand('TEMPERATURE_LIMITS') + SkipTo(LineEnd())
    # PHASE
    # The integer after the phase options is matched but dropped, as is
    # anything left on the line after the float values.
    cmd_phase = TCCommand('PHASE') + symbol_name + \
        Suppress(White()) + CharsNotIn(' !', min=1) + Suppress(White()) + \
        Suppress(int_number) + Group(OneOrMore(float_number)) + \
        Suppress(SkipTo(LineEnd()))
    # CONSTITUENT
    cmd_constituent = TCCommand('CONSTITUENT') + symbol_name + \
        Suppress(White()) + Suppress(':') + constituent_array + \
        Suppress(':') + LineEnd()
    # PARAMETER
    # An optional "&<element>" after the phase name defaults to None and the
    # optional ";<order>" after the constituent array defaults to 0.
    cmd_parameter = TCCommand('PARAMETER') + param_types + \
        Suppress('(') + symbol_name + \
        Optional(Suppress('&') + Word(alphas+'/-', min=1, max=2), default=None) + \
        Suppress(',') + constituent_array + \
        Optional(Suppress(';') + int_number, default=0) + \
        Suppress(')') + func_expr.setParseAction(_make_piecewise_ast)
    # Now combine the grammar together
    all_commands = cmd_element | \
                    cmd_species | \
                    cmd_typedef | \
                    cmd_function | \
                    cmd_ass_sys | \
                    cmd_defsysdef | \
                    cmd_defcmd | \
                    cmd_database_info | \
                    cmd_version_date | \
                    cmd_reference_file | \
                    cmd_add_ref | \
                    cmd_lor | \
                    cmd_templim | \
                    cmd_phase | \
                    cmd_constituent | \
                    cmd_parameter
    return all_commands
Ejemplo n.º 13
0
def parser(text):
    """Parse ``text`` into a ``GlobDict`` of objects and return it.

    The input is treated as a sequence of object entries separated by line
    ends.  Each entry is a run of identifiers, optionally followed by a
    brace-delimited dictionary; ``rule`` / ``ltm rule`` entries keep the raw
    text of their braces instead.  ``#`` comments are ignored.

    The grammar is rebuilt on every call, and the pyparsing default
    whitespace characters are changed temporarily while the dictionary
    entries are defined.
    """
    # Converters used as parse actions
    cvtTuple = lambda toks: tuple(toks.asList())
    cvtRaw = lambda toks: RawString(' '.join(map(str, toks.asList())))
    #cvtDict = lambda toks: dict(toks.asList())
    cvtGlobDict = lambda toks: GlobDict(toks.asList())
    cvtDict = cvtGlobDict
    # Slice the untouched source text between the recorded start/end offsets
    extractText = lambda s, l, t: RawString(s[t._original_start:t._original_end])

    def pythonize(toks):
        """Turn the first token into a bool, None, int or float if it looks like one."""
        s = toks[0]
        if s == 'true':
            return True
        elif s == 'false':
            return False
        elif s == 'none':
            # Wrapped in a list so that None itself becomes the token
            return [None]
        elif s.isdigit():
            return int(s)
        elif re.match(r'(?i)^-?(\d+\.?e\d+|\d+\.\d*|\.\d+)$', s):
            return float(s)
        return toks[0]

    def noneDefault(s, loc, t):
        """Substitute RawEOL when the optional part matched nothing."""
        return t if len(t) else [RawEOL]

    # define punctuation as suppressed literals
    lbrace, rbrace = map(Suppress, "{}")

    identifier = Word(printables, excludeChars='{}"\'')
    quotedStr = QuotedString('"', escChar='\\', multiline=True) | \
                QuotedString('\'', escChar='\\', multiline=True)
    quotedIdentifier = QuotedString('"', escChar='\\', unquoteResults=False) | \
                       QuotedString('\'', escChar='\\', unquoteResults=False)
    # Dictionaries, sets and objects can nest, hence the forward declarations
    dictStr = Forward()
    setStr = Forward()
    objStr = Forward()

    #anyIdentifier = identifier | quotedIdentifier
    # An identifier immediately followed by a quoted string
    oddIdentifier = identifier + quotedIdentifier
    dictKey = dictStr | quotedStr | \
              Combine(oddIdentifier).setParseAction(cvtRaw)
    dictKey.setParseAction(cvtRaw)

    dictValue = quotedStr | dictStr | setStr | \
                Combine(oddIdentifier).setParseAction(cvtRaw)

    # NOTE(review): setParseAction() is called on the shared ``identifier``
    # expression below, which presumably affects every place it is used --
    # confirm this is intended.
    if OLD_STYLE_KEYS:
        dictKey |= Combine(identifier + ZeroOrMore(White(' ') + (identifier + ~FollowedBy(Optional(White(' ')) + LineEnd()))))
        dictValue |= identifier.setParseAction(pythonize)
    else:
        dictKey |= identifier
        dictValue |= delimitedList(identifier | quotedIdentifier, delim=White(' '), combine=True).setParseAction(pythonize)

    # Dictionary entries are line oriented, so newlines must not be skipped
    # as whitespace while these expressions are being created.
    ParserElement.setDefaultWhitespaceChars(' \t')
    #dictEntry = Group(Combine(OneOrMore(identifier | quotedIdentifier)).setParseAction(cvtRaw) +
    dictEntry = Group(dictKey +
                      Optional(White(' ').suppress() + dictValue).setParseAction(noneDefault) +
                      Optional(White(' ').suppress()) +
                      LineEnd().suppress())
    #dictEntry = Group(SkipTo(dictKey + LineEnd() + dictKey))
    dictStr << (lbrace + ZeroOrMore(dictEntry) + rbrace)
    dictStr.setParseAction(cvtDict)
    # Back to skipping newlines as well for the remaining expressions
    ParserElement.setDefaultWhitespaceChars(' \t\r\n')

    # A set is a brace-delimited, whitespace-separated list turned into a tuple
    setEntry = identifier.setParseAction(pythonize) | quotedString.setParseAction(removeQuotes)
    setStr << (lbrace + delimitedList(setEntry, delim=White()) + rbrace)
    setStr.setParseAction(cvtTuple)

    # TODO: take other literals as arguments
    # "rule <name> {...}" / "ltm rule <name> {...}": the braces are kept as raw text
    blobObj = Group(((Literal('ltm') + Literal('rule') + identifier) | \
                     (Literal('rule') + identifier)).setParseAction(cvtRaw) +
                    originalTextFor(nestedExpr('{', '}')).setParseAction(extractText))

    # Any other object: its identifiers joined into a key, then an optional dictionary
    objEntry = Group(OneOrMore(identifier | quotedIdentifier).setParseAction(cvtRaw) +
                     Optional(dictStr).setParseAction(noneDefault))
    objStr << (Optional(delimitedList(blobObj | objEntry, delim=LineEnd())))
    objStr.setParseAction(cvtGlobDict)
    #objStr.setParseAction(cvtTuple)
    objStr.ignore(pythonStyleComment)

    return objStr.parseString(text)[0]
Ejemplo n.º 14
0
def _make_default_parser():
    """Build the default query grammar and return its ``parseString`` method.

    The grammar attaches these result names to what it matches: ``Word``,
    ``Wildcard``, ``Range``, ``Quotes``, ``Group``, ``Boost``, ``Field``,
    ``Not``, ``And``, ``Or``, ``AndNot`` and ``Toplevel``.

    :returns: the bound ``parseString`` method of the top-level expression,
        which also requires the end of the input string.
    """
    backslash = "\\"

    # Ordinary text is any run of characters other than the syntax characters
    # listed here; a backslash escapes exactly one printable or whitespace
    # character (the backslash itself is dropped from the output).
    bare_text = CharsNotIn('\\*?^():"{}[] ')
    escaped_char = Suppress(backslash) + (Word(printables, exact=1)
                                          | White(exact=1))
    token = Combine(OneOrMore(bare_text | escaped_char))

    # --- plain word ---------------------------------------------------------
    plain_word = Group(token).setResultsName("Word")

    # --- wildcard word: contains '*' and/or '?' -----------------------------
    wild_run = Word("?*")
    # Text first, with wild characters mixed in afterwards.
    wild_after_text = token + OneOrMore(wild_run + Optional(token))
    # Wild characters first, followed by mixed text or by a token boundary.
    wild_leading = wild_run + (OneOrMore(token + Optional(wild_run))
                               | FollowedBy(White() | StringEnd()))
    wildcard_word = Group(
        Combine(wild_after_text | wild_leading)).setResultsName("Wildcard")

    # --- range of terms: [a TO b], {a TO b}, open on either side ------------
    fence_open = Literal("[") | Literal("{")
    fence_close = Literal("]") | Literal("}")
    bound = QuotedString('"') | token
    # An empty Group stands in for the missing bound of an open range.
    range_from_open = (Group(Empty())
                       + Suppress(Keyword("TO") + White())
                       + Group(bound))
    range_to_open = (Group(bound)
                     + Suppress(White() + Keyword("TO"))
                     + Group(Empty()))
    range_closed = (Group(bound)
                    + Suppress(White() + Keyword("TO") + White())
                    + Group(bound))
    term_range = Group(
        fence_open
        + (range_closed | range_from_open | range_to_open)
        + fence_close).setResultsName("Range")

    # --- anything word-like; alternatives are tried in this order -----------
    word_like = term_range | wildcard_word | plain_word

    # --- quoted phrase ------------------------------------------------------
    phrase = Group(QuotedString('"')).setResultsName("Quotes")

    # The full expression is recursive (parentheses, binary operators), so it
    # is declared now and defined at the bottom.
    expr = Forward()

    # Parentheses may enclose any expression.
    paren_group = Group(
        (Suppress("(") + expr + Suppress(")"))).setResultsName("Group")

    # --- boost: <unit>^<number> ---------------------------------------------
    boostable = word_like | phrase
    boosted = Group(
        boostable + Suppress("^")
        + Word("0123456789", ".0123456789")).setResultsName("Boost")

    # --- field prefix: <name>:<unit> ----------------------------------------
    # A group, quoted phrase or word can be restricted to a field by
    # prepending 'fn:', where fn is the field name.
    fieldable = paren_group | boosted | boostable
    fielded = Group(
        Word(alphanums + "_") + Suppress(':')
        + fieldable).setResultsName("Field")

    # --- units of content, optionally negated -------------------------------
    content_unit = fielded | fieldable
    negated = Group(
        Suppress(Keyword("not", caseless=True)) + Suppress(White())
        + content_unit).setResultsName("Not")
    any_unit = negated | content_unit

    # --- binary operators (keywords are case-sensitive) ---------------------
    kw_and = Keyword("AND", caseless=False)
    kw_or = Keyword("OR", caseless=False)
    kw_andnot = Keyword("ANDNOT", caseless=False)

    and_expr = Group(
        any_unit + Suppress(White()) + Suppress(kw_and)
        + Suppress(White()) + expr).setResultsName("And")
    or_expr = Group(
        any_unit + Suppress(White()) + Suppress(kw_or)
        + Suppress(White()) + expr).setResultsName("Or")
    andnot_expr = Group(
        content_unit + Suppress(White()) + Suppress(kw_andnot)
        + Suppress(White()) + content_unit).setResultsName("AndNot")

    # An expression is a sequence of operators/units separated by (dropped)
    # whitespace, or nothing at all.
    expr << (OneOrMore(and_expr | or_expr | andnot_expr
                       | any_unit | Suppress(White())) | Empty())

    root = Group(expr).setResultsName("Toplevel") + StringEnd()

    return root.parseString
Ejemplo n.º 15
0
class NetworkParser(object):
    """Parse and re-serialise a Debian-style ``interfaces`` file.

    The grammar is built once as class attributes. An instance parses either
    the text passed as ``content`` or the file named by ``infile`` and exposes
    the result as a dictionary in ``self.interfaces``.
    """

    interface = Word(alphanums)
    key = Word(alphanums + "-_")
    space = White().suppress()
    value = CharsNotIn("{}\n#")
    line = Regex("^.*$")
    comment = ("#")
    method = Regex("loopback|manual|dhcp|static")
    stanza = Regex("auto|iface|mapping")
    # Raw strings: ``\w`` is a regex escape, not a Python string escape.
    option_key = Regex(r"bridge_\w*|post-\w*|up|down|pre-\w*|address"
                       r"|network|netmask|gateway|broadcast|dns-\w*|scope|"
                       r"pointtopoint|metric|hwaddress|mtu|hostname|"
                       r"leasehours|leasetime|vendor|client|bootfile|server"
                       r"|mode|endpoint|dstaddr|local|ttl|provider|unit"
                       r"|options|frame|netnum|media")
    _eol = Literal("\n").suppress()
    # One option line: leading whitespace, a known key, whitespace, then
    # everything up to the end of the line as the value.
    option = Forward()
    option << Group(space
                    + option_key + space + SkipTo(_eol))
    # One stanza line, optionally followed by "inet <method>" and its options.
    # With the optional part present a parsed block has exactly five items:
    # [stanza, name, "inet", method, [options...]].
    interface_block = Forward()
    interface_block << Group(stanza + space + interface +
                             Optional(space + Regex("inet") + method +
                                      Group(ZeroOrMore(option))))

    interface_file = OneOrMore(interface_block).ignore(pythonStyleComment)

    file_header = """# File parsed and saved by privacyidea.\n\n"""

    def __init__(self, infile="/etc/network/interfaces", content=None):
        """Parse ``content`` if it is truthy, otherwise read ``infile``.

        :param infile: path of the file to read when no content is given
        :param content: text to parse instead of reading a file
        """
        self.filename = None
        if content:
            self.content = content
        else:
            self.filename = infile
            self._read()

        self.interfaces = self.get_interfaces()

    def _read(self):
        """
        Reread the contents from the disk
        """
        # The context manager closes the file even if read() raises.
        with codecs.open(self.filename, "r", "utf-8") as f:
            self.content = f.read()

    def get(self):
        """
        Return the grouped config as produced by the grammar.

        When the instance is file-backed the file is re-read first.
        """
        if self.filename:
            self._read()
        config = self.interface_file.parseString(self.content)
        return config

    def save(self, filename=None):
        """Write the formatted interfaces to ``filename``.

        :param filename: target path; falls back to the file the instance
            was loaded from
        :raises Exception: if neither a filename is given nor known
        """
        if not filename and not self.filename:
            raise Exception("No filename specified")

        # The given filename overrules the own filename
        fname = filename or self.filename
        with open(fname, "w") as f:
            f.write(self.format())

    def format(self):
        """
        Format ``self.interfaces`` e.g. for writing to a file.

        {"eth0": {"auto": True,
                  "method": "static",
                  "options": {"address": "1.1.1.1",
                              "netmask": "255.255.255.0"
                              }
                  }
        }
        results in

        auto eth0
        iface eth0 inet static
            address 1.1.1.1
            netmask 255.255.255.0

        :return: string
        """
        chunks = []
        for iface, iconfig in self.interfaces.items():
            if iconfig.get("auto"):
                chunks.append("auto %s\n" % iface)

            # A missing method is written as "manual".
            chunks.append("iface %s inet %s\n"
                          % (iface, iconfig.get("method", "manual")))
            # options
            for opt_key, opt_value in iconfig.get("options", {}).items():
                chunks.append("    %s %s\n" % (opt_key, opt_value))
            # add a new line
            chunks.append("\n")
        return "".join(chunks)

    def get_interfaces(self):
        """
        return the configuration by interfaces as a dictionary like

        { "eth0": {"auto": True,
                   "method": "static",
                   "options": {"address": "192.168.1.1",
                               "netmask": "255.255.255.0",
                               "gateway": "192.168.1.254",
                               "dns-nameserver": "1.2.3.4"
                               }
                   }
        }

        :return: dict
        """
        interfaces = {}
        for idefinition in self.get():
            kind = idefinition[0]
            entry = interfaces.setdefault(idefinition[1], {})
            if kind == "auto":
                entry["auto"] = True
            elif kind == "iface" and len(idefinition) > 3:
                # Only read the method when "inet <method>" was actually
                # parsed; a bare "iface <name>" block has just two items.
                entry["method"] = idefinition[3]
            # Five items means the optional part, including its option
            # group, is present.
            if len(idefinition) == 5:
                entry["options"] = {o[0]: o[1] for o in idefinition[4]}
        return interfaces
Ejemplo n.º 16
0
    def __init__(self, query):
        """Compile the search query and build the log-line parser.

        :param query: boolean search expression. It is lower-cased before
            parsing; a falsy value leaves ``self._query_parser`` as ``False``.
        """
        # Maps the result names produced by the query grammar to the methods
        # that evaluate them.
        self._methods = {
            'and': self.evaluate_and,
            'or': self.evaluate_or,
            'not': self.evaluate_not,
            'parenthesis': self.evaluate_parenthesis,
            'quotes': self.evaluate_quotes,
            'word': self.evaluate_word,
        }

        self.line = ''
        self.query = query.lower() if query else ''

        if not self.query:
            self._query_parser = False
        else:
            # Grammar, from loosest to tightest binding:
            #   or  >  and (explicit or implicit)  >  not  >  (...)  >  "..."
            #   >  word
            or_expr = Forward()
            bare_word = Group(Word(alphanums)).setResultsName('word')

            # One or more words, defined right-recursively.
            quoted_words = Forward()
            quoted_words << ((bare_word + quoted_words) | bare_word)

            quoted = Group(
                Suppress('"') + quoted_words + Suppress('"')
            ).setResultsName('quotes') | bare_word

            parenthesised = Group(
                (Suppress('(') + or_expr + Suppress(")"))
            ).setResultsName('parenthesis') | quoted

            # The negation keyword is the literal 'no' (case-insensitive).
            not_expr = Forward()
            not_expr << (
                Group(
                    Suppress(Keyword('no', caseless=True)) + not_expr
                ).setResultsName('not')
                | parenthesised)

            # "a and b", or plain juxtaposition "a b" as long as the next
            # token is not one of the operators.
            and_expr = Forward()
            and_expr << (
                Group(
                    not_expr + Suppress(Keyword('and', caseless=True))
                    + and_expr
                ).setResultsName('and')
                | Group(
                    not_expr + OneOrMore(~oneOf('and or') + and_expr)
                ).setResultsName('and')
                | not_expr)

            or_expr << (
                Group(
                    and_expr + Suppress(Keyword('or', caseless=True))
                    + or_expr
                ).setResultsName('or')
                | and_expr)

            self._query_parser = or_expr.parseString(self.query)[0]

        # Log line layout: "<n>-<n>-<n> <n>:<n> <level> <plugin> <task> <message>".
        # Every numeric component is left-padded with zeros to two digits.
        two_digits = Word(nums).setParseAction(lambda toks: toks[0].zfill(2))
        stamp = Combine(two_digits + '-' + two_digits + '-' + two_digits +
                        ' ' + two_digits + ':' + two_digits)
        field = Word(printables)

        # A run of at least 16 whitespace characters yields an empty task;
        # otherwise the task is the next word after some whitespace.
        blank_task = White(min=16).setParseAction(
            lambda _s, _loc, toks: [toks[0].strip()]).setResultsName('task')
        named_task = (White(min=1).suppress() & field.setResultsName('task'))

        self._log_parser = (
            stamp.setResultsName('timestamp') +
            field.setResultsName('log_level') +
            field.setResultsName('plugin') +
            (blank_task | named_task) +
            restOfLine.setResultsName('message'))
Ejemplo n.º 17
0
def formula_grammar(table):
    """
    Construct a parser for molecular formulas.

    :Parameters:

        *table* : PeriodicTable
             Periodic table used to look up element symbols
             (``table.symbol(name)``) and, for isotopes, indexed by the
             isotope number.

    :Returns:
        *parser* : pyparsing.ParserElement.
            The ``parser.parseString()`` method returns a list of
            pairs (*count,fragment*), where fragment is an *isotope*,
            an *element* or a list of pairs (*count,fragment*).

    """

    def convert_element(string, location, tokens):
        # tokens are (element, isotope number, count); isotope 0 means
        # "no isotope given".
        elem, iso, n = tokens[0:3]
        if iso != 0:
            elem = elem[iso]
        return (n, elem)

    def convert_implicit(string, location, tokens):
        # tokens are (count, element, element, ...)
        n = tokens[0]
        rest = tokens[1:]
        if n == 1:
            return rest
        return (n, rest)

    def convert_explicit(string, location, tokens):
        # tokens are (fragment..., count)
        n = tokens[-1]
        inner = tokens[:-1]
        if n == 1:
            return inner
        return (n, inner)

    # The formula is recursive through parenthesised groups.
    formula = Forward()

    # Element symbol, translated through the periodic table.
    symbol = Regex("[A-Z][a-z]*").setParseAction(
        lambda _s, _loc, toks: table.symbol(toks[0]))

    # Optional isotope number in square brackets; it must follow the symbol
    # with no whitespace in between. Defaults to 0.
    openiso = Literal('[').suppress()
    closeiso = Literal(']').suppress()
    isotope = Optional(~White() + openiso + Regex("[1-9][0-9]*") + closeiso,
                       default='0')
    isotope = isotope.setParseAction(
        lambda _s, _loc, toks: int(toks[0]) if toks[0] else 0)

    # Optional count, fractional or whole, again with no leading whitespace.
    # Defaults to 1.
    fract = Regex("(0|[1-9][0-9]*|)([.][0-9]*)").setParseAction(
        lambda _s, _loc, toks: float(toks[0]) if toks[0] else 1)
    whole = Regex("[1-9][0-9]*").setParseAction(
        lambda _s, _loc, toks: int(toks[0]) if toks[0] else 1)
    count = Optional(~White() + (fract | whole), default=1)

    # symbol, isotope, count -> (count, element-or-isotope)
    element = (symbol + isotope + count).setParseAction(convert_element)

    # "count elements" -> pair
    implicit_group = (count + OneOrMore(element)).setParseAction(
        convert_implicit)

    # "(formula) count" -> pair
    opengrp = Literal('(').suppress()
    closegrp = Literal(')').suppress()
    explicit_group = (opengrp + formula + closegrp + count).setParseAction(
        convert_explicit)

    group = implicit_group | explicit_group
    # Groups may be separated by an optional '+' and optional whitespace.
    separator = Optional(Literal('+').suppress()) + Optional(
        White().suppress())
    formula << (group + ZeroOrMore(
        Optional(White().suppress()) + separator + group))

    # An empty string is accepted; trailing text is not.
    grammar = Optional(formula) + StringEnd()
    grammar.setName('Chemical Formula')
    return grammar
Ejemplo n.º 18
0
    def script(self):
        """Build and return the grammar for a sequence of nginx statements.

        The returned element is ``sub_block``: one or more statements, each
        wrapped in its own ``Group``. Block-like statements are declared as
        ``Forward`` so that blocks can nest.
        """
        # constants
        left_bracket = Suppress("{")
        right_bracket = Suppress("}")
        semicolon = Suppress(";")
        space = White().suppress()
        keyword = Word(alphanums + ".+-_/")
        path = Word(alphanums + ".-_/")
        variable = Word("$_-" + alphanums)
        # Unquoted value: a run of non-space characters that may contain
        # parenthesised chunks and ${var} references.
        value_wq = Regex(r'(?:\([^\s;]*\)|\$\{\w+\}|[^\s;(){}])+')
        value_sq = NginxQuotedString(quoteChar="'")
        value_dq = NginxQuotedString(quoteChar='"')
        # Quoted forms are tried before the unquoted one.
        value = (value_dq | value_sq | value_wq)
        # modifier for location uri [ = | ~ | ~* | ^~ ]
        location_modifier = (Keyword("=") | Keyword("~*") | Keyword("~")
                             | Keyword("^~"))
        # modifier for if statement
        if_modifier = Combine(
            Optional("!") +
            (Keyword("=") | Keyword("~*") | Keyword("~")
             | (Literal("-") +
                (Literal("f") | Literal("d") | Literal("e") | Literal("x")))))
        # This ugly workaround needed to parse unquoted regex with nested parentheses
        # so we capture all content between parentheses and then parse it :(
        # TODO(buglloc): may be use something better?
        condition_body = (
            (if_modifier + Optional(space) + value) |
            (variable +
             Optional(space + if_modifier + Optional(space) + value)))
        # The regex grabs the whole "( ... )" text; the parse action strips the
        # outer parentheses and re-parses the inside with condition_body.
        condition = Regex(r'\((?:[^()\n\r\\]|(?:\(.*\))|(?:\\.))+?\)')\
            .setParseAction(lambda s, l, t: condition_body.parseString(t[0][1:-1]))

        # rules
        include = (Keyword("include") + space + value + semicolon)("include")

        directive = (keyword + ZeroOrMore(space + value) +
                     semicolon)("directive")

        # Marker line of the form "# configuration file <path>:".
        file_delimiter = (Suppress("# configuration file ") + path +
                          Suppress(":"))("file_delimiter")

        comment = (Regex(r"#.*"))("comment").setParseAction(_fix_comment)

        # One entry inside a hash-style block: values terminated by ';'.
        hash_value = Group(value + ZeroOrMore(space + value) +
                           semicolon)("hash_value")

        # Declared up front because sub_block and the blocks refer to each other.
        generic_block = Forward()
        if_block = Forward()
        location_block = Forward()
        hash_block = Forward()
        unparsed_block = Forward()

        # Alternatives are tried in the order listed; unparsed_block is the
        # last resort.
        sub_block = OneOrMore(
            Group(if_block | location_block | hash_block | generic_block
                  | include | directive | file_delimiter | comment
                  | unparsed_block))

        if_block << (
            Keyword("if") + Group(condition) +
            Group(left_bracket + Optional(sub_block) + right_bracket))("block")

        location_block << (Keyword("location") + Group(
            Optional(space + location_modifier) + Optional(space) + value) +
                           Group(left_bracket + Optional(sub_block) +
                                 right_bracket))("block")

        # A block whose body consists only of hash_value entries.
        hash_block << (keyword + Group(OneOrMore(space + value)) +
                       Group(left_bracket + Optional(OneOrMore(hash_value)) +
                             right_bracket))("block")

        generic_block << (
            keyword + Group(ZeroOrMore(space + value)) +
            Group(left_bracket + Optional(sub_block) + right_bracket))("block")

        # Same header as generic_block, but the body is only matched as nested
        # braces and not parsed into statements.
        unparsed_block << (
            keyword + Group(ZeroOrMore(space + value)) +
            nestedExpr(opener="{", closer="}"))("unparsed_block")

        return sub_block
Ejemplo n.º 19
0
def all_in(name):
    """Build the grammar for an ``allin<name>:`` tag and its words.

    The tag is followed by a dropped ``:`` and optional whitespace, then one
    or more ``word`` items that stop at the next tag or at the end of the
    string. ``tag_value`` is attached as the parse action.
    """
    # Built left to right with plain ``+`` so the expression tree is the same
    # as a single chained sum.
    expr = tag('allin{}'.format(name))
    expr = expr + Suppress(':')
    expr = expr + Suppress(Optional(White()))
    expr = expr + OneOrMore(word, stopOn=any_tag | StringEnd())
    return expr.setParseAction(tag_value)
Ejemplo n.º 20
0
# Command tokens. ADD and SUB are referenced below but are defined outside
# this excerpt.

# '>': increment the data pointer (to point to the next cell to the right).
INCP = Literal('>')
# '<': decrement the data pointer (to point to the next cell to the left).
DECP = Literal('<')
# ',': accept one byte of input, storing its value in the byte at the data
# pointer.
INPUT = Literal(',')
# '.': output the byte at the data pointer.
OUTPUT = Literal('.')
# '[': if the byte at the data pointer is zero, then instead of moving the
# instruction pointer forward to the next command, jump it forward to the
# command after the matching ] command.
OPEN_LOOP = Literal('[')
# ']': if the byte at the data pointer is nonzero, then instead of moving the
# instruction pointer forward to the next command, jump it back to the command
# after the matching [ command.
CLOSE_LOOP = Literal(']')
# Anything else: a printable word, optionally followed by newlines.
COMMENTS = Combine((Word(printables) + White(ws='\n')) | Word(printables))

# A program is any sequence of commands; comment text is matched but dropped.
program = ZeroOrMore(
    ADD | SUB | INCP | DECP | INPUT | OUTPUT | OPEN_LOOP | CLOSE_LOOP
    | Suppress(COMMENTS))


class LEXER(object):
    def __init__(self, file):
        self.path = file
        self.token_list = []

    def tokenize_file(self):
        try:

            return program.parseFile(self.path)
        except:
Ejemplo n.º 21
0
from pyparsing import Word, oneOf, White, OneOrMore, alphanums, LineEnd, \
    Group, Suppress, Literal, printables, ParseException, ungroup

# For tags that have an argument in the form of
# a conditional expression. The reason this is done
# is so that a tag with the ">" operator in the
# arguments will parse correctly.
# OPERAND is one run of the characters listed below (no spaces, "<" or ">").
OPERAND = Word(alphanums + "." + '"' + '/-' + "*:^_![]?$%@)(#=`" + '\\')
# useRegex=False keeps the alternatives as plain literals.
OPERATOR = oneOf(["<=", ">=", "==", "!=", "<", ">", "~"], useRegex=False)
# operand <whitespace> operator <whitespace> operand; the whitespace is
# required and is kept in the results.
EXPRESSION_TAG = OPERAND + White() + OPERATOR + White() + OPERAND

# LITERAL_TAG will match tags that do not have
# a conditional expression. So any other tag
# with arguments that don't contain OPERATORs
# (the character set includes the space, so arguments are matched as text).
LITERAL_TAG = OneOrMore(Word(
    alphanums + '*:' + '/' + '"-' + '.' + " " + "^" + "_" + "!" + "[]?$"
    + "'" + '\\'
))
# Will match the start of any tag
# (the expression form is tried before the literal form).
TAG_START_GRAMMAR = Group(Literal("<") + (EXPRESSION_TAG | LITERAL_TAG)
                          + Literal(">") + LineEnd())

# Will match the end of any tag
TAG_END_GRAMMAR = Group(Literal("</") + Word(alphanums) + Literal(">")
                        + LineEnd())

# Will match any directive. We are performing
# a simple parse by matching the directive on
# the left, and everything else on the right.
# The separating whitespace is dropped; the right-hand side may contain
# printable characters and the whitespace characters in the literal below.
ANY_DIRECTIVE = Group(Word(alphanums) + Suppress(White())
                      + Word(printables + "     ") + LineEnd())
Ejemplo n.º 22
0
import re
import sys
from pyparsing import (Word, White, Literal, Regex,
                       LineEnd, SkipTo,
                       ZeroOrMore, OneOrMore, Combine, Optional, Suppress,
                       Group, ParserElement,
                       stringEnd, pythonStyleComment)

# NOTE: everything down to the set_default_whitespace_chars() call is created
# while pyparsing's previous default whitespace setting is still in effect, so
# the statement order here matters.

# End of line, dropped from the results.
EOL = LineEnd().suppress()

# Fixed-width hexadecimal numbers of 1, 2, 3, 4 and 6 digits.
NUM1, NUM2, NUM3, NUM4, NUM6 = (
    Word('0123456789abcdefABCDEF', exact=width)
    for width in (1, 2, 3, 4, 6)
)

# Exactly one tab character, dropped from the results.
TAB = White('\t', exact=1).suppress()
COMMENTLINE = pythonStyleComment + EOL
EMPTYLINE = LineEnd()


def text_eol(name):
    """Return a grammar for the rest of a non-empty line, named *name*.

    The text is everything up to (not including) the newline; the line end
    itself is consumed and dropped.
    """
    return Regex(r'[^\n]+')(name) + EOL


ParserElement.set_default_whitespace_chars(' \n')

def klass_grammar():
    klass_line = Literal('C ').suppress() + NUM2('klass') + text_eol('text')
    subclass_line = TAB + NUM2('subclass') + text_eol('text')
    protocol_line = TAB + TAB + NUM2('protocol') + text_eol('name')
    subclass = (subclass_line('SUBCLASS') -
                ZeroOrMore(Group(protocol_line)('PROTOCOLS*')
                           ^ COMMENTLINE.suppress()))
    klass = (klass_line('KLASS') -
             ZeroOrMore(Group(subclass)('SUBCLASSES*')
Ejemplo n.º 23
0
# Right-hand side of a comparison: a number, a quoted string (quotes are
# stripped by the parse action added here) or a column name.
# TODO: need to add support for alg expressions
columnRval = realNum | intNum | quotedString.addParseAction(
    removeQuotes) | columnName
# One WHERE condition; the alternatives are tried in this order:
#   column <binop> value-or-any-printable-word
#   column IN ( value, value, ... )
#   column IN ( <nested statement> )
#   ( <where expression> )
whereCondition = Group((columnName + binop + (columnRval | Word(printables)))
                       | (columnName + in_ + "(" + delimitedList(columnRval) +
                          ")") | (columnName + in_ + "(" + statement + ")")
                       | ("(" + whereExpression + ")"))
# Conditions chained with AND / OR (right-recursive).
whereExpression << whereCondition + ZeroOrMore((and_ | or_) + whereExpression)
''' Assignment for handoff. '''
# A SET item is an identifier, a quoted "json_path" AS name, or a
# parenthesised set expression.
setExpression = Forward()
setStatement = Group((ident) | (quotedString("json_path") + AS + ident("name"))
                     | ("(" + setExpression + ")"))
setExpression << setStatement + ZeroOrMore((and_ | or_) + setExpression)

# Any mix of line ends and whitespace, possibly empty.
optWhite = ZeroOrMore(LineEnd() | White())
""" Define the statement grammar. """
# Three statement forms, all under the result name "statement":
#   1. SELECT ... FROM ... [WHERE ...] [SET ...]
#   2. SET column = value          (result name "set")
#   3. CREATE GRAPH name AT ... AS ...
# NOTE(review): in form 1 the result name "select" is attached to the trailing
# optional SET group only, not to the whole select statement - confirm that
# this is intended.
statement <<= (Group(
    Group(SELECT + question_graph_expression)("concepts") + optWhite +
    Group(FROM + tableNameList) + optWhite +
    Group(Optional(WHERE + whereExpression("where"), "")) + optWhite +
    Group(Optional(SET + setExpression("set"), ""))("select"))
               | Group(SET +
                       (columnName + EQ +
                        (quotedString | ident | intNum | realNum)))("set")
               | Group(
                   Group(CREATE + GRAPH + ident) + optWhite +
                   Group(AT + (ident | quotedString)) + optWhite +
                   Group(AS + (ident | quotedString))))("statement")
""" Make a program a series of statements. """
# One or more statements.
program_grammar = statement + ZeroOrMore(statement)
Ejemplo n.º 24
0
from .errors import GrammarError
from .expansions import (AlternativeSet, KleeneStar, Literal, NamedRuleRef,
                         NullRef, OptionalGrouping, RequiredGrouping, Repeat,
                         Sequence, VoidRef, SingleChildExpansion)
from .grammars import Grammar, Import
from .references import (optionally_qualified_name, import_name, grammar_name,
                         word, words)
from .rules import Rule

# Angled brackets are matched but never appear in the parse results.
langle = Suppress("<")
rangle = Suppress(">")

# A line ending is a run of one or more ';' or newline characters, which means
# empty lines are swallowed too. Nothing of it appears in the parse results.
_single_line_end = (PPLiteral(";") | White("\n")).setName("line end")
line_delimiter = Suppress(OneOrMore(_single_line_end))


class WrapperExpansion(SingleChildExpansion):
    """
    Wrapper expansion class used during the parser's post-processing stage.

    The class adds no attributes or methods of its own; it only exists as a
    distinct ``SingleChildExpansion`` subtype.
    """


class WeightedExpansion(SingleChildExpansion):
    """
    Internal class used during parsing of alternative sets with weights.

    Pairs a single child expansion with the weight that was parsed for it.
    """
    def __init__(self, expansion, weight):
        """
        :param expansion: the child expansion, passed to the base constructor
        :param weight: the weight, stored as-is on ``self.weight``
        """
        super(WeightedExpansion, self).__init__(expansion)
        self.weight = weight
        # NOTE(review): sets ``_child`` directly after the base constructor
        # has run; presumably this bypasses base-class processing of the
        # child - confirm against SingleChildExpansion.
        self._child = expansion
Ejemplo n.º 25
0
class TypeDocGrammar:
    """
    pyparsing grammar, held as class attributes, for the docstring format
    described by the following rules.

    EOL ::= ["\r"] "\n"
    SOL ::= LINE_START
    line ::= [^EOL]+ EOL
    word ::= alphanums + "_"
    indented_block ::= INDENT (line_indented | any_line)
    line_indented ::= any_line indented_block
    type_definition ::= ":type" [^:]+ ":" [^EOL]+
    rtype_definition ::= ":rtype:" [^EOL]+
    returns_definition ::= (":returns:" | ":return:") [^EOL]+
    param_definition ::= ":param" [^:]+ ":" [^EOL]+ EOL [indented_block]
    response_structure ::= "**Response Structure**" line [indented_block]
    typed_dict_key_line ::= "-" "**" word "**" "*(" word ")" "--*" [^EOL]+ + EOL
    type_line ::= "-" "*(" word ")" "--*" [^EOL]+ + EOL
    any_line ::= typed_dict_key_line | type_line | line
    """

    # Indentation stack handed to indentedBlock(). It is a single list shared
    # through the class, which is why reset() clears it in place instead of
    # rebinding the name.
    indent_stack = [1]
    SOL = LineStart().suppress()
    EOL = LineEnd().suppress()
    word = Word(alphanums + "_")
    line = SkipTo(LineEnd()) + EOL
    # Forward declarations: line_indented, any_line and indented_block refer
    # to each other.
    line_indented = Forward()
    any_line = Forward()
    indented_block = indentedBlock(
        line_indented | any_line,
        indentStack=indent_stack).setResultsName("indented")
    line_indented <<= any_line + indented_block

    # ":type <name>: <type_name>" at the start of a line.
    type_definition = (SOL + Literal(":type") +
                       SkipTo(":").setResultsName("name") + Literal(":") +
                       SkipTo(EOL).setResultsName("type_name"))

    # ":rtype: <type_name>" at the start of a line.
    rtype_definition = (SOL + Literal(":rtype:") +
                        SkipTo(EOL).setResultsName("type_name"))

    # ":returns: <description>" or ":return: <description>".
    returns_definition = (SOL + (Literal(":returns:") | Literal(":return:")) +
                          SkipTo(EOL).setResultsName("description"))

    # ":param <name>: <description>", optionally followed by an indented block.
    param_definition = (SOL + Literal(":param") +
                        SkipTo(":").setResultsName("name") + Literal(":") +
                        SkipTo(EOL).setResultsName("description") + EOL +
                        Optional(indented_block))

    response_structure = Literal("**Response Structure**") + line_indented

    # "- **name** *(type_name)* --* description"; the spaces/tabs between the
    # parts are required.
    typed_dict_key_line = (Literal("-") + White(ws=" \t") + Literal("**") +
                           word.setResultsName("name") + Literal("**") +
                           White(ws=" \t") + Literal("*(") +
                           word.setResultsName("type_name") + Literal(")") +
                           White(ws=" \t") + Literal("--*") +
                           SkipTo(EOL).setResultsName("description") + EOL)

    # "- *(type_name)* --* description".
    type_line = (Literal("-") + White(ws=" \t") + Literal("*(") +
                 word.setResultsName("type_name") + Literal(")") +
                 White(ws=" \t") + Literal("--*") +
                 SkipTo(EOL).setResultsName("description") + EOL)

    # The most specific line forms are tried first; a plain line is the
    # fallback.
    any_line <<= (typed_dict_key_line | type_line
                  | line).setResultsName("line")

    @classmethod
    def fail_action(cls, _input_string: str, _chr_index: int, _source: str,
                    error: BaseException) -> None:
        """
        Fail action for ``indented_block``.

        Re-raises ``error`` unless its message contains "found end of text",
        in which case the failure is ignored.
        """
        if "found end of text" not in str(error):
            raise error

    @classmethod
    def reset(cls) -> None:
        """
        Restore the shared grammar state: call ``disable_packrat``, install
        ``fail_action`` on ``indented_block`` and reset the indent stack to
        ``[1]`` in place.
        """
        cls.disable_packrat()
        cls.indented_block.setFailAction(cls.fail_action)
        cls.indent_stack.clear()
        cls.indent_stack.append(1)

    @staticmethod
    def enable_packrat() -> None:
        """Call ``ParserElement.enablePackrat`` with a cache size limit of 128."""
        ParserElement.enablePackrat(cache_size_limit=128)

    @staticmethod
    def disable_packrat() -> None:
        """
        Call ``ParserElement.enablePackrat`` with ``cache_size_limit=None``.

        NOTE(review): despite the method name this calls ``enablePackrat``
        rather than a disabling API - confirm it has the intended effect.
        """
        ParserElement.enablePackrat(cache_size_limit=None)
Ejemplo n.º 26
0
def usfmTokenValue(key, value):
    """Return a grouped grammar for the marker ``\\<key>`` plus optional *value*.

    The backslash and the whitespace after the marker are dropped; the group
    holds the key followed by the value when one is present.
    """
    marker = Suppress(backslash) + Literal(key)
    marker = marker + Suppress(White())
    return Group(marker + Optional(value))
Ejemplo n.º 27
0
class SmilesPattern:
    """pyparsing grammar fragments for SMILES strings.

    All grammar pieces are class attributes built when the class body runs.
    The visible pieces cover atoms (organic subset and bracketed atoms),
    bonds, ring bonds, the ``.`` separator and parenthesised branches.
    """

    def __init__(self):
        pass

    def addRawStr(toks):
        """Parse action: store the matched text under the ``rawStr`` name.

        This is used as a plain function inside the class body (it takes no
        ``self``). For a branch the raw string is the ``branch`` result;
        otherwise it is all tokens joined together.
        """
        if 'branch' in toks:
            toks['rawStr'] = toks['branch']
        else:
            toks['rawStr'] = ''.join(toks[:])
        return toks

    whitespace = White().leaveWhitespace()

    ### ATOM SECTION ###
    # Organic subset: atoms that may be written without brackets.
    _aliphatic_organic = ( Literal('Cl').setResultsName('symbol') \
                         | Literal('Br').setResultsName('symbol') \
                         | Word('BCNOSPFI',exact=1).setResultsName('symbol') ).setResultsName('organic')
    _aromatic_organic = ( Literal('c').setResultsName('symbol') \
                         | Word('bnosp',exact=1).setResultsName('symbol') ).setResultsName('organic')

    # Bracketed atoms
    _isotope = Word(nums, min=1)
    # Two-letter symbols are listed before the one-letter ones so that the
    # first-match alternation does not stop at a one-letter prefix.
    _element_symbols =Literal('He') | Literal('Li') | Literal('Be') | Literal('Ne') | Literal('Na') | Literal('Mg') \
                    | Literal('Al') | Literal('Si') | Literal('Cl') | Literal('Ar') | Literal('Ca') | Literal('Sc') \
                    | Literal('Ti') | Literal('Cr') | Literal('Mn') | Literal('Fe') | Literal('Co') | Literal('Ni') \
                    | Literal('Cu') | Literal('Zn') | Literal('Ga') | Literal('Ge') | Literal('As') | Literal('Se') \
                    | Literal('Br') | Literal('Kr') | Literal('Rb') | Literal('Sr') | Literal('Zr') | Literal('Nb') \
                    | Literal('Mo') | Literal('Tc') | Literal('Ru') | Literal('Rh') | Literal('Pd') | Literal('Ag') \
                    | Literal('Cd') | Literal('In') | Literal('Sn') | Literal('Sb') | Literal('Te') | Literal('Xe') \
                    | Literal('Cs') | Literal('Ba') | Literal('Hf') | Literal('Ta') | Literal('Re') | Literal('Os') \
                    | Literal('Ir') | Literal('Pt') | Literal('Au') | Literal('Hg') | Literal('Tl') | Literal('Pb') \
                    | Literal('Bi') | Literal('Po') | Literal('At') | Literal('Rn') | Literal('Fr') | Literal('Ra') \
                    | Literal('Rf') | Literal('Db') | Literal('Sg') | Literal('Bh') | Literal('Hs') | Literal('Mt') \
                    | Literal('Ds') | Literal('Rg') | Literal('La') | Literal('Ce') | Literal('Pr') | Literal('Nd') \
                    | Literal('Pm') | Literal('Sm') | Literal('Eu') | Literal('Gd') | Literal('Tb') | Literal('Dy') \
                    | Literal('Ho') | Literal('Er') | Literal('Tm') | Literal('Yb') | Literal('Lu') | Literal('Ac') \
                    | Literal('Th') | Literal('Pa') | Literal('Np') | Literal('Pu') | Literal('Am') | Literal('Cm') \
                    | Literal('Bk') | Literal('Cf') | Literal('Es') | Literal('Fm') | Literal('Md') | Literal('No') \
                    | Literal('Lr') \
                    | Literal('H') | Literal('B') | Literal('C') | Literal('N') | Literal('O') | Literal('F') | Literal('P')  \
                    | Literal('S') | Literal('K') | Literal('V') | Literal('Y') | Literal('I') | Literal('W') | Literal('U')
    _aromatic_symbols = Literal('se') | Literal('as') | Word('cnops', exact=1)
    _symbol = _element_symbols | _aromatic_symbols | Literal('*')

    # Chirality: only '@@' and '@' are recognised.
    _chiral = Literal('@@') | Literal('@')
    _chiral.setParseAction(''.join)

    # Hydrogens: 'H' optionally followed by one digit 1-9 (named 'nH').
    _hcount = Literal('H') + (Word('123456789', exact=1) *
                              (0, 1)).setResultsName('nH')

    # Charge: a sign optionally followed by one digit 1-9, or a doubled sign.
    _charge = ('-' + Word('123456789', exact=1) *
               (0, 1)) | ('+' + Word('123456789', exact=1) *
                          (0, 1)) | Literal('--') | Literal('++')

    # Atom class: ':' followed by digits.
    _class = ':' + Word(nums, min=1)

    # Bracketed atom: '[' isotope? symbol chiral? hcount? charge? class? ']'
    _bracket_atom = '[' + _isotope.setResultsName('isotope')*(0,1)  \
                        + _symbol.setResultsName('symbol')          \
                        + _chiral.setResultsName('chiral')*(0,1)    \
                        + _hcount.setResultsName('hcount')*(0,1)    \
                        + _charge.setResultsName('charge')*(0,1)    \
                        + _class.setResultsName('_class')*(0,1)      \
                        + ']'

    # Atom: organic-subset atom, bracketed atom or the '*' wildcard.
    _atom = _aliphatic_organic \
            | _aromatic_organic \
            | _bracket_atom.setResultsName('bracket_atom') \
            | Literal('*').setResultsName('symbol')
    _atom.leaveWhitespace()

    ### BOND SECTION ###
    # One bond character: - = # : \ /
    # The backslash is written as '\\' because '\/' is not a valid Python
    # string escape; the resulting character set is unchanged.
    _bond = Word('-=#:\\/', exact=1)
    _bond.leaveWhitespace()

    # Ring bond: optional bond type, then a one-digit ring id or '%' followed
    # by a two-digit ring id.
    _ringbond = (_bond*(0,1)).setResultsName('ringbondtype') + \
                (Word(nums,exact=1).setResultsName('ringid') | \
                 Literal('%')+Word(nums,exact=2).setResultsName('ringid') )

    _ringbond.leaveWhitespace()

    _dot = Literal('.')

    # A single SMILES item; ring bonds are tried before plain bonds.
    _smilesChar = _ringbond.setResultsName('ringbond') | _bond.setResultsName('bond') \
                | _atom.setResultsName('atom') | _dot.setResultsName('dot')

    # Branch content is collapsed back into one string.
    _branchContent = _smilesChar * (1, None)
    _branchContent.setParseAction(lambda toks: ''.join(toks))

    # A (possibly nested) parenthesised branch, rebuilt as a flat string with
    # its outer parentheses.
    _branch = nestedExpr('(', ')', content=_branchContent)
    _branch.setParseAction(lambda toks: '(' + ''.join(
        [str(item) for sublist in toks for item in sublist]) + ')')

    _smilesElement = _smilesChar | _branch.setResultsName('branch')
    _smilesElement.setParseAction(addRawStr)
Ejemplo n.º 28
0
def usfmToken(key):
    """Return a grouped parser for the marker named *key*.

    The expression matches ``backslash`` (defined elsewhere in this module),
    then the literal text *key*, then a run of whitespace.  The backslash and
    the trailing whitespace are suppressed, so the resulting group holds only
    the key itself.
    """
    marker = Suppress(backslash) + Literal(key)
    trailing_space = Suppress(White())
    return Group(marker + trailing_space)
Ejemplo n.º 29
0
    def __init__(self):
        """Build the grammar and store it on ``self.program``.

        A program is a list of statements.
        Statements can be 'set' or 'select' statements.

        ``self.program`` matches one or more statements and ignores
        rest-of-line comments introduced by ``--``.
        """
        statement = Forward()
        SELECT, FROM, WHERE, SET, AS = map(
            CaselessKeyword, "select from where set as".split())

        ident = Word("$" + alphas, alphanums + "_$").setName("identifier")
        columnName = delimitedList(ident, ".", combine=True).setName("column name")
        # Labelled "table name" so parse errors point at the right construct.
        tableName = delimitedList(ident, ".", combine=True).setName("table name")
        tableNameList = Group(delimitedList(tableName))

        # Only the punctuation literals actually used by the grammar.
        LPAR, RPAR, EQ = map(Literal, "()=")
        arrow = Literal("->")

        # A transition expression is either ``ident(arg)`` or a bare word,
        # each optionally followed by line ends; chains are joined by ``->``.
        t_expr = (
            Group(
                ident + LPAR + Word("$" + alphas, alphanums + "_$") + RPAR
                + ZeroOrMore(LineEnd())
            ).setName("t_expr")
            | Word(alphas, alphanums + "_$") + ZeroOrMore(LineEnd())
        )
        t_expr_chain = t_expr + ZeroOrMore(arrow + t_expr)

        whereExpression = Forward()
        and_, or_, in_ = map(CaselessKeyword, "and or in".split())

        binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
        realNum = ppc.real()
        intNum = ppc.signed_integer()

        # need to add support for alg expressions
        columnRval = realNum | intNum | quotedString | columnName
        whereCondition = Group(
            (columnName + binop + (columnRval | Word(printables)))
            | (columnName + in_ + "(" + delimitedList(columnRval) + ")")
            | (columnName + in_ + "(" + statement + ")")
            | ("(" + whereExpression + ")")
        )
        whereExpression <<= whereCondition + ZeroOrMore((and_ | or_) + whereExpression)

        # Assignment for handoff.
        setExpression = Forward()
        setStatement = Group(
            ident
            | (quotedString("json_path") + AS + ident("name"))
            | ("(" + setExpression + ")")
        )
        setExpression <<= setStatement + ZeroOrMore((and_ | or_) + setExpression)

        optWhite = ZeroOrMore(LineEnd() | White())

        # Define the statement grammar.
        statement <<= (
            Group(
                Group(SELECT + t_expr_chain)("concepts") + optWhite +
                Group(FROM + tableNameList) + optWhite +
                Group(Optional(WHERE + whereExpression("where"), "")) + optWhite +
                Group(Optional(SET + setExpression("set"), ""))("select")
            )
            |
            Group(
                # realNum must be tried before intNum: alternatives are
                # first-match, so the reverse order would consume only the
                # integer part of a value such as 3.14.
                SET + (columnName + EQ + (quotedString | realNum | intNum))
            )("set")
        )("statement")

        # Make a program a series of statements.
        self.program = statement + ZeroOrMore(statement)

        # Make rest-of-line comments.
        comment = "--" + restOfLine
        self.program.ignore(comment)
Ejemplo n.º 30
0
import logging

from pyparsing import ParserElement, Empty, Word, CharsNotIn, White, Optional, ZeroOrMore, OneOrMore, StringStart, StringEnd, Combine, Group, Suppress, nums, ParseException 

import debug
import modules


logger = None

# Control characters: code points 0 through 31, plus DEL (127).
CTL = ''.join(map(chr, range(32))) + chr(127)
# Only space and tab count as skippable whitespace.
WS = " \t"

# Applies to parser elements created after this call, i.e. everything below.
ParserElement.setDefaultWhitespaceChars(WS)

_ws = White(WS)

# Backslash escape: the backslash is dropped and exactly one following
# character (any character at all) is kept.
_quoted_pair = Suppress("\\") + CharsNotIn("", exact=1)

# Double-quoted string: the quotes are dropped; the body is any mix of plain
# characters and backslash escapes.
_dqtext = CharsNotIn('"\\' + CTL, exact=1)
_dqstring = Combine(
    Suppress('"') + ZeroOrMore(_dqtext | _quoted_pair) + Suppress('"')
)

# Single-quoted string: same structure with the other quote character.
_sqtext = CharsNotIn("'\\" + CTL, exact=1)
_sqstring = Combine(
    Suppress("'") + ZeroOrMore(_sqtext | _quoted_pair) + Suppress("'")
)

# Unquoted run: anything except space, quotes, backslash and control chars.
_atom = Empty() + CharsNotIn(' \'"\\' + CTL)
_string = Combine(OneOrMore(_dqstring | _sqstring | _quoted_pair))
# A word is one or more adjacent atoms / quoted pieces merged into one token.
_word = Combine(OneOrMore(_atom | _string))

_ws_state = ""

def _ws_action(t):
    global _ws_state
Ejemplo n.º 31
0
# Floating point: a run of digits/sign characters, a dot, then digits,
# merged into a single token.
fp = Combine(Word(nums + "+-") + Literal(".") + Word(nums))

# Fortran real: the exponent marker (E/e/D/d) is rewritten to "e" by the
# parse action, so the combined token reads base + "e" + exponent.
exp = oneOf("E e D d")
exp.setParseAction(lambda toks: "e")
real = Combine(
    fp.setResultsName("base") + exp + integer.setResultsName("exponent")
)

# C type: a run of printable characters.
char = Word(printables)

# Decks of data
# ------------------------------------------------------------------------------------------
# Preliminaries: the one-letter type code and the deck name.  The name is
# one or more words starting at the beginning of a line, stopping before
# three-or-more whitespace characters followed by a type code; the words
# are then joined with single spaces.
data_type = oneOf("R I C")
name_of_deck = LineStart() + OneOrMore(
    Word(printables),
    stopOn=White(min=3) + data_type,
).setParseAction(" ".join)

# Single value decks: name, type letter, then one value of that type.
ival_deck = (
    name_of_deck.setResultsName("key")
    + Literal("I").setResultsName("type")
    + integer.setResultsName("value")
)
rval_deck = (
    name_of_deck.setResultsName("key")
    + Literal("R").setResultsName("type")
    + real.setResultsName("value")
)
cval_deck = (
    name_of_deck.setResultsName("key")
    + Literal("C").setResultsName("type")
    + char.setResultsName("value")
)

# we have to parse this one differently
char_arr_deck = (
    name_of_deck("key")
    + Literal("C")("type")
    + Literal("N=").suppress()
    + integer("size")
    + LineEnd().suppress()
    + Group(SkipTo(LineEnd() + name_of_deck + data_type) | SkipTo(StringEnd()))("value")
Ejemplo n.º 32
0
@author: luca

(Minor updates by Paul McGuire, June, 2012)
'''
from pyparsing import Word, ZeroOrMore, printables, Suppress, OneOrMore, Group, \
    LineEnd, Optional, White, originalTextFor, hexnums, nums, Combine, Literal, Keyword, \
    cStyleComment, Regex, Forward, MatchFirst, And, srange, oneOf, alphas, alphanums, \
    delimitedList

# http://www.antlr.org/grammar/ANTLR/ANTLRv3.g

# Tokens
EOL = Suppress(LineEnd())  # $

# Original text of everything up to (not including) the end of the line.
singleTextString = originalTextFor(
    ZeroOrMore(~EOL + (White(" \t") | Word(printables)))
).leaveWhitespace()

XDIGIT = hexnums
INT = Word(nums)

# Escape sequence: a backslash followed by one of the listed single
# characters, a "u" plus exactly four hex digits, or any one printable.
ESC = Literal("\\") + (
    oneOf(list('nrtbf\\">\''))
    | ("u" + Word(XDIGIT, exact=4))
    | Word(printables, exact=1)
)

# One literal character: an escape sequence, or a single printable that is
# neither a single quote nor a backslash.
LITERAL_CHAR = ESC | (
    ~(Literal("'") | Literal("\\")) + Word(printables, exact=1)
)

CHAR_LITERAL = Suppress("'") + LITERAL_CHAR + Suppress("'")
STRING_LITERAL = (
    Suppress("'") + Combine(OneOrMore(LITERAL_CHAR)) + Suppress("'")
)
DOUBLE_QUOTE_STRING_LITERAL = '"' + ZeroOrMore(LITERAL_CHAR) + '"'
DOUBLE_ANGLE_STRING_LITERAL = (
    "<<" + ZeroOrMore(Word(printables, exact=1)) + ">>"
)

# Token names start with an uppercase letter, rule names with a lowercase one.
TOKEN_REF = Word(alphas.upper(), alphanums + "_")
RULE_REF = Word(alphas.lower(), alphanums + "_")
Ejemplo n.º 33
0
>>> for font in tokens.fonts:
...   print font.fontNumber, font.fontFamily, font.fontName, font.panose or 0
0 roman Times New Roman 02020603050405020304
1 modern Courier New 02070309020205020404
2 roman Symbol 05050102010706020507
3 roman Times New Roman (Hebrew) 0
"""
from pyparsing import Optional, Literal, Word, Group, White
from pyparsing import Suppress, Combine, replaceWith
from pyparsing import alphas, nums, printables, alphanums
from pyparsing import restOfLine, oneOf, OneOrMore, ZeroOrMore
from pyparsing import ParseException

separator = Literal(";")
space = Literal(" ")
white = White()
leftBracket = Literal("{")
rightBracket = Literal("}")
# NOTE(review): the results name binds to rightBracket only (the call binds
# tighter than "|"), so a matched "{" carries no 'bracket' name -- confirm
# whether that is intended before changing it.
bracket = leftBracket | rightBracket("bracket")

# Basic RTF control codes, i.e. "\labelname3434": a backslash, a label of
# letters/apostrophes with optional trailing digits, then an optional single
# space and an optional alphanumeric/hyphen value, all merged into one token.
controlLabel = Combine(Word(alphas + "'") + Optional(Word(nums)))
controlValue = Optional(space) + Optional(Word(alphanums + "-"))
baseControl = Combine(
    Literal("\\") + controlLabel + controlValue
)("baseControl")

# In some cases (color and font table declarations), control has a ';'
# suffix.
rtfControl = Combine(baseControl + Optional(separator))("control")