def parser_factory(styler):
    """Construct the pyparsing grammar for a JSON value.

    Args:
        styler: callable invoked as ``styler(style_class, element)``; the
            object it returns is used in the grammar in place of ``element``.

    Returns:
        The ``pp.Forward`` element named 'JSON value', matching an object,
        an array, a string, a number or one of true/false/null.
    """
    lbracket, rbracket, lbrace, rbrace, colon, quote = map(
        pp.Suppress, '[]{}:"')
    quote = styler('class:string', quote)

    # Characters that may not appear verbatim inside a string: the control
    # range, DEL, the backslash and the double quote.
    forbidden = ''.join(chr(code) for code in range(32)) + '\x7f' + '\\"'
    plain_run = pp.CharsNotIn(forbidden)

    # Two-character escapes paired with the character each one decodes to.
    simple_escapes = (
        ('\\"', '"'),
        ('\\/', '/'),
        ('\\\\', '\\'),
        ('\\b', '\b'),
        ('\\f', '\f'),
        ('\\n', '\n'),
        ('\\r', '\r'),
        ('\\t', '\t'),
    )
    alternatives = [
        pp.Literal(text).addParseAction(pp.replaceWith(decoded))
        for text, decoded in simple_escapes
    ]

    # \uXXXX: exactly four hex digits, decoded to the matching code point.
    unicode_escape = pp.Suppress('\\u') + pp.Word(pp.hexnums, exact=4)
    unicode_escape.addParseAction(lambda toks: chr(int(toks[0], 16)))
    alternatives.append(unicode_escape)

    # Chain the alternatives pairwise with `|`, first match wins.
    escapes = alternatives[0]
    for alternative in alternatives[1:]:
        escapes = escapes | alternative

    string_char = (styler('class:string', plain_run)
                   | styler('class:escape', escapes))

    # Whitespace is consumed explicitly in front of the opening quote because
    # automatic whitespace skipping is switched off for the string expression.
    leading_space = pp.Optional(pp.Suppress(pp.White()))
    string = (leading_space + quote
              - pp.Combine(pp.ZeroOrMore(string_char)) + quote)
    string.leaveWhitespace()
    string.setName('string')

    value = pp.Forward()

    def _as_dict(tokens):
        return {name: item for name, item in tokens}

    def _as_nested_list(tokens):
        # Wrapped in an outer list so the array stays a single token.
        return [list(tokens)]

    # "key": value, turned into a (key, value) tuple.
    member = string + colon + value
    member.addParseAction(tuple)
    members = pp.Optional(pp.delimitedList(member))
    # NotAny(',') rejects a trailing comma before the closing brace.
    obj = lbrace - members + pp.NotAny(',') + rbrace
    obj.addParseAction(_as_dict)
    obj.setName('object')

    elements = pp.Optional(pp.delimitedList(value))
    array = lbracket - elements + pp.NotAny(',') + rbracket
    array.addParseAction(_as_nested_list)
    array.setName('array')

    true = pp.Literal('true').addParseAction(pp.replaceWith(True))
    false = pp.Literal('false').addParseAction(pp.replaceWith(False))
    null = pp.Literal('null').addParseAction(pp.replaceWith(None))
    constant = styler('class:constant', true | false | null)
    number = styler('class:number', ppc.number)

    value <<= obj | array | string | number | constant
    value.parseWithTabs()
    value.setName('JSON value')
    return value
Example #2
0
class Parser(object):
    """Splits text into commands using a caller-supplied pyparsing scanner.

    On construction the scanner is told to ignore single- and double-quoted
    strings, ``--`` line comments and C-style block comments, so separators
    inside those regions are not treated as matches.
    """

    # A "--" comment running to the end of the line, unless the dashes are
    # immediately followed by "-begin" (case-insensitive keyword).
    comment_def = "--" + pyparsing.NotAny(
        '-' + pyparsing.CaselessKeyword('begin')) + pyparsing.ZeroOrMore(
            pyparsing.CharsNotIn("\n"))

    def __init__(self, scanner, retainSeparator=True):
        """Store the scanner and register the regions it must skip.

        Args:
            scanner: pyparsing element used with ``scanString``; its ignore
                list is extended in place.
            retainSeparator: when true, ``separate`` joins all tokens of a
                grouped match; otherwise only the first token is kept.
        """
        self.scanner = scanner
        self.scanner.ignore(pyparsing.sglQuotedString)
        self.scanner.ignore(pyparsing.dblQuotedString)
        self.scanner.ignore(self.comment_def)
        self.scanner.ignore(pyparsing.cStyleComment)
        self.retainSeparator = retainSeparator

    def separate(self, txt):
        """Return the list of commands the scanner finds in ``txt``.

        Args:
            txt: text to scan.

        Returns:
            list: one entry per non-empty match, in scan order.
        """
        itms = []
        for sqlcommand, _start, _end in self.scanner.scanString(txt):
            if not sqlcommand:
                continue
            first = sqlcommand[0]
            if isinstance(first, pyparsing.ParseResults):
                # Grouped match: either the whole group glued together or
                # just its leading token.
                if self.retainSeparator:
                    itms.append("".join(first))
                else:
                    itms.append(first[0])
            elif first:
                itms.append(first)
        return itms
Example #3
0
class PortWithProfile(Node):
    """
    Variant of :class:`Port` that is used by "card" records inside
    the "Ports" property. It differs from the normal port syntax by having
    different entries inside the last section. Availability is not listed
    here, only priority. Priority does not have a colon before the actual
    number. This port is followed by profile assignment.
    """
    # Maps attribute names to either a results name in the grammar below or
    # a callable that extracts the value from the parse results.
    __fragments__ = {
        'name': 'port-name',
        'label': 'port-label',
        'priority': 'port-priority',
        'latency_offset': 'port-latency-offset',
        'availability': 'port-availability',
        'properties': lambda t: t['port-properties'].asList(),
        'profile_list': lambda t: t['port-profile-list'].asList(),
    }

    __syntax__ = (
        p.Word(p.alphanums + "-;").setResultsName('port-name') +
        p.Suppress(':')
        # This part was very tricky to write. The label is basically arbitrary
        # localized Unicode text. We want to grab all of it in one go but
        # without consuming the upcoming and latest '(' character or the space
        # that comes immediately before.
        #
        # The syntax here combines a sequence of words, as defined by anything
        # other than a space and '(', delimited by a single whitespace.
        #
        # The look-ahead pattern is a raw string so that "\(" reaches the
        # regex engine as written instead of being an invalid string escape.
        + p.Combine(
            p.OneOrMore(~p.FollowedBy(p.Regex(r'\(.+?\)') + p.LineEnd()) +
                        p.Regex('[^ \n]+') + p.White().suppress()),
            ' ').setResultsName('port-label') + p.Suppress('(') +
        p.Keyword('priority').suppress() + p.Optional(p.Suppress(':')) +
        p.Word(p.nums).setParseAction(lambda t: int(t[0])).setResultsName(
            'port-priority') + p.Optional(
                p.MatchFirst([
                    p.Suppress(',') + p.Keyword('latency offset:').suppress() +
                    p.Word(p.nums).setParseAction(lambda t: int(t[0])) +
                    p.Literal("usec").suppress(),
                    p.Empty().setParseAction(lambda t: '')
                ]).setResultsName('port-latency-offset')) + p.Optional(
                    p.MatchFirst([
                        p.Suppress(',') + p.Literal('not available'),
                        p.Suppress(',') + p.Literal('available'),
                        p.Empty().setParseAction(lambda t: '')
                    ]).setResultsName('port-availability')) + p.Suppress(')') +
        p.LineEnd().suppress() + p.Optional(
            p.MatchFirst([
                p.LineStart().suppress() + p.NotAny(p.White(' ')) +
                p.White('\t').suppress() + p.Keyword('Properties:').suppress()
                + p.LineEnd().suppress() + PropertyAttributeValue,
                p.Empty().setParseAction(lambda t: [])
            ]).setResultsName('port-properties')) +
        p.White('\t', max=3).suppress() +
        p.Literal("Part of profile(s)").suppress() + p.Suppress(":") +
        p.delimitedList(
            p.Word(p.alphanums + "+-:"),
            ", ").setResultsName("port-profile-list")).setResultsName("port")
Example #4
0
class BashHistoryParser(text_parser.PyparsingMultiLineTextParser):
    """Text parser that turns Bash history entries into events."""

    NAME = u'bash'

    DESCRIPTION = u'Parser for Bash history files'

    _ENCODING = u'utf-8'

    # "#" followed by a 9 or 10 digit number, converted by PyParseIntCast.
    _TIMESTAMP = (
        pyparsing.Suppress(u'#') +
        pyparsing.Word(pyparsing.nums, min=9, max=10)
        .setParseAction(text_parser.PyParseIntCast)
        .setResultsName(u'timestamp'))

    # Everything up to the end of input or the next "#<10 digits>" line;
    # DOTALL lets a single command span several lines.
    _COMMAND = pyparsing.Regex(
        r'.*?(?=($|\n#\d{10}))', re.DOTALL).setResultsName(u'command')

    _LINE_GRAMMAR = _TIMESTAMP + _COMMAND + pyparsing.lineEnd()

    # A line not starting with "#", then a timestamp that is not itself
    # followed by a Python-style comment.
    _VERIFICATION_GRAMMAR = (
        pyparsing.Regex(r'^\s?[^#].*?$', re.MULTILINE) +
        _TIMESTAMP +
        pyparsing.NotAny(pyparsing.pythonStyleComment))

    LINE_STRUCTURES = [(u'log_entry', _LINE_GRAMMAR)]

    def ParseRecord(self, mediator, key, structure):
        """Produces a Bash history event from a parsed record.

        Args:
          mediator (ParserMediator): receives the produced event.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): elements parsed from the file;
              its timestamp and command values are used.

        Raises:
          UnableToParseFile: if an unsupported key is provided.
        """
        if key == u'log_entry':
            mediator.ProduceEvent(
                BashHistoryEvent(structure.timestamp, structure.command))
            return

        raise errors.UnableToParseFile(u'Unsupported key: {0:s}'.format(key))

    def VerifyStructure(self, unused_mediator, line):
        """Checks whether the text looks like a Bash history file.

        Args:
          unused_mediator (ParserMediator): not used by this method.
          line (str): text to test against the verification grammar.

        Returns:
          bool: True if the verification grammar matches somewhere in the
              text, False otherwise.
        """
        # One match is enough, so the scan stops after the first hit.
        first_matches = list(
            self._VERIFICATION_GRAMMAR.scanString(line, maxMatches=1))
        return len(first_matches) > 0
Example #5
0
class GenericListAttribute(Node):
    """
    Attribute whose name sits alone on a line, followed by its value block.

    The grammar matches ``AttributeName``, a colon and the end of the line,
    then hands the following text to ``GenericListAttributeValue``.
    """

    # 'name' is read from a results name directly; 'value' is converted from
    # the parse results into a plain list.
    __fragments__ = {
        'name': 'attribute-name',
        'value': lambda t: t['attribute-value'].asList()
    }

    # The line must not begin with a space; a leading tab is tolerated and
    # discarded, as are the colon and the line end.
    __syntax__ = (p.LineStart().suppress() + p.NotAny(p.White(' ')) +
                  p.Optional(p.White('\t')).suppress() + AttributeName +
                  p.Literal(':').suppress() + p.LineEnd().suppress() +
                  GenericListAttributeValue).setResultsName("attribute")
Example #6
0
def craft_parse(text):
    """Parse ``text`` and return the walked form of its first parsed token.

    The grammar recognises ``::`` line comments, ``:> ... <:`` block
    comments, quoted strings, identifiers, bracketed lists and functions of
    the form ``name: [ ... ]``. A program is one or more comments or
    functions.

    Args:
        text: source text to parse.

    Returns:
        Whatever ``__walk`` returns for the first token of the parse result.

    Raises:
        The object returned by ``validator.panic()`` when parsing or walking
        raises any ``Exception``.
    """
    LineComment = pyp.Combine(pyp.Literal('::') + pyp.restOfLine).suppress()
    BlockComment = pyp.Combine(
        pyp.Literal(':>') + pyp.SkipTo(pyp.Literal('<:')) +
        pyp.Literal('<:')).suppress()
    Comment = BlockComment | LineComment

    Identifier = pyp.Word(pyp.alphanums + '!#$%&()*+,./;<=>?@\\^-_`{|}~')
    # addParseAction returns the same element, so the type cast is attached
    # to Identifier itself, not only to this alternative.
    Value = (Comment
             | pyp.QuotedString('"')
             | pyp.QuotedString("'")
             | Identifier.addParseAction(_type_cast_value))
    LBRACKET, RBRACKET = map(pyp.Suppress, '[]')

    Function = pyp.Forward()
    List = pyp.Forward()

    Function << pyp.Dict(
        pyp.Group(Identifier + pyp.Literal(':') +
                  pyp.Group(LBRACKET + pyp.ZeroOrMore(Comment | Function | List
                                                      | Value) + RBRACKET)))

    List << pyp.Group(LBRACKET + pyp.ZeroOrMore(Comment | Function | List
                                                | Value) + RBRACKET)

    Program = pyp.OneOrMore(Comment | Function)

    # Validate for syntax error messages:
    validator = SourceValidator()
    Value.setParseAction(validator.validate)
    List.setParseAction(validator.validate)
    Identifier.addParseAction(validator.validate)
    Function.setParseAction(validator.validate)
    Program.setParseAction(validator.validate)

    syntax_error = None
    try:
        return __walk(Program.parseString(text)[0])
    except Exception:
        # Any failure is replaced by the validator's own error object.
        syntax_error = validator.panic()

    # Now raise the exception with a clean stack trace
    raise syntax_error
class Record(Node):
    """
    One standalone entry in the output of `pactl list`.

    A record consists of a name followed by a list of attributes. Pulseaudio
    exposes objects such as cards, sinks and sources as separate records.

    Attributes come in different kinds: some are simple values, others have
    finer structure, including lists and even further nested attributes.
    """

    __fragments__ = {
        'name': 'record-name',
        'attribute_list': lambda t: t['record-attributes'].asList(),
        # Same attributes keyed by name, in the order they were parsed.
        'attribute_map': lambda t: OrderedDict(
            (attr.name, attr)
            for attr in t['record-attributes'].asList()),
    }

    # Header line "<Capitalised words> #<number>" starting in column zero,
    # followed by at least one attribute of either supported kind.
    __syntax__ = (
        p.LineStart() +
        p.NotAny(p.White(' \t')) +
        p.Regex("[A-Z][a-zA-Z ]+ #[0-9]+").setResultsName("record-name") +
        p.LineEnd().suppress() +
        p.OneOrMore(
            p.Or([
                GenericListAttribute.Syntax,
                GenericSimpleAttribute.Syntax,
            ])
        ).setResultsName("record-attributes")
    ).setResultsName("record")

    def as_json(self):
        """Return the record's name and attribute list as a plain dict."""
        return dict(name=self.name, attribute_list=self.attribute_list)

    def __repr__(self):
        # attribute_map is left out on purpose; only these two are shown.
        shown = ('name', 'attribute_list')
        rendered = ", ".join(
            "{}={!r}".format(field, getattr(self, field)) for field in shown)
        return "{}({})".format(type(self).__name__, rendered)
Example #8
0
# ------
# Base parser for a "between" expression:
#   <name> between <value1> and <value2>
# Both keywords are matched case-insensitively. `name` and `value` must
# already be defined when this statement runs.
between_cond = pp.Group(name + pp.CaselessLiteral('between').setResultsName('operator') +
                        value.setResultsName('value1') + pp.CaselessLiteral('and') +
                        value.setResultsName('value2')).setResultsName('between_condition')

# -------
# Base parser for function calls.
ppc = pp.pyparsing_common

# parentheses
LPAR = pp.Suppress('(')
RPAR = pp.Suppress(')')

# Positional function arguments: numbers, or words that are not immediately
# followed by '=' (those belong to the keyword arguments below).
arglist = pp.delimitedList(number | (pp.Word(pp.alphanums + '-_') + pp.NotAny('=')))
args = pp.Group(arglist).setResultsName('args')
# Keyword arguments: identifier '=' value, collected as a dict-like group.
key = ppc.identifier() + pp.Suppress('=')
values = (number | pp.Word(pp.alphas))
keyval = pp.dictOf(key, values)
kwarglist = pp.delimitedList(keyval)
kwargs = pp.Group(kwarglist).setResultsName('kwargs')
# Generic function: either "args , kwargs", or each part optional on its own
# ('+' binds tighter than '|', so the alternation splits at the '|').
fxn_args = args + ',' + kwargs | pp.Optional(args, default='') + pp.Optional(kwargs, default='')
fxn_name = (pp.Word(pp.alphas)).setResultsName('name')
fxn = pp.Group(fxn_name + LPAR + fxn_args + RPAR).setResultsName('function')

# Function condition: <function call> <operator> <value>.
fxn_cond = pp.Group(fxn + operator + value).setResultsName('function_condition')
Example #9
0
    p.Literal("/") + p.Word(p.nums, exact=4))

# One or two letters followed by either digit+letter or one or two digits,
# joined into a single token; the trailing whitespace is discarded.
postcode_district = p.Combine(
    p.Word(p.alphas, min=1, max=2) +
    ((p.Word(p.nums, exact=1) + p.Word(p.alphas, exact=1))
     | p.Word(p.nums, min=1, max=2))) + p.Suppress(p.White())

# Exactly two letters followed by at least three digits, as one token.
offence_code = p.Combine(p.Word(p.alphas, exact=2) +
                         p.Word(p.nums, min=3)) + p.Suppress(p.White())

# A line starting with "Printed By": text up to "Page No.:" and then the
# remainder of the line.
printed_by_line = p.Group(p.LineStart() + p.Literal("Printed By") +
                          p.SkipTo(p.Literal("Page No.:")) +
                          p.SkipTo(p.LineEnd())).setResultsName("printed_by")

# A line that starts in column zero with a number; the rest of the line is
# captured as-is.
first_case_line = p.Group(
    p.LineStart() + p.NotAny(p.White()) + p.Word(p.nums) +
    p.SkipTo(p.LineEnd())).setResultsName("first_case_line")

# Everything up to an unindented "Block:" line, plus the text skipped up to
# the following end of line.
heading_block = p.Group(
    p.SkipTo(p.LineStart() + p.NotAny(p.White()) + p.Literal("Block:")) +
    p.SkipTo(p.LineEnd())).setResultsName("heading_block")

# A heading, then repeated chunks of body text, each terminated by either a
# first-case line or a "Printed By" line (optionally followed by a new
# heading).
document = heading_block + p.OneOrMore(
    p.Group(p.SkipTo(first_case_line
                     | printed_by_line)).setResultsName("main_body") +
    (first_case_line | (printed_by_line + p.Optional(heading_block))))


def parse_court_docs(data):
    case_data = []
# Number with an optional leading '+', '-' or '~', an optional fractional
# part and an optional exponent. The '-' in the sign class is escaped so it
# is a literal rather than a range, and the groups are non-capturing "(?:".
number = pp.Regex(r"[+\-~]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")
# Parameter name: starts with a letter, '.' or '_'; digits allowed afterwards.
name = pp.Word(pp.alphas + '._',
               pp.alphanums + '._').setResultsName('parameter')
#operator = pp.Regex("==|!=|<=|>=|<|>|=|&|~|||").setResultsName('operator')
operator = pp.oneOf(['==', '<=', '<', '>', '>=', '=', '!=', '&',
                     '|']).setResultsName('operator')
# Value: a bare word, a double-quoted string or a number, tried in that order.
value = (pp.Word(pp.alphanums + '-_.*') | pp.QuotedString('"')
         | number).setResultsName('value')

# Bracketed list of numbers, e.g. "[1,2,3]", combined into a single token.
nl = pp.delimitedList(number, combine=True)
narr = pp.Combine('[' + nl + ']')

# Positional function arguments: numbers, words not immediately followed by
# '=' (those are keyword arguments), or a bracketed number list.
arglist = pp.delimitedList(number
                           | (pp.Word(pp.alphanums + '-_') + pp.NotAny('='))
                           | narr)
args = pp.Group(arglist).setResultsName('args')
# Keyword arguments: alphabetic key '=' value, collected as a dict-like group.
key = pp.Word(pp.alphas) + pp.Suppress('=')
values = (number | pp.Word(pp.alphas))
keyval = pp.dictOf(key, values)
kwarglist = pp.delimitedList(keyval)
kwargs = pp.Group(kwarglist).setResultsName('kwargs')
# Generic function call: name(optional args, then optional kwargs).
# LPAR and RPAR must already be defined when this statement runs.
fxn_args = pp.Optional(args) + pp.Optional(kwargs)
fxn_name = (pp.Word(pp.alphas)).setResultsName('fxn')
fxn = pp.Group(fxn_name + LPAR + fxn_args + RPAR)

# overall (recursive) where clause
whereexp = pp.Forward()
Example #11
0
    def parse_specialnets(self):
        """Build and return the grammar for a SPECIALNETS ... END SPECIALNETS section.

        Nothing is parsed here; the method only assembles pyparsing elements.
        Presumably this targets the SPECIALNETS section of a DEF (Design
        Exchange Format) file -- confirm against the caller.

        Returns:
            The pyparsing group with results name 'SPECIALNETS': the section
            keyword, the net count, a ';' line break, zero or more special
            nets and the END SPECIALNETS terminator.
        """
        # NOTE(review): EOL, word, RECT_id and VIRTUAL_id below are not
        # referenced again within this method.
        EOL = pp.LineEnd().suppress()
        # Statements end with ';' followed by the end of the line.
        linebreak = pp.Suppress(";" + pp.LineEnd())
        # NOTE(review): "\]" in this non-raw literal is an invalid escape
        # sequence; Python currently keeps the backslash but warns about it.
        identifier = pp.Word(
            pp.alphanums +
            '._“!<>/[]$#$%&‘*+,/:<=>?@[\]^_`{|}~')  # CONFLICT with '();'
        number = pp.pyparsing_common.number
        word = pp.Word(pp.alphas)
        LPAR = pp.Suppress('(')
        RPAR = pp.Suppress(')')
        ORIENT = (pp.Keyword('N')
                  | pp.Keyword('S')
                  | pp.Keyword('E')
                  | pp.Keyword('W')
                  | pp.Keyword('FN')
                  | pp.Keyword('FS')
                  | pp.Keyword('FE')
                  | pp.Keyword('FW'))
        # Parenthesised point; each coordinate is a number or '*'.
        pt = LPAR + pp.OneOrMore(number
                                 | pp.Keyword('*')) + RPAR  # pair of x,y
        specialnets_id = pp.Suppress(pp.Keyword('SPECIALNETS'))
        end_specialnets_id = pp.Keyword("END SPECIALNETS").suppress()
        # Each special net starts with '-'; its parameters are introduced
        # by '+'.
        begin_specialnet = pp.Suppress(pp.Keyword('-'))
        ws_snet = pp.Suppress(pp.Keyword('+'))  # parameter division in NETS

        # netName
        netName_1 = pp.Group(
            LPAR + identifier('compName') + identifier('pinName') +
            pp.Optional(ws_snet + pp.Keyword('SYNTHESIZED'))('SYNTHESIZED') +
            RPAR)
        netName = identifier('netName') + pp.ZeroOrMore(
            netName_1).setResultsName('nets')

        # MASK
        MASK = pp.Group(pp.Keyword('MASK') +
                        number('maskNum')).setResultsName('MASK')

        MASK_id = pp.Keyword('MASK')
        RECT_id = pp.Keyword('RECT')
        VIRTUAL_id = pp.Keyword('VIRTUAL')
        # NOTE(review): MASK already contains number('maskNum'), so this
        # optional prefix expects a second number after it -- confirm that
        # this is intended.
        routingPoints_1 = pp.Optional(MASK('MASK') +
                                      number('maskNum')) + pp.Group(pt)
        # Via reference: optional mask, a via name that is neither NEW nor
        # RECT, optional orientation and an optional DO/BY/STEP array spec.
        routingPoints_2 = pp.Optional(MASK_id('MASK') + number('viaMaskNum')) + pp.NotAny(pp.Keyword('NEW') | pp.Keyword('RECT')) \
                                    + identifier('viaName') + pp.Optional(ORIENT('orient')) \
                        + pp.Optional(pp.Suppress(pp.Keyword('DO')) + number('numX') + pp.Suppress(pp.Keyword('BY')) + number('numY')
                                    + pp.Suppress(pp.Keyword('STEP')) + number('stepX') + number('stepY'))
        # A starting point followed by one or more further points or vias.
        routingPoints = (pp.Group(pt) +
                         pp.OneOrMore(routingPoints_1 | routingPoints_2))

        specialWiring_placement = (ws_snet + (
            (pp.Keyword('COVER')('PLACEMENT'))
            | (pp.Keyword('FIXED')('PLACEMENT'))
            | (pp.Keyword('ROUTED')('PLACEMENT'))
            |
            (pp.Keyword('SHIELD')('PLACEMENT') + identifier('shieldNetName'))))

        # Wiring given as a shape: POLYGON, RECT or VIA.
        specialWiring_1 = (
            pp.Optional(specialWiring_placement) +
            pp.Optional(ws_snet + pp.Keyword('SHAPE') +
                        identifier('shapeType')) +
            pp.Optional(ws_snet + pp.Keyword('MASK') + number('maskNum')) +
            ((ws_snet + pp.Keyword('POLYGON') + identifier('layerName') +
              pp.OneOrMore(pt))
             |
             (ws_snet + pp.Keyword('RECT') + identifier('layerName') + pt + pt)
             | (ws_snet + pp.Keyword('VIA') + identifier('viaName') +
                pp.Optional(ORIENT('orient')) + pp.OneOrMore(pt))))

        SHAPE_elems = (pp.Keyword('RING') | pp.Keyword('PADRING')
                       | pp.Keyword('BLOCKRING')
                       | pp.Keyword('STRIPE') | pp.Keyword('FOLLOWPIN')
                       | pp.Keyword('IOWIRE')
                       | pp.Keyword('COREWIRE') | pp.Keyword('BLOCKWIRE')
                       | pp.Keyword('BLOCKAGEWIRE')
                       | pp.Keyword('FILLWIRE') | pp.Keyword('FILLWIREOPC')
                       | pp.Keyword('DRCFILL'))

        # Wiring given as a routed path: placement, layer, width, routing
        # points, then any number of NEW segments collected under 'NEW'.
        specialWiring_2 = (
            specialWiring_placement + identifier('layerName') +
            number('routeWidth') +
            pp.Optional(ws_snet + pp.Keyword('SHAPE') + SHAPE_elems('SHAPE')) +
            pp.Optional(ws_snet + pp.Keyword('STYLE') + number('styleNum')) +
            routingPoints('routingPoints') + pp.Group(
                pp.ZeroOrMore(
                    pp.Group(
                        pp.Keyword('NEW') + identifier('layerName') +
                        number('routeWidth') +
                        pp.Optional(ws_snet + pp.Keyword('SHAPE') +
                                    SHAPE_elems('SHAPE')) +
                        pp.Optional(ws_snet + pp.Keyword('STYLE') +
                                    identifier('styleNum')) +
                        routingPoints('routingPoints')))))('NEW')

        specialWiring = pp.Group(
            pp.OneOrMore(specialWiring_1 | specialWiring_2))('specialWiring')

        # Optional per-net parameters, each introduced by '+'.
        VOLTAGE = ws_snet + pp.Keyword('VOLTAGE') + number('VOLTAGE')

        SOURCE = ws_snet + pp.Keyword('SOURCE') + (
            pp.Keyword('DIST') | pp.Keyword('NETLIST') | pp.Keyword('TIMING')
            | pp.Keyword('USER'))

        FIXEDBUMP = ws_snet + pp.Keyword('FIXEDBUMP')('FIXEDBUMP')

        ORIGINAL = ws_snet + pp.Keyword('ORIGINAL') + identifier(
            'ORIGINAL_netName')

        USE_ids = (pp.Keyword('ANALOG') | pp.Keyword('CLOCK')
                   | pp.Keyword('GROUND') | pp.Keyword('POWER')
                   | pp.Keyword('RESET') | pp.Keyword('SCAN')
                   | pp.Keyword('SIGNAL') | pp.Keyword('TIEOFF'))
        USE = ws_snet + pp.Keyword('USE') + USE_ids('USE')

        PATTERN_ids = (pp.Keyword('BALANCED') | pp.Keyword('STEINER')
                       | pp.Keyword('TRUNK') | pp.Keyword('WIREDLOGIC'))
        PATTERN = ws_snet + pp.Keyword('PATTERN') + PATTERN_ids('PATTERN')

        ESTCAP = ws_snet + pp.Keyword('ESTCAP') + number(
            'ESTCAP_wireCapacitance')

        WEIGHT = ws_snet + pp.Keyword('WEIGHT') + number('WEIGHT')

        PROPERTY = pp.Group(ws_snet + pp.Keyword('PROPERTY') + pp.OneOrMore(
            identifier('propName') + number('propVal')))('PROPERTY')

        # One special net: '-' name, the optional parts in this fixed order,
        # then the ';' line break. listAllMatches keeps every net.
        specialnet = pp.Group(begin_specialnet + netName +
                              pp.Optional(VOLTAGE) +
                              pp.ZeroOrMore(specialWiring) +
                              pp.Optional(SOURCE) + pp.Optional(FIXEDBUMP) +
                              pp.Optional(ORIGINAL) + pp.Optional(USE) +
                              pp.Optional(PATTERN) + pp.Optional(ESTCAP) +
                              pp.Optional(WEIGHT) + pp.ZeroOrMore(PROPERTY) +
                              linebreak).setResultsName('specialnets',
                                                        listAllMatches=True)

        # Whole section: SPECIALNETS <numNets> ; <nets...> END SPECIALNETS
        specialnets = pp.Group(specialnets_id + number('numNets') + linebreak +
                               pp.ZeroOrMore(specialnet) +
                               pp.Suppress(end_specialnets_id)).setResultsName(
                                   'SPECIALNETS')

        return specialnets
Example #12
0
class SkyDriveOldLogParser(text_parser.PyparsingSingleLineTextParser):
  """Parse SkyDrive old log files."""

  NAME = 'skydrive_log_old'
  DESCRIPTION = 'Parser for OneDrive (or SkyDrive) old log files.'

  _ENCODING = 'utf-8'

  _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
  _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

  # Common pyparsing objects.
  _COLON = pyparsing.Literal(':')
  _EXCLAMATION = pyparsing.Literal('!')

  # Date and time in the header. The grammar labels the first two-digit
  # group 'month' and the second 'day_of_month', so the date is read as
  # MM-DD-YYYY followed by the time with milliseconds.
  # For example: 08-01-2013 21:22:28.999
  _DATE_TIME = pyparsing.Group(
      _TWO_DIGITS.setResultsName('month') + pyparsing.Suppress('-') +
      _TWO_DIGITS.setResultsName('day_of_month') + pyparsing.Suppress('-') +
      _FOUR_DIGITS.setResultsName('year') +
      text_parser.PyparsingConstants.TIME_MSEC_ELEMENTS).setResultsName(
          'date_time')

  # "<text without colon>:<integer>!<printable word>" as a single token.
  _SOURCE_CODE = pyparsing.Combine(
      pyparsing.CharsNotIn(':') +
      _COLON +
      text_parser.PyparsingConstants.INTEGER +
      _EXCLAMATION +
      pyparsing.Word(pyparsing.printables)).setResultsName('source_code')

  # Log level is whatever stands between a pair of parentheses.
  _LOG_LEVEL = (
      pyparsing.Literal('(').suppress() +
      pyparsing.SkipTo(')').setResultsName('log_level') +
      pyparsing.Literal(')').suppress())

  _LINE = (
      _DATE_TIME + _SOURCE_CODE + _LOG_LEVEL +
      _COLON + pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('text'))

  # Sometimes the timestamped log line is followed by an empty line,
  # then by a file name plus other data and finally by another empty
  # line. It could happen that a logline is split in two parts.
  # These lines will not be discarded and an event will be generated
  # ad-hoc (see source), based on the last one if available.
  _NO_HEADER_SINGLE_LINE = (
      pyparsing.NotAny(_DATE_TIME) +
      pyparsing.Optional(pyparsing.Literal('->').suppress()) +
      pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('text'))

  # Define the available log line structures.
  LINE_STRUCTURES = [
      ('logline', _LINE),
      ('no_header_single_line', _NO_HEADER_SINGLE_LINE),
  ]

  def __init__(self):
    """Initializes a parser object."""
    super(SkyDriveOldLogParser, self).__init__()
    self._last_date_time = None
    self._last_event_data = None
    self.offset = 0

  def _GetTimeElementsTuple(self, structure):
    """Retrieves the time elements tuple from a parsed log line.

    The values are unpacked in the order in which _DATE_TIME produces them
    (month first) so that verification and parsing cannot disagree.

    Args:
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Returns:
      tuple: year, month, day of month, hours, minutes, seconds and
          milliseconds.
    """
    # TODO: Verify if date and time value is locale dependent.
    month, day_of_month, year, hours, minutes, seconds, milliseconds = (
        structure.date_time)

    return (year, month, day_of_month, hours, minutes, seconds, milliseconds)

  def _ParseLogline(self, parser_mediator, structure):
    """Parse a logline and store appropriate attributes.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
    time_elements_tuple = self._GetTimeElementsTuple(structure)

    try:
      date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'invalid date time value: {0!s}'.format(structure.date_time))
      return

    event_data = SkyDriveOldLogEventData()
    event_data.log_level = structure.log_level
    event_data.offset = self.offset
    event_data.source_code = structure.source_code
    event_data.text = structure.text

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_ADDED)
    parser_mediator.ProduceEventWithEventData(event, event_data)

    # Remembered so that a following line without a header can reuse them.
    self._last_date_time = date_time
    self._last_event_data = event_data

  def _ParseNoHeaderSingleLine(self, parser_mediator, structure):
    """Parse a line without a header and store appropriate attributes.

    The event reuses the date and time and the offset of the last parsed
    logline. If there is none, the line is only logged and dropped.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
    if not self._last_event_data:
      logger.debug('SkyDrive, found isolated line with no previous events')
      return

    event_data = SkyDriveOldLogEventData()
    event_data.offset = self._last_event_data.offset
    event_data.text = structure.text

    event = time_events.DateTimeValuesEvent(
        self._last_date_time, definitions.TIME_DESCRIPTION_ADDED)
    parser_mediator.ProduceEventWithEventData(event, event_data)

    # TODO think to a possible refactoring for the non-header lines.
    self._last_date_time = None
    self._last_event_data = None

  def ParseRecord(self, parser_mediator, key, structure):
    """Parse each record structure and return an EventObject if applicable.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): identifier of the structure of tokens.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key == 'logline':
      self._ParseLogline(parser_mediator, structure)

    elif key == 'no_header_single_line':
      self._ParseNoHeaderSingleLine(parser_mediator, structure)

    else:
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is a SkyDrive old log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): line from a text file.

    Returns:
      bool: True if the line is in the expected format, False if not.
    """
    try:
      structure = self._LINE.parseString(line)
    except pyparsing.ParseException:
      logger.debug('Not a SkyDrive old log file')
      return False

    # Same field order as _ParseLogline, so a line accepted here is read
    # the same way when it is parsed.
    time_elements_tuple = self._GetTimeElementsTuple(structure)

    try:
      dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      logger.debug(
          'Not a SkyDrive old log file, invalid date and time: {0!s}'.format(
              structure.date_time))
      return False

    return True
Example #13
0
    def __init__(self,
                 base_freq=440.0,
                 amplitude=.5,
                 max_gain=10.,
                 min_gain=-200.,
                 new_scale='C/a',
                 clef='violin'):
        """Build the input parsers and set the initial state.

        Args:
            base_freq: assigned to ``self.base_freq``.
            amplitude: assigned to ``self.amplitude``.
            max_gain: assigned to ``self.max_gain``.
            min_gain: assigned to ``self.min_gain``.
            new_scale: assigned to ``self.key``.
            clef: assigned to ``self.clef``.

        ``self.keys`` must already be available (e.g. as a class attribute),
        because the key parser is built from it.
        """

        # an important constant value for the conversion of musical half tone steps to frequency values
        # is the twelfth root of 2
        self.__root__ = 1.0594630943592952645618252949463  # (2 ** (1 / 12))

        # *** parser definitions ***
        # helper
        no_whites = pp.NotAny(pp.White())
        tok_end = (pp.StringEnd() | pp.LineEnd()).suppress()

        # numbers
        # real: digits with an optional fraction; ',' or '.' is accepted as
        # the decimal separator and the result is a float.
        real = pp.Combine(
            pp.Word(pp.nums) +
            pp.Optional(pp.Char(',.') + pp.Word(pp.nums))).setParseAction(
                lambda t: float(t[0].replace(',', '.')))

        # integer: optional '-' followed by digits, converted to int.
        integer = (pp.Optional(pp.Literal('-')) +
                   pp.Word(pp.nums)).setParseAction(
                       lambda t: int(t[0] + t[1]) if len(t) > 1 else int(t[0]))

        # signs
        # Both yield +1.0 or -1.0; may_sign yields +1.0 when no sign is given.
        must_sign = pp.Char('+-').setParseAction(lambda t: float(t[0] + '1'))
        may_sign = pp.Optional(pp.Char('+-')).setParseAction(
            lambda t: float(t[0] + '1' if len(t) > 0 else '1'))

        # note value cents
        # A signed real divided by 100.
        cent = (must_sign + no_whites +
                real).setParseAction(lambda t: t[0] * t[1] / 100)

        # helpers for the note name parser
        # Offset of each note letter relative to 'A'.
        note_name_offset = {
            'C': -9,
            'D': -7,
            'E': -5,
            'F': -4,
            'G': -2,
            'A': 0,
            'B': 2,
        }
        # Lower-case letters are mapped to the same offsets as upper-case.
        note_name = pp.Char('CDEFGABcdefgab').setParseAction(
            lambda t: note_name_offset[t[0]
                                       if t[0] in 'CDEFGAB' else t[0].upper()])

        # '#' adds one step, 'b' subtracts one.
        flat_sharp = pp.Char('#b').setParseAction(lambda t: 1
                                                  if t[0] == '#' else -1)
        # Octave digit: 12 steps per octave, with octave 4 as zero.
        octave = pp.Char('0123456789').setParseAction(lambda t:
                                                      (int(t[0]) - 4) * 12)
        # Letter, optional accidental and octave digit without whitespace in
        # between; the three offsets are summed.
        full_note = (note_name + no_whites +
                     pp.Optional(pp.FollowedBy(flat_sharp) + flat_sharp) +
                     no_whites + pp.FollowedBy(octave) +
                     octave).setParseAction(lambda t: sum(t))

        # Full note plus an optional cent offset, as one float.
        self.note_name_parser = (
            full_note + pp.Optional(pp.White()).suppress() +
            pp.Optional(cent) + tok_end
        ).setParseAction(lambda t: float(sum(t))).setResultsName('note_value')

        # frequency parsers
        hertz = real + pp.Literal('Hz').suppress()

        self.frequency_parser = (hertz + tok_end).setParseAction(
            lambda t: float(t[0])).setResultsName('frequency')

        # "<note>=<freq>Hz": the frequency scaled by the twelfth root of 2
        # raised to the negated note offset. The literal repeats the value
        # stored in self.__root__.
        self.base_freq_parser = (
            full_note + pp.Literal('=').suppress() + hertz + tok_end
        ).setParseAction(lambda t: t[1] * (1.0594630943592952645618252949463**
                                           -t[0])).setResultsName('base_freq')

        # parses a string like "sc -7:b" into a musical half tone step (using the MusicConverter.set method)
        sign = (pp.Keyword('##') | pp.Keyword('bb') | pp.Keyword('#')
                | pp.Keyword('b') | pp.Keyword('n') | pp.Keyword('_'))
        self.score_parser = (integer + pp.Literal(':').suppress() + sign +
                             tok_end).setResultsName('notation')

        # amplitude parser
        # A real number followed by '%'.
        self.amp_parser = (
            real + pp.Literal('%').suppress() + tok_end
        ).setParseAction(lambda t: float(t[0])).setResultsName('amplitude')

        # Optionally signed real followed by 'dB'.
        self.gain_parser = (
            may_sign + real + pp.Literal('dB').suppress() + tok_end
        ).setParseAction(lambda t: float(t[0] * t[1])).setResultsName('gain')

        # clef parser
        self.clef_parser = (pp.Keyword('violin') | pp.Keyword('alto')
                            | pp.Keyword('bass')).setResultsName('clef')

        # key parser
        # Starts from a never-matching element and adds one keyword
        # alternative per entry of self.keys.
        key_token = pp.NoMatch()
        for key in self.keys:
            key_token = key_token | pp.Keyword(key)

        self.key_parser = (key_token).setResultsName('key')

        # complete parser
        # Alternatives are tried in this order; the first match wins.
        self.input_parser = self.note_name_parser | \
                            self.frequency_parser | \
                            self.base_freq_parser | \
                            self.amp_parser | \
                            self.gain_parser | \
                            self.clef_parser | \
                            self.key_parser | \
                            self.score_parser

        # *** initializations ***
        # NOTE(review): the order of these assignments may matter if some of
        # the public names are properties -- confirm before reordering. In
        # particular __clef__ is set to 'violin' after self.clef = clef.
        self.__note_value__ = 0.
        self.__base_freq__ = 440.
        self.base_freq = base_freq

        self.key = new_scale
        self.__names__ = 'C D EF G A B'
        self.clef = clef
        self.__clef__ = 'violin'

        self.max_gain = max_gain
        self.min_gain = min_gain
        self.amplitude = amplitude
Example #14
0
import pyparsing as pp


########## Finding Identifiers

# Words that may never be used as an identifier.
RESERVED_WORDS = [ "abstract", "baremodule", "begin", "bitstype", "break", "catch", "ccall", "const", "continue", "do", "else", "elseif", "end", "export", "finally", "for", "function", "global", "if", "immutable", "import", "importall", "in", "let", "local", "macro", "module", "quote", "return", "try", "type", "typealias", "using", "while"]

# Every character the identifier Word below accepts.  The same set is handed
# to pp.Keyword so that a reserved word only counts when it is not immediately
# preceded or followed by one of these characters.
_IDENTIFIER_CHARS = pp.alphanums + "@" + "!" + "_"

# pp.Keyword rather than pp.Literal: a Literal also matches a *prefix*, which
# made NotAny reject ordinary names that merely start with a reserved word
# ("index", "information", "format", "types", ...).
pp_reserved_word = pp.Or(
    [pp.Keyword(ww, _IDENTIFIER_CHARS) for ww in RESERVED_WORDS])
pp_identifier = (pp.NotAny(pp_reserved_word)
                 + pp.Word(_IDENTIFIER_CHARS))

# Prefixes that may precede a definition and are dropped from the results.
# Keyword is used for the same reason as above, so that "const" does not
# swallow the first five characters of a name such as "constant".
TRANSPERENT_PREFIXES = ["@inline", "const"]
pp_transperent_prefix = pp.Optional(pp.Or(
    [pp.Keyword(ww, _IDENTIFIER_CHARS)
     for ww in TRANSPERENT_PREFIXES])).suppress()


def _matched_only(matched):
    return[match for matchgrp in matched
                   for match in matchgrp[0]]


def get_exports(raw_text):
    """Return the identifiers listed after every ``export`` in *raw_text*.

    The text is scanned (not parsed from the start) for the literal
    ``export`` followed by a comma-delimited list of ``pp_identifier``; the
    identifiers of all occurrences are returned as one flat list.
    """
    export_marker = pp.Literal("export").suppress()
    exported_names = pp.delimitedList(pp_identifier)
    export_statement = export_marker + exported_names
    return _matched_only(export_statement.scanString(raw_text))


# TODO: Use actual scoping to determine what is at global scope, rather than looking for things that are not indented.
Example #15
0
class BashHistoryParser(text_parser.PyparsingMultiLineTextParser):
  """Multi-line text parser that turns Bash history entries into events."""

  NAME = 'bash'

  DESCRIPTION = 'Parser for Bash history files'

  _ENCODING = 'utf-8'

  # "#" (dropped) followed by a run of 9 to 10 digits.  The digits go through
  # text_parser.PyParseIntCast and are stored under the name "timestamp".
  _TIMESTAMP = (
      pyparsing.Suppress('#') +
      pyparsing.Word(pyparsing.nums, min=9, max=10).setParseAction(
          text_parser.PyParseIntCast).setResultsName('timestamp'))

  # Shortest run of characters (newlines included, because of DOTALL) that is
  # followed by the end of the text or by a newline plus "#" and 10 digits.
  # NOTE(review): the lookahead wants 10 digits while _TIMESTAMP also accepts
  # 9 - confirm whether 9-digit timestamps need to end a command as well.
  _COMMAND = pyparsing.Regex(
      r'.*?(?=($|\n#\d{10}))', re.DOTALL).setResultsName('command')

  _LINE_GRAMMAR = _TIMESTAMP + _COMMAND + pyparsing.lineEnd()

  # A line that does not start with "#", then a timestamp that is not itself
  # followed by a Python-style ("#...") comment.
  _VERIFICATION_GRAMMAR = (
      pyparsing.Regex(r'^\s?[^#].*?$', re.MULTILINE) +
      _TIMESTAMP +
      pyparsing.NotAny(pyparsing.pythonStyleComment))

  LINE_STRUCTURES = [('log_entry', _LINE_GRAMMAR)]

  def ParseRecord(self, parser_mediator, key, structure):
    """Produces one Bash history event from a parsed record.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): elements parsed from the file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key != 'log_entry':
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    command = self._GetValueFromStructure(structure, 'command')
    timestamp = self._GetValueFromStructure(structure, 'timestamp')

    event_data = BashHistoryEventData()
    event_data.command = command

    event = time_events.DateTimeValuesEvent(
        dfdatetime_posix_time.PosixTime(timestamp=timestamp),
        definitions.TIME_DESCRIPTION_MODIFICATION)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  # pylint: disable=unused-argument
  def VerifyStructure(self, parser_mediator, lines):
    """Checks whether the text looks like a Bash history file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between
          parsers and other components, such as storage and dfvfs.
      lines (str): one or more lines from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    # At most one match is requested; any match at all means "verified".
    found = list(
        self._VERIFICATION_GRAMMAR.scanString(lines, maxMatches=1))
    return len(found) > 0
Example #16
0
# Start-time comment line: the StartTimeComment prefix, then a date written as
# digits-digits-digits and a time written as digits:digits:digits.digits.digits.
# Date and time are each glued together, then joined with one space and stored
# under the results name "StartTimeLineComment".
StartTimeLineComment = (
    StartTimeComment +
    pp.Combine(
        pp.And([
            pp.Combine(pp.And([
                pp.Word(pp.nums), pp.Literal("-"),
                pp.Word(pp.nums), pp.Literal("-"),
                pp.Word(pp.nums),
            ])),
            pp.Combine(pp.And([
                pp.Word(pp.nums), pp.Literal(":"),
                pp.Word(pp.nums), pp.Literal(":"),
                pp.Word(pp.nums), pp.Literal("."),
                pp.Word(pp.nums), pp.Literal("."),
                pp.Word(pp.nums),
            ])),
        ]),
        joinString=" ",
        # TODO: Fix this hack
        adjacent=False,
    ).setResultsName("StartTimeLineComment") +
    pp.LineEnd())

# Any other ";" line: must not be one of the header lines, and keeps the rest
# of the line as its text.  All occurrences are collected under "LineComment".
LineComment = pp.Group(
    pp.NotAny(FileVersion ^ StartTime ^ Columns ^ StartTimeLineComment) +
    pp.Literal(";") +
    pp.Regex(r".*") +
    pp.LineEnd()
).setResultsName("LineComment", listAllMatches=True)

Header = FileVersion + StartTime + Columns

# [N],O,T,[B],I,d,[R],l/L,D
# Bus number column: "1" .. "16" or "-"; the longest alternative wins, so "10"
# is not read as "1".
ColumnBusNumber = pp.Or(
    [pp.Literal(str(bus)) for bus in range(1, 17)] + [pp.Literal("-")])

ColumnDirection = pp.Or([pp.Literal("Rx"), pp.Literal("Tx")])
Example #17
0
import pyparsing as pp

pp_identifier = (
    # A keyword is never an identifier: refuse to start on any of these.
    pp.NotAny(pp.MatchFirst([
        pp.Keyword("void"),
        pp.Keyword("unsigned"),
        pp.Keyword("signed"),
        pp.Keyword("int"),
        pp.Keyword("float"),
        pp.Keyword("const"),
        pp.Keyword("volatile"),
        pp.Keyword("extern"),
        pp.Keyword("static"),
    ]))
    # Letter or underscore first, then letters, digits or underscores.
    + pp.Word(pp.alphas + "_", pp.alphanums + "_", asKeyword=True))
#pp_identifier = pp.Word(pp.alphas+"_", pp.alphanums+"_")
pp_semicolon = pp.Literal(";")


def get_type_spec(p):
    """Print *p* prefixed with the function name (debug parse action)."""
    message = "get_type_spec: " + str(p)
    print(message)


# Type specifier: "void", an optionally signed/unsigned "int", "float", or any
# identifier.  Alternatives are tried in this order; get_type_spec runs on
# every successful match.
pp_type_spec = pp.MatchFirst([
    pp.Keyword("void"),
    pp.Optional(
        pp.MatchFirst([pp.Keyword("unsigned"), pp.Keyword("signed")]))
    + pp.Keyword("int"),
    pp.Keyword("float"),
    pp_identifier,
]).setParseAction(get_type_spec)
# Type qualifier and storage-class specifier keywords.
pp_type_qual = pp.MatchFirst([pp.Keyword("const"), pp.Keyword("volatile")])
pp_strage_spec = pp.MatchFirst([pp.Keyword("extern"), pp.Keyword("static")])
decl_spec = (pp.Optional(pp_type_qual)
Example #18
0
from pathlib import Path
from sys import stderr, exit
import pyparsing as pp
from pyparsing import (Suppress, OneOrMore, Forward, Word, alphanums, Group,
                       SkipTo, Keyword, alphas, Combine, Optional, Literal,
                       delimitedList, ZeroOrMore, infixNotation, opAssoc,
                       oneOf, FollowedBy, pythonStyleComment, ungroup,
                       ParserElement, ParseResults, printables)
from pyparsing import pyparsing_common as ppc

ParserElement.enablePackrat()

# Punctuation tokens; all of them are dropped from the parse results.
LBRACE, \
    RBRACE, LBRACK, RBRACK, EQ, COLON,\
    SEMICOLON, COMMA, LPAR, RPAR, RAWPREFIX = map(Suppress, "{}[]=:;,()$")
# Succeeds only when the next character is not whitespace.
NoWhite = pp.NotAny(pp.White())

kws = oneOf("and or not id true false")

# The same words as `kws`, as a set, for exact-match rejection below.
_RESERVED_VARNAMES = frozenset("and or not id true false".split())

# A lowercase-initial name that is not a reserved word.
#
# This used to be `Word(...).ignore(kws)`.  ParserElement.ignore() does not
# exclude anything: it registers an expression to be *skipped* (like a
# comment) before matching.  As `kws` matches prefixes, "idx" was parsed as
# "x" and "not foo" as "foo".  A condition on the complete word rejects exact
# keyword matches and leaves every other name untouched.
VARNAME = Word(alphas.lower(), alphanums + "_").addCondition(
    lambda toks: toks[0] not in _RESERVED_VARNAMES,
    message="reserved keyword used as a variable name")
# TODO reserve keywords in IDENTIFIER
# An uppercase-initial name other than "Skip" (same reasoning as VARNAME:
# ignore() silently dropped a leading "Skip" instead of rejecting it).
IDENTIFIER = Word(alphas.upper(), alphanums).addCondition(
    lambda toks: toks[0] != "Skip",
    message="'Skip' cannot be used as an identifier")
EXTERN = Combine(Literal("_") + VARNAME)


def konst(val):
    """Return a callable that accepts any arguments and always returns *val*."""
    return lambda *args, **kwargs: val
Example #19
0
def parse(s):
    """Parse a policy program and return the access rules it declares.

    The stripped text is parsed as a sequence of statements:

    * ``agent NAME = <agent atom>`` - stored with ``env_set("agent_" + NAME, ...)``
    * ``path NAME = <path atom>`` - stored with ``env_set(NAME, ...)``
    * ``<agent atom> can|cannot PERM [and PERM ...] in <path atom>`` - a rule
    * ``only <agent atom> can|cannot ... in <path atom>`` - an "only" rule
    * a bare path atom, or a ``#`` comment - parsed and ignored

    Relies on the module-level names ``re``, ``pp``, ``env``, ``env_set``,
    ``printenv`` and ``agent`` (for ``agent.mkGroup``).  After every
    assignment the environment is printed with ``printenv``; a line of 80
    ``=`` characters is printed before parsing starts.

    Args:
        s: source text of the policy program.

    Returns:
        A list with one ``(parts, cantype, perms, agents, path_predicate)``
        tuple per rule, in source order:

        * ``parts`` - the parsed statement, with entry 0 replaced by the
          evaluated agent expression and entry 4 by the path predicate
        * ``cantype`` - the matched can/cannot word, with ``"only"`` appended
          for "only" rules
        * ``perms`` - the value stored under ``'PERMS'`` by the permission list
        * ``agents`` - the evaluated agent expression
        * ``path_predicate`` - a function taking a path string and returning
          whether the rule's path expression accepts it
    """
    rules_to_execute = []

    # given a regex and an input return True or False if regex accepts input
    def mkroot(regex, y):
        return re.compile("^" + regex + "$", flags=re.DOTALL).match(y) is not None

    # Cheap self-checks of the matching helpers; they run on every call.
    assert mkroot("abc", "abc")
    assert not mkroot("abc", "abcd")
    assert mkroot("abc.*", "abcd")
    assert not mkroot("abc", "ab")
    assert mkroot("/g/comp/105/.*", "/g/comp/105/")
    assert not mkroot("/g/comp/105/.*", "/g/comp/160/")

    # given a path object, return path function: string -> boolean
    def mkf(x):
        path = x["path"][0]
        return lambda y: mkroot(path, y)

    assert mkf({'path': ['abc']})('abc')
    assert not mkf({'path': ['abc']})('abcd')
    assert not mkf({'path': ['abc']})('ab')
    assert mkf({'path': ["/g/comp/105/.*"]})("/g/comp/105/foo")

    # given a list of path objects return a list of path functions
    def mkchain(xs):
        if not isinstance(xs, list):
            xs = [xs]
        # Must be a real list: the predicate built by mk() walks it on every
        # call.  A lazy map() object (Python 3) is used up by the first call,
        # after which every path would be rejected.
        return [mkf(x) for x in xs]

    # given a list of path objects return a disjunctive path function
    # list(paths) -> function: string -> bool
    def mk(xs):
        fs = mkchain(xs)
        return lambda path: any(g(path) for g in fs)

    assert mk([{'path': ['abc']}])("abc")
    assert not mk([{'path': ['abc']}])("abcd")
    assert mk([{'path': ['abc']}, {'path': ['abcd']}])("abcd")
    # The same predicate must keep working when it is called repeatedly.
    _repeat_check = mk([{'path': ['abc']}])
    assert _repeat_check("abc") and _repeat_check("abc")

    # given a pair of functions (a,b), return a function which returns true
    # if either (a or b) returns true
    def mk_or(a, b):
        return lambda y: True if a(y) else b(y)

    # given a pair of functions (a,b), return a function which returns true
    # if both (a and b) return true
    def mk_and(a, b):
        return lambda y: b(y) if a(y) else False

    # given a fragment of the AST, return its value: a path predicate for
    # path fragments, whatever the environment/agent set holds for agents.
    def path2function(r):
        if "pathset" in r:
            return mk(r["pathset"][0])
        if "pathvar" in r:
            return env[r["pathvar"][0]]
        if "agentvar" in r:
            return env["agent_" + r["agentvar"]]
        if "agentset" in r:
            return r["agentset"]

        if "agentapplication" in r:
            a = r["agentapplication"]
            v1 = path2function(a[0]["lhs"][0])
            # case where we just have a single operand and parens
            if len(a) == 1:
                return v1
            rhs = a[2]["rhs"][0]
            op = a[1]["op"][0]
            v2 = path2function(rhs)

            if op == "or":
                return v1.union(v2)
            if op == "and":
                return v1.intersection(v2)
            if op == "minus":
                return v1.difference(v2)
            return lambda x: None

        if "application" in r:
            a = r["application"]
            v1 = path2function(a[0]["lhs"][0])
            # case where we just have a single operand and parens
            if len(a) == 1:
                return v1
            rhs = a[2]["rhs"][0]
            op = a[1]["op"][0]
            v2 = path2function(rhs)

            if op == "or":
                return mk_or(v1, v2)
            if op == "and":
                return mk_and(v1, v2)
            if op == "minus":
                return mk_and(v1, lambda x: not v2(x))
            return lambda x: None

        # Unknown fragment: a predicate that accepts nothing.
        return lambda x: None

    #===========================

    # Build a parse action that wraps the matched tokens as {lbl: [tokens]}.
    def wrap_as(lbl):
        return lambda x: {lbl: x.asList()}

    # Expand a parsed agent list: users are kept as they are, groups are
    # replaced by whatever agent.mkGroup returns for the group name.
    def agentset2set(xs):
        base = []
        for x in xs:
            if x[0] == 'user':
                base.append(x)
            else:
                base = base + agent.mkGroup(x[1])

        return base

    # NOTE(review): pp.Word(chars) matches any run of the given characters,
    # not the literal word - pp.Word("and") also accepts "dan" or "nnn".  The
    # Word-based tokens below are kept as they are so that the accepted
    # language does not change; pp.Keyword would be the strict form.

    PATH = pp.Word(pp.alphanums + "+-./*?")("PATH")
    PATH.setParseAction(wrap_as("path"))

    LP = pp.Literal("(")("LP")
    RP = pp.Literal(")")("RP")
    LBRACE = pp.Word("{")("{")
    RBRACE = pp.Word("}")("}")
    LBRACK = pp.Word("[")("[")
    RBRACK = pp.Word("]")("]")
    EQ = pp.Word("=")

    OPS = pp.Word("and") | pp.Word("or") | pp.Word("minus")
    OP = OPS("OP")
    OP.setParseAction(wrap_as("op"))

    PATHBODY = pp.delimitedList(PATH, delim=',')("PATHS")
    PATHBODY.setParseAction(lambda s, l, t: t)

    PATHOPTION = pp.Group(pp.Suppress(LBRACE) + PATHBODY + pp.Suppress(RBRACE))
    PATHOPTION.setParseAction(lambda s, l, t: t[0])

    PATHSET = pp.Group(pp.Or([PATHOPTION, PATH]))
    PATHSET.setParseAction(wrap_as("pathset"))

    GROUP = pp.Suppress(pp.Word("users")) + pp.Suppress(
        pp.Word("in")) + pp.Word(pp.alphanums)

    GROUP.setParseAction(lambda x: ('group', x.asList()[0]))

    # Defined but not referenced by any other expression in this function.
    ABSTRACT = pp.Suppress(LBRACK) + pp.Word(
        pp.alphanums)("absuser") + pp.Suppress(pp.Word(",")) + pp.Optional(
            pp.Word(pp.alphanums + ","))("absgroups") + pp.Suppress(RBRACK)
    ABSTRACT.setParseAction(lambda x: ('abstract', x.asDict()))

    USER = pp.NotAny(pp.Keyword("agent")) + pp.Word(pp.alphanums)

    USER.setParseAction(lambda x: ('user', x.asList()[0]))

    AGENT = pp.Group(pp.Or([USER, GROUP]))

    AGENT.setParseAction(lambda x: x.asList()[0])

    AGENTBODY = pp.delimitedList(AGENT, delim=',')("AGENTS")
    AGENTBODY.setParseAction(lambda s, l, t: t)

    AGENTOPTION = pp.Group(
        pp.Suppress(LBRACE) + AGENTBODY + pp.Suppress(RBRACE))
    AGENTOPTION.setParseAction(lambda s, l, t: t[0])

    AGENTSET = pp.Group(pp.Or([AGENTOPTION, AGENT]))

    AGENTSET.setParseAction(
        lambda x: {"agentset": set(agentset2set(x.asList()[0]))})

    IDENTIFIER = pp.Word(pp.alphanums)("ID")

    # agentexp

    AGENTEXP = pp.Forward()

    # -- should change AGENTEXP to AGENTATOM
    AGENTATOMPAIR = pp.Suppress(LP) + AGENTEXP + pp.Suppress(RP)
    AGENTATOMPAIR.setParseAction(wrap_as("agentapplication"))

    # used for creating vars and dereferencing vars

    AGENTVAR = pp.Suppress(pp.Word("agent")) + IDENTIFIER
    # Unlike the other actions this stores the bare name, not a list.
    AGENTVAR.setParseAction(lambda x: {"agentvar": x.asList()[0]})

    AGENTATOM = AGENTATOMPAIR | AGENTSET | AGENTVAR

    AGENTATOML = AGENTATOM("LEFT")
    AGENTATOML.setParseAction(wrap_as("lhs"))

    AGENTATOMR = AGENTATOM("RIGHT")
    AGENTATOMR.setParseAction(wrap_as("rhs"))
    AGENTEXP << AGENTATOML + pp.Optional(OP + AGENTATOMR)

    AGENTASSIGNMENT = AGENTVAR + pp.Suppress(EQ) + AGENTATOM
    AGENTASSIGNMENT.setParseAction(wrap_as("agentassignment"))

    # pathexp
    PATHEXP = pp.Forward()

    PATHATOMPAIR = pp.Suppress(LP) + PATHEXP + pp.Suppress(RP)
    PATHATOMPAIR.setParseAction(wrap_as("application"))

    # used for creating vars and dereferencing vars

    PATHVAR = pp.Suppress(pp.Word("path")) + IDENTIFIER
    PATHVAR.setParseAction(wrap_as("pathvar"))

    PATHATOM = PATHATOMPAIR | PATHVAR | PATHSET

    PATHATOML = PATHATOM("LEFT")
    PATHATOML.setParseAction(wrap_as("lhs"))

    PATHATOMR = PATHATOM("RIGHT")
    PATHATOMR.setParseAction(wrap_as("rhs"))
    PATHEXP << PATHATOML + pp.Optional(OP + PATHATOMR)

    PATHASSIGNMENT = PATHVAR + pp.Suppress(EQ) + PATHATOM
    PATHASSIGNMENT.setParseAction(wrap_as("pathassignment"))

    CAN = pp.Word("cannot") | pp.Word("can")
    CAN.setParseAction(wrap_as("bool"))

    READ = pp.Word("read")
    WRITE = pp.Word("write")
    EXECUTE = pp.Word("execute")
    TRAVERSE = pp.Word("traverse")
    DISCOVER = pp.Word("discover")
    EDIT = pp.Word("edit")
    PERMISSION = pp.Word("permission")

    # (EXECUTE used to be listed twice; the duplicate had no effect.)
    PERMS = pp.Or(
        [READ, WRITE, EXECUTE, TRAVERSE, PERMISSION, DISCOVER, EDIT])
    AND = pp.Word("and")
    PERMLIST = pp.delimitedList(PERMS, delim=AND).setResultsName("PERMS")
    PERMLIST.setParseAction(lambda x: x.asDict())

    RULE = AGENTATOM + CAN + PERMLIST + pp.Word("in") + PATHATOM
    RULE.setParseAction(wrap_as("rule"))

    ONLYRULE = pp.Suppress(pp.Word(
        "only")) + AGENTATOM + CAN + PERMLIST + pp.Word("in") + PATHATOM
    ONLYRULE.setParseAction(wrap_as("onlyrule"))

    COMMENT = pp.Suppress(pp.Word("#") + pp.Word(pp.alphanums + "+-./*? "))
    ATOMS = pp.ZeroOrMore(AGENTASSIGNMENT + pp.Optional(COMMENT)
                          | PATHASSIGNMENT + pp.Optional(COMMENT)
                          | ONLYRULE + pp.Optional(COMMENT)
                          | RULE + pp.Optional(COMMENT)
                          | PATHATOM + pp.Optional(COMMENT)
                          | COMMENT)

    # Evaluate the agent (entry 0) and path (entry 4) of a parsed rule in
    # place and queue the rule for execution.
    def register_rule(parts, only):
        agents = path2function(parts[0])
        parts[0] = agents
        cantype = parts[1]['bool'][0]
        if only:
            cantype = cantype + "only"
            parts[1]["only"] = True
        perms = parts[2]['PERMS']
        path_predicate = path2function(parts[4])
        parts[4] = path_predicate
        rules_to_execute.append(
            (parts, cantype, perms, agents, path_predicate))
        return parts

    # Execute one parsed statement; returns its parts, or None for statements
    # that need no action (for example a bare path expression).
    def parseStatement(statement):
        if "pathassignment" in statement:
            parts = statement["pathassignment"]
            # the name to which the result is to be assigned
            var_name = parts[0]["pathvar"][0]
            env_set(var_name, path2function(parts[1]))
            printenv(env)
            return parts

        if "agentassignment" in statement:
            parts = statement["agentassignment"]
            # "agentvar" holds the bare name (see AGENTVAR above)
            var_name = parts[0]["agentvar"]
            env_set("agent_" + var_name, path2function(parts[1]))
            # used to show state of environment
            printenv(env)
            return parts

        if "onlyrule" in statement:
            return register_rule(statement["onlyrule"], only=True)

        if "rule" in statement:
            return register_rule(statement["rule"], only=False)

        return None

    print("=" * 80)
    for statement in ATOMS.parseString(s.strip()):
        parseStatement(statement)

    return rules_to_execute
Example #20
0
# A regex action: a fact or variable, followed by a group holding the regex
# operator and either a variable or a regex.
REGEX_ACTION = (FACT | VAR) + pp.Group(REGEX_OP + (VAR | REGEX))

# TODO: EL_ARRAY -> SEQUENCE
# A basic array of values in EL: [ e1, e2 ... en ]
# The element alternatives are tried left to right; EL_COMPARISON and
# ARITH_FACT are combined with "^", which picks the longer match of the two.
EL_ARRAY = array_template(CONDITION | FACT | (EL_COMPARISON ^ ARITH_FACT)
                          | REGEX_ACTION | ELEMENT)

# TODO: Other actions? Stack/Queue/sample_from?
# TODO: add a negated path var fact special case.
# (ie: {.a.b.$x? -> [email protected] }
# NOTE(review): the right-hand side of the example above looks mangled by
# e-mail obfuscation in this copy - restore it from the original source.
ACTION_ARRAY = array_template(ARITH_FACT | REGEX_ACTION | FACT,
                              brackets_optional=True)

# Fact components: [Root ... pairs ... terminal]
# Core part of a fact: a.b!c => (a,DOT),(b.EX)
# A pair is an element followed by a DOT or EX operator; the NotAny(LineEnd)
# in between refuses to match when the element is directly followed by the
# end of the line.
EL_PAIR = ELEMENT + pp.NotAny(pp.LineEnd()) + (DOT | EX)
# Root of a fact: a variable or DBL_VLINE followed by an operator, or a lone
# DOT; grouped and stored under the PARSENAMES.ROOT results name.
EL_FACT_ROOT = pp.Group(((VAR | DBL_VLINE) + (DOT | EX)) | DOT).setResultsName(
    str(PARSENAMES.ROOT))
# Last component of a fact: a single element or a grouped array.
EL_FACT_TERMINAL = ELEMENT | pp.Group(EL_ARRAY)
# An entire fact: NOT part, root, zero or more pairs, terminal.  Continuing
# over a line break is prevented by the NotAny(LineEnd) inside EL_PAIR (no
# stopOn argument is used here).
# "+" binds tighter than "<<", so the whole sum is assigned to the FACT forward.
# presumably op(...) makes its argument optional - confirm against its definition
FACT << op(NOT).setResultsName(str(PARSENAMES.NOT)) + \
                              EL_FACT_ROOT + \
                              pp.Group(pp.ZeroOrMore(EL_PAIR)).setResultsName(str(PARSENAMES.BASEFACT)) + \
                              pp.Group(EL_FACT_TERMINAL).setResultsName(str(PARSENAMES.TERMINAL))

# A binding: a variable, the BIND token and a fact wrapped in op(...).
# presumably s(...) suppresses its argument - confirm against its definition
BIND_STATEMENT = VAR + s(BIND) + op(FACT)

#Execute Statements?

#The entire grammar:
ROOT = pp.OneOrMore((BIND_STATEMENT | CONDITION | FACT) + \
Example #21
0
def ParseCode(codeDefn, filename):
    """Parse an interface definition and split the result into sections.

    The text is first run through ``pyparsing.cppStyleComment`` with
    ``ProcessDoxygen`` as its parse action, then parsed completely
    (``parseAll=True``) as leading C-style comments followed by any number of
    function, handler, event, reference, define, enum, bitmask and import
    definitions.

    Side effects: sets the module-level ``CurrentFileName`` and installs
    ``ProcessDoxygen`` on the shared ``pyparsing.cppStyleComment`` object.  If
    a ``pyparsing.ParseException`` escapes, an error message is printed and
    the process exits with status 1.

    Args:
        codeDefn: text of the definition to parse.
        filename: name of the file the text came from; used in error messages.

    Returns:
        dict with three lists:
            headerList: results that are plain strings
            importList: results that are ``codeTypes.ImportData`` instances
            codeList: every other result
    """
    # The file name is used when printing error messages
    global CurrentFileName
    CurrentFileName = filename

    funcExpr = MakeFuncExpr()
    handlerExpr = MakeHandlerExpr()
    eventExpr = MakeEventExpr()
    refExpr = MakeRefExpr()
    defineExpr = MakeDefineExpr()
    enumExpr = MakeEnumExpr()
    bitMaskExpr = MakeBitMaskExpr()
    importExpr = MakeImportExpr()

    # Define an expression containing all keywords that can be preceded by a doxygen-style comment.
    # todo: There is probably a better way to do this with the expressions above, rather than
    #       defining 'keywords' here, but it would probably require changes to the expression
    #       definitions, so this is good enough for now.
    keywords = (KeywordFunction
                | KeywordHandler
                | KeywordEvent
                | KeywordReference
                | KeywordDefine
                | KeywordEnum
                | KeywordBitMask)

    # The expressions are applied in the order listed, so give the more common expressions first.
    # A leading comment only counts as header text when no keyword follows it.
    allcode = (pyparsing.ZeroOrMore(pyparsing.cStyleComment +
                                    pyparsing.NotAny(keywords)) +
               pyparsing.ZeroOrMore(funcExpr
                                    | handlerExpr
                                    | eventExpr
                                    | refExpr
                                    | defineExpr
                                    | enumExpr
                                    | bitMaskExpr
                                    | importExpr))

    # Pre-process to remove all comments except for doxygen comments.
    pyparsing.cppStyleComment.setParseAction(ProcessDoxygen)
    codeDefn = pyparsing.cppStyleComment.transformString(codeDefn)

    # Error handling is done in FailFunc() now.  However, just in case a parser exception slips
    # through, handle it here, although the error message and/or location may not be as accurate
    # as when handled by FailFunc().  In the rare case that another, unexpected, exception happens,
    # then just let it crash the program so that we get a traceback.
    try:
        resultList = allcode.parseString(codeDefn, parseAll=True)
    except pyparsing.ParseException as error:
        # Call form of print: a single parenthesised string prints the same
        # under Python 2, and is valid syntax under Python 3 as well.
        print("** Unexpected ParseException occurred **")
        PrintErrorMessage(codeDefn, error.lineno, error.col, error.msg)
        sys.exit(1)

    # Need to separate the header comments from the code in the raw resultList, and return
    # a dictionary with the appropriate sections
    headerList = []
    codeList = []
    importList = []
    for r in resultList:
        if isinstance(r, str):
            headerList.append(r)
        elif isinstance(r, codeTypes.ImportData):
            importList.append(r)
        else:
            codeList.append(r)

    return dict(headerList=headerList,
                codeList=codeList,
                importList=importList)
Example #22
0
class MacWifiLogParser(text_parser.PyparsingSingleLineTextParser):
    """Single-line text parser for MacOS wifi.log files."""

    NAME = 'macwifi'
    DESCRIPTION = 'Parser for MacOS wifi.log files.'

    _ENCODING = 'utf-8'

    THREE_DIGITS = text_parser.PyparsingConstants.THREE_DIGITS
    THREE_LETTERS = text_parser.PyparsingConstants.THREE_LETTERS

    # Patterns _GetAction uses to pull details out of the log text.
    _CONNECTED_RE = re.compile(r'Already\sassociated\sto\s(.*)\.\sBailing')
    _WIFI_PARAMETERS_RE = re.compile(
        r'\[ssid=(.*?), bssid=(.*?), security=(.*?), rssi=')

    # Function names that get a dedicated line structure (and an action).
    _KNOWN_FUNCTIONS = [
        'airportdProcessDLILEvent',
        '_doAutoJoin',
        '_processSystemPSKAssoc',
    ]

    # "<airportd...>"; the text between the brackets is stored as "agent".
    _AGENT = (
        pyparsing.Literal('<') +
        pyparsing.Combine(
            pyparsing.Literal('airportd') + pyparsing.CharsNotIn('>'),
            joinString='',
            adjacent=True).setResultsName('agent') +
        pyparsing.Literal('>'))

    # Day of week, month, day, time elements and milliseconds, as one group.
    _DATE_TIME = pyparsing.Group(
        THREE_LETTERS.setResultsName('day_of_week') +
        THREE_LETTERS.setResultsName('month') +
        text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName(
            'day') +
        text_parser.PyparsingConstants.TIME_ELEMENTS +
        pyparsing.Suppress('.') +
        THREE_DIGITS.setResultsName('milliseconds'))

    # Date, agent, one of the known function names, ":" and the rest of the
    # line as "text".
    _MAC_WIFI_KNOWN_FUNCTION_LINE = (
        _DATE_TIME.setResultsName('date_time') +
        _AGENT +
        pyparsing.oneOf(_KNOWN_FUNCTIONS).setResultsName('function') +
        pyparsing.Literal(':') +
        pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('text'))

    # Date followed by anything that is not a known-function line; the whole
    # remainder of the line becomes "text".
    _MAC_WIFI_LINE = (
        _DATE_TIME.setResultsName('date_time') +
        pyparsing.NotAny(
            _AGENT +
            pyparsing.oneOf(_KNOWN_FUNCTIONS) +
            pyparsing.Literal(':')) +
        pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('text'))

    # Date followed by the literal start-up banner.
    _MAC_WIFI_HEADER = (
        _DATE_TIME.setResultsName('date_time') +
        pyparsing.Literal('***Starting Up***').setResultsName('text'))

    # Shorter date used by the "turned over" header: month, day and time
    # elements only (no day of week, no milliseconds).
    _DATE_TIME_TURNED_OVER_HEADER = pyparsing.Group(
        text_parser.PyparsingConstants.MONTH.setResultsName('month') +
        text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName(
            'day') +
        text_parser.PyparsingConstants.TIME_ELEMENTS)

    # Two printable words, then "logfile turned over" and the end of the
    # line, joined with single spaces into "text".
    _MAC_WIFI_TURNED_OVER_HEADER = (
        _DATE_TIME_TURNED_OVER_HEADER.setResultsName('date_time') +
        pyparsing.Combine(
            pyparsing.Word(pyparsing.printables) +
            pyparsing.Word(pyparsing.printables) +
            pyparsing.Literal('logfile turned over') +
            pyparsing.LineEnd(),
            joinString=' ',
            adjacent=False).setResultsName('text'))

    # Available log line structures, as (key, grammar) pairs.
    LINE_STRUCTURES = [
        ('header', _MAC_WIFI_HEADER),
        ('turned_over_header', _MAC_WIFI_TURNED_OVER_HEADER),
        ('known_function_logline', _MAC_WIFI_KNOWN_FUNCTION_LINE),
        ('logline', _MAC_WIFI_LINE),
    ]

    _SUPPORTED_KEYS = frozenset(
        [structure_key for structure_key, _ in LINE_STRUCTURES])

    def __init__(self):
        """Creates the parser with no month seen and no year selected yet."""
        super(MacWifiLogParser, self).__init__()
        # Year currently attributed to log lines, and the month of the most
        # recently parsed line.
        self._year_use = 0
        self._last_month = 0

    def _GetAction(self, action, text):
        """Parse the well known actions for easy reading.

        Args:
          action (str): the function or action called by the agent.
          text (str): mac Wifi log text.

        Returns:
          str: a formatted string representing the known (or common) action.
              If the action is not known, or the text holds nothing to build
              the description from, the original log text is returned.
        """
        # TODO: replace "x in y" checks by startswith if possible.
        if 'airportdProcessDLILEvent' in action:
            words = text.split()
            if not words:
                # Empty or whitespace-only text has no interface name to
                # report; indexing [0] here used to raise IndexError.
                return text
            return 'Interface {0:s} turn up.'.format(words[0])

        if 'doAutoJoin' in action:
            match = self._CONNECTED_RE.match(text)
            if match:
                # Drop the first and last character of the captured name.
                ssid = match.group(1)[1:-1]
            else:
                ssid = 'Unknown'
            return 'Wifi connected to SSID {0:s}'.format(ssid)

        if 'processSystemPSKAssoc' in action:
            wifi_parameters = self._WIFI_PARAMETERS_RE.search(text)
            if wifi_parameters:
                # An empty capture is reported as 'Unknown'.
                ssid = wifi_parameters.group(1) or 'Unknown'
                bssid = wifi_parameters.group(2) or 'Unknown'
                security = wifi_parameters.group(3) or 'Unknown'

                return ('New wifi configured. BSSID: {0:s}, SSID: {1:s}, '
                        'Security: {2:s}.').format(bssid, ssid, security)

        return text

    def _GetTimeElementsTuple(self, key, structure):
        """Builds a time elements tuple from the date_time of a structure.

        The year is not part of the log line: the year currently in use is
        returned, after incrementing it when the month is lower than the
        month of the previously parsed line.

        Args:
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): structure of tokens derived from
              a line of a text file.

        Returns:
          tuple: containing:
            year (int): year.
            month (int): month, where 1 represents January; 0 if the month
                name is not known.
            day_of_month (int): day of month, where 1 is the first day of the
                month.
            hours (int): hours.
            minutes (int): minutes.
            seconds (int): seconds.
            milliseconds (int): milliseconds.
        """
        date_time = structure.date_time
        if key != 'turned_over_header':
            # The leading element (day of week) is not needed.
            (_, month_name, day, hours, minutes, seconds,
             milliseconds) = date_time
        else:
            # The turned-over header carries no milliseconds.
            month_name, day, hours, minutes, seconds = date_time
            milliseconds = 0

        # Note that dfdatetime_time_elements.TimeElements will raise
        # ValueError for an invalid month.
        month = timelib.MONTH_DICT.get(month_name.lower(), 0)

        year_rolled_over = month != 0 and month < self._last_month
        if year_rolled_over:
            # Gap detected between years.
            self._year_use += 1

        return (
            self._year_use, month, day, hours, minutes, seconds, milliseconds)

    def _ParseLogLine(self, parser_mediator, key, structure):
        """Produces an event for a single parsed log line.

        A line whose date and time values are rejected produces an
        extraction warning instead of an event.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): structure of tokens derived from
              a line of a text file.
        """
        time_elements = self._GetTimeElementsTuple(key, structure)

        try:
            date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
                time_elements_tuple=time_elements)
        except ValueError:
            parser_mediator.ProduceExtractionWarning(
                'invalid date time value: {0!s}'.format(structure.date_time))
            return

        # Remember the month (second element) for year roll-over detection.
        self._last_month = time_elements[1]

        event_data = MacWifiLogEventData()
        event_data.agent = structure.agent
        # Due to the use of CharsNotIn pyparsing structure contains
        # whitespaces that need to be removed.
        event_data.function = structure.function.strip()
        event_data.text = structure.text

        if key == 'known_function_logline':
            event_data.action = self._GetAction(
                event_data.function, event_data.text)

        parser_mediator.ProduceEventWithEventData(
            time_events.DateTimeValuesEvent(
                date_time, definitions.TIME_DESCRIPTION_ADDED),
            event_data)

    def ParseRecord(self, parser_mediator, key, structure):
        """Dispatches a parsed log record to the log line handler.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): structure of tokens derived from
              a line of a text file.

        Raises:
          ParseError: when the structure type is unknown.
        """
        if key in self._SUPPORTED_KEYS:
            self._ParseLogLine(parser_mediator, key, structure)
            return

        message = 'Unable to parse record, unknown structure: {0:s}'.format(
            key)
        raise errors.ParseError(message)

    def VerifyStructure(self, parser_mediator, line):
        """Checks whether the first line looks like a Mac Wifi log header.

        The parser state used for year tracking (last month seen and the year
        in use) is reset on every call.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          line (str): line from a text file.

        Returns:
          bool: True if the line is in the expected format, False if not.
        """
        def _TryParse(grammar):
            # A line that does not match the grammar is reported as None.
            try:
                return grammar.parseString(line)
            except pyparsing.ParseException:
                return None

        self._last_month = 0
        self._year_use = parser_mediator.GetEstimatedYear()

        key = 'header'
        structure = _TryParse(self._MAC_WIFI_HEADER)

        if not structure:
            # Fall back to the header written when the log was turned over.
            key = 'turned_over_header'
            structure = _TryParse(self._MAC_WIFI_TURNED_OVER_HEADER)

        if not structure:
            logger.debug('Not a Mac Wifi log file')
            return False

        time_elements = self._GetTimeElementsTuple(key, structure)

        # The object is only built to validate the values; it is discarded.
        try:
            dfdatetime_time_elements.TimeElementsInMilliseconds(
                time_elements_tuple=time_elements)
        except ValueError:
            message = (
                'Not a Mac Wifi log file, invalid date and time: '
                '{0!s}').format(structure.date_time)
            logger.debug(message)
            return False

        self._last_month = time_elements[1]
        return True
Example #23
0
    def _generate_grammar(self):
        """Build and return the pyparsing grammar for the project file syntax.

        The grammar recognises assignments (``key op values``), ``load``,
        ``include``, ``option``, ``requires`` and ``qtNomakeTools`` calls,
        conditional scopes with optional ``else`` branches, and silently
        discards ``for`` loops, ``defineTest`` definitions and other function
        calls. (The keywords suggest qmake project files -- presumably; confirm
        against the caller.)

        When ``self.debug`` is true every named element gets a name and
        pyparsing debug output enabled.

        Returns:
            The top-level parser element; its results are exposed under the
            ``statements`` results name and ``#`` comments are ignored.
        """
        # Define grammar:
        # Only spaces and tabs are skipped implicitly, so line ends have to be
        # matched explicitly (see EOL). Note that this changes the default for
        # every pyparsing element created afterwards, not just this grammar.
        pp.ParserElement.setDefaultWhitespaceChars(" \t")

        def add_element(name: str, value: pp.ParserElement):
            # Returns `value` unchanged; in debug mode it is also named and
            # has pyparsing's debug tracing switched on.
            nonlocal self
            if self.debug:
                value.setName(name)
                value.setDebug()
            return value

        EOL = add_element("EOL", pp.Suppress(pp.LineEnd()))
        Else = add_element("Else", pp.Keyword("else"))
        # Identifiers start with a letter or underscore and may continue with
        # letters, digits and any of "_-./".
        Identifier = add_element(
            "Identifier",
            pp.Word(f"{pp.alphas}_", bodyChars=pp.alphanums + "_-./"))
        # A parenthesised value; the parse action re-adds the outer
        # parentheses around the first level of nested tokens.
        BracedValue = add_element(
            "BracedValue",
            pp.nestedExpr(ignoreExpr=pp.quotedString
                          | pp.QuotedString(quoteChar="$(",
                                            endQuoteChar=")",
                                            escQuote="\\",
                                            unquoteResults=False)).
            setParseAction(lambda s, l, t: ["(", *t[0], ")"]),
        )

        # Variable references: $$name[(...)], $(name), ${name},
        # $${name[(...)]} and $$[name], each combined into a single token.
        Substitution = add_element(
            "Substitution",
            pp.Combine(
                pp.Literal("$") +
                (((pp.Literal("$") + Identifier + pp.Optional(pp.nestedExpr()))
                  | (pp.Literal("(") + Identifier + pp.Literal(")"))
                  | (pp.Literal("{") + Identifier + pp.Literal("}"))
                  | (pp.Literal("$") + pp.Literal("{") + Identifier +
                     pp.Optional(pp.nestedExpr()) + pp.Literal("}"))
                  | (pp.Literal("$") + pp.Literal("[") + Identifier +
                     pp.Literal("]"))))),
        )
        LiteralValuePart = add_element(
            "LiteralValuePart", pp.Word(pp.printables, excludeChars="$#{}()"))
        # One value token made of substitutions, literal text and stray "$".
        SubstitutionValue = add_element(
            "SubstitutionValue",
            pp.Combine(
                pp.OneOrMore(Substitution | LiteralValuePart
                             | pp.Literal("$"))),
        )
        # $$function(args): the result is whatever handle_function_value
        # returns for the identifier and its nested argument list.
        FunctionValue = add_element(
            "FunctionValue",
            pp.Group(
                pp.Suppress(pp.Literal("$") + pp.Literal("$")) + Identifier +
                pp.nestedExpr(
                )  # .setParseAction(lambda s, l, t: ['(', *t[0], ')'])
            ).setParseAction(lambda s, l, t: handle_function_value(*t)),
        )
        # A value never starts at "else", "}" or the end of the line; the
        # alternatives are tried in the order listed.
        Value = add_element(
            "Value",
            pp.NotAny(Else | pp.Literal("}") | EOL) +
            (pp.QuotedString(quoteChar='"', escChar="\\")
             | FunctionValue
             | SubstitutionValue
             | BracedValue),
        )

        Values = add_element("Values", pp.ZeroOrMore(Value)("value"))

        # "=" is listed first but cannot shadow the two-character operators,
        # because each of those starts with a different character.
        Op = add_element(
            "OP",
            pp.Literal("=")
            | pp.Literal("-=")
            | pp.Literal("+=")
            | pp.Literal("*=")
            | pp.Literal("~="),
        )

        Key = add_element("Key", Identifier)

        # locatedExpr records where the operator was found in the input.
        Operation = add_element(
            "Operation",
            Key("key") + pp.locatedExpr(Op)("operation") + Values("value"))
        CallArgs = add_element("CallArgs", pp.nestedExpr())

        def parse_call_args(results):
            # Flattens the nested argument tokens back into one string,
            # wrapping every nested level in parentheses again.
            out = ""
            for item in chain(*results):
                if isinstance(item, str):
                    out += item
                else:
                    out += "(" + parse_call_args(item) + ")"
            return out

        CallArgs.setParseAction(parse_call_args)

        Load = add_element("Load", pp.Keyword("load") + CallArgs("loaded"))
        Include = add_element(
            "Include",
            pp.Keyword("include") + pp.locatedExpr(CallArgs)("included"))
        Option = add_element("Option",
                             pp.Keyword("option") + CallArgs("option"))
        RequiresCondition = add_element("RequiresCondition",
                                        pp.originalTextFor(pp.nestedExpr()))

        def parse_requires_condition(s, l_unused, t):
            # The following expression unwraps the condition via the additional info
            # set by originalTextFor.
            condition_without_parentheses = s[t._original_start +
                                              1:t._original_end - 1]

            # And this replaces the colons with '&&' similar how it's done for 'Condition'.
            # NOTE(review): str.strip(" && ") removes any leading/trailing
            # spaces and '&' characters, not the literal " && " substring.
            condition_without_parentheses = (
                condition_without_parentheses.strip().replace(
                    ":", " && ").strip(" && "))
            return condition_without_parentheses

        RequiresCondition.setParseAction(parse_requires_condition)
        Requires = add_element(
            "Requires",
            pp.Keyword("requires") +
            RequiresCondition("project_required_condition"))

        # Keeps the argument list, parentheses included, as the original text.
        FunctionArgumentsAsString = add_element(
            "FunctionArgumentsAsString", pp.originalTextFor(pp.nestedExpr()))
        QtNoMakeTools = add_element(
            "QtNoMakeTools",
            pp.Keyword("qtNomakeTools") +
            FunctionArgumentsAsString("qt_no_make_tools_arguments"),
        )

        # ignore the whole thing...
        DefineTestDefinition = add_element(
            "DefineTestDefinition",
            pp.Suppress(
                pp.Keyword("defineTest") + CallArgs +
                pp.nestedExpr(opener="{", closer="}", ignoreExpr=pp.LineEnd())
            ),
        )

        # ignore the whole thing...
        ForLoop = add_element(
            "ForLoop",
            pp.Suppress(
                pp.Keyword("for") + CallArgs +
                pp.nestedExpr(opener="{", closer="}", ignoreExpr=pp.LineEnd())
            ),
        )

        # ignore the whole thing...
        ForLoopSingleLine = add_element(
            "ForLoopSingleLine",
            pp.Suppress(
                pp.Keyword("for") + CallArgs + pp.Literal(":") +
                pp.SkipTo(EOL)),
        )

        # ignore the whole thing...
        FunctionCall = add_element("FunctionCall",
                                   pp.Suppress(Identifier + pp.nestedExpr()))

        # Forward declaration: scopes nest, the definition follows below.
        Scope = add_element("Scope", pp.Forward())

        # Alternatives are tried in order, so the keyword-based statements
        # come before the generic FunctionCall and Operation forms.
        Statement = add_element(
            "Statement",
            pp.Group(Load
                     | Include
                     | Option
                     | Requires
                     | QtNoMakeTools
                     | ForLoop
                     | ForLoopSingleLine
                     | DefineTestDefinition
                     | FunctionCall
                     | Operation),
        )
        # A statement ends at the end of the line or right before a "}".
        StatementLine = add_element("StatementLine",
                                    Statement + (EOL | pp.FollowedBy("}")))
        StatementGroup = add_element(
            "StatementGroup",
            pp.ZeroOrMore(StatementLine | Scope | pp.Suppress(EOL)))

        Block = add_element(
            "Block",
            pp.Suppress("{") + pp.Optional(EOL) + StatementGroup +
            pp.Optional(EOL) + pp.Suppress("}") + pp.Optional(EOL),
        )

        # Lookahead only: a condition part must be followed by ":", "{" or
        # "|", none of which is consumed here.
        ConditionEnd = add_element(
            "ConditionEnd",
            pp.FollowedBy(
                (pp.Optional(pp.White()) +
                 (pp.Literal(":") | pp.Literal("{") | pp.Literal("|")))),
        )

        ConditionPart1 = add_element(
            "ConditionPart1",
            (pp.Optional("!") + Identifier + pp.Optional(BracedValue)))
        ConditionPart2 = add_element("ConditionPart2",
                                     pp.CharsNotIn("#{}|:=\\\n"))
        # "^" evaluates both alternatives and keeps the longer match.
        ConditionPart = add_element(
            "ConditionPart", (ConditionPart1 ^ ConditionPart2) + ConditionEnd)

        ConditionOp = add_element("ConditionOp",
                                  pp.Literal("|") ^ pp.Literal(":"))
        ConditionWhiteSpace = add_element(
            "ConditionWhiteSpace", pp.Suppress(pp.Optional(pp.White(" "))))

        ConditionRepeated = add_element(
            "ConditionRepeated",
            pp.ZeroOrMore(ConditionOp + ConditionWhiteSpace + ConditionPart))

        Condition = add_element("Condition",
                                pp.Combine(ConditionPart + ConditionRepeated))
        # Colons become " && ".
        # NOTE(review): as above, strip(" && ") strips the characters ' ' and
        # '&' from both ends rather than the literal substring.
        Condition.setParseAction(
            lambda x: " ".join(x).strip().replace(":", " && ").strip(" && "))

        # Weird thing like write_file(a)|error() where error() is the alternative condition
        # which happens to be a function call. In this case there is no scope, but our code expects
        # a scope with a list of statements, so create a fake empty statement.
        ConditionEndingInFunctionCall = add_element(
            "ConditionEndingInFunctionCall",
            pp.Suppress(ConditionOp) + FunctionCall +
            pp.Empty().setParseAction(lambda x: [[]]).setResultsName(
                "statements"),
        )

        # "condition: statement" or "condition: { ... }".
        SingleLineScope = add_element(
            "SingleLineScope",
            pp.Suppress(pp.Literal(":")) +
            pp.Group(Block | (Statement + EOL))("statements"),
        )
        MultiLineScope = add_element("MultiLineScope", Block("statements"))

        SingleLineElse = add_element(
            "SingleLineElse",
            pp.Suppress(pp.Literal(":")) + (Scope | Block |
                                            (Statement + pp.Optional(EOL))),
        )
        MultiLineElse = add_element("MultiLineElse", Block)
        ElseBranch = add_element(
            "ElseBranch",
            pp.Suppress(Else) + (SingleLineElse | MultiLineElse))

        # Scope is already add_element'ed in the forward declaration above.
        Scope <<= pp.Group(
            Condition("condition") + (SingleLineScope | MultiLineScope
                                      | ConditionEndingInFunctionCall) +
            pp.Optional(ElseBranch)("else_statements"))

        Grammar = StatementGroup("statements")
        # Skip "#" comments anywhere in the input.
        Grammar.ignore(pp.pythonStyleComment())

        return Grammar
Example #24
0
# One option entry: two leading blanks, a run of dashes, then the option name.
parameter_name = (
    pp.Suppress(pp.White(' ', exact=2)) + pp.Suppress(pp.Word('-')) + word
)('name')

# Any further blank-separated words on the option line, joined into one token.
metavar = pp.Combine(
    pp.ZeroOrMore(pp.Suppress(pp.White(' ')) + word)('metavar'))

parameter = pp.Group(parameter_name + metavar + nl + desc)

# Section headings of the help text.
block_name = pp.Or([
    pp.Literal(heading)
    for heading in ('SYNOPSIS', 'USAGE', 'DESCRIPTION', 'OPTIONS', 'AUTHOR',
                    'COPYRIGHT', 'REFERENCES')
])('block_name')

# Free text that is not a section heading, closed by an empty line.
parameter_comment = (
    pp.NotAny(block_name)
    + pp.OneOrMore(word + pp.Optional(space))('parameter_comment')
    + nl
    + empty_line
)

parser = pp.ZeroOrMore(parameter)

title = (
    pp.Literal('MRtrix') + line + nl + indent(5)
    + pp.Word(pp.alphas)('cmd') + pp.Literal(':') + line + nl
)
synopsis = (
    pp.Suppress(pp.Literal('SYNOPSIS')) + nl
    + pp.Combine(pp.OneOrMore(desc_line))('synopsis')
)
usage = (
    pp.Suppress(pp.Literal('USAGE')) + nl + nl + indent(5) + word + space
    + pp.Suppress(pp.Literal('[ parameters ]'))
    + pp.OneOrMore(pp.Suppress(space) + word) + nl + nl
    + pp.OneOrMore(
        indent(8) + word + indent(6) + pp.OneOrMore(space + word) + nl + nl)
)
Example #25
0
    def parse_nets(self):
        """Build the pyparsing grammar for a ``NETS ... END NETS`` section.

        The section consists of ``NETS <numNets> ;`` followed by any number
        of ``- netName ... ;`` entries and the closing ``END NETS``. Each
        entry may carry, in this order, the optional ``+``-introduced
        clauses SHIELDNET, VPIN, SUBNET, XTALK, NONDEFAULTRULE, regular
        wiring, SOURCE, FIXEDBUMP, FREQUENCY, ORIGINAL, USE, PATTERN, ESTCAP,
        WEIGHT and PROPERTY. (The keyword set suggests the DEF layout format
        -- presumably; confirm against the caller.)

        When ``self.ignore_nets_route`` is true the regular-wiring grammar is
        replaced by a ``SkipTo`` that jumps to the next ``+`` clause on a new
        line or to the terminating ``;``.

        Returns:
            pyparsing.ParserElement: grouped grammar with results name
            ``NETS``; every net is collected under ``net``.
        """
        EOL = pp.LineEnd().suppress()
        linebreak = pp.Suppress(";" + pp.LineEnd())
        # The backslash is written as "\\" because "\]" is not a valid escape
        # sequence (newer Python versions warn about it); the resulting
        # character set is unchanged.
        identifier = pp.Word(
            pp.alphanums +
            '._“!<>/[]$#$%&‘*+,/:<=>?@[\\]^_`{|}~')  # CONFLICT with '();'
        number = pp.pyparsing_common.number
        word = pp.Word(pp.alphas)
        LPAR = pp.Suppress('(')
        RPAR = pp.Suppress(')')
        ORIENT = (pp.Keyword('N')
                  | pp.Keyword('S')
                  | pp.Keyword('E')
                  | pp.Keyword('W')
                  | pp.Keyword('FN')
                  | pp.Keyword('FS')
                  | pp.Keyword('FE')
                  | pp.Keyword('FW'))
        pt = LPAR + pp.OneOrMore(number
                                 | pp.Keyword('*')) + RPAR  # pair of x,y
        nets_id = pp.Keyword('NETS')
        end_nets_id = pp.Keyword("END NETS").suppress()
        begin_net = pp.Keyword('-')
        ws_net = pp.Suppress(pp.Keyword('+'))  # parameter division in NETS

        # netName
        netName_1 = pp.Group(
            LPAR + identifier('compName') + identifier('pinName') +
            pp.Optional(ws_net + pp.Keyword('SYNTHESIZED'))('SYNTHESIZED') +
            RPAR).setResultsName('netName')

        netName_2 = pp.Group(
            pp.Keyword('MUSTJOIN') + LPAR + identifier('compName') +
            identifier('pinName') + RPAR).setResultsName('MUSTJOIN')

        netName = (
            identifier('netName') +
            pp.OneOrMore(netName_1 | netName_2)).setResultsName('netName')

        # SHIELDNET
        SHIELDNET = pp.Group(ws_net + pp.Keyword('SHIELDNET') +
                             identifier('shieldNetName')).setResultsName(
                                 'SHIELDNET')

        # VPIN
        VPIN_PLACEMENT_ids = (pp.Keyword('PLACED')
                              | pp.Keyword('FIXED')
                              | pp.Keyword('COVER'))

        VPIN_PLACEMENT = (VPIN_PLACEMENT_ids('PLACEMENT') +
                          pp.Group(pt)('pt') + pp.ZeroOrMore(word('orient')))

        VPIN_LAYER = pp.Keyword('LAYER') + identifier('layerName')

        VPIN = pp.Group(ws_net + pp.Keyword('VPIN') + identifier('vpinName') +
                        pp.Optional(VPIN_LAYER) + pp.Group(pt)('pt1') +
                        pp.Group(pt)('pt2') +
                        pp.Optional(pp.Group(VPIN_PLACEMENT)('PLACEMENT')))(
                            'VPIN')

        # routingPoints (used by regularWiring)
        MASK_id = pp.Keyword('MASK')('MASK')
        RECT_id = pp.Keyword('RECT')('RECT')
        VIRTUAL_id = pp.Keyword('VIRTUAL')('VIRTUAL')

        routingPoints_1 = (pp.Optional(MASK_id + number('maskNum')) +
                           pp.Group(pt))

        # A via name must not be mistaken for the NEW or RECT keywords.
        routingPoints_2 = (pp.Optional(MASK_id + number('viaMaskNum')) +
                           pp.NotAny(pp.Keyword('NEW') | pp.Keyword('RECT')) +
                           identifier('viaName') +
                           pp.Optional(ORIENT('orient')))

        routingPoints_3 = (pp.Optional(MASK_id + number('maskNum')) + RECT_id +
                           pp.Group(pt))

        routingPoints_4 = (VIRTUAL_id + pp.Group(pt))

        routingPoints = (pp.Group(pt) + pp.OneOrMore(routingPoints_1
                                                     | routingPoints_2
                                                     | routingPoints_3
                                                     | routingPoints_4))

        # regularWiring
        regularWiring_ids = (pp.Keyword('COVER')
                             | pp.Keyword('FIXED')
                             | pp.Keyword('ROUTED')
                             | pp.Keyword('NOSHIELD'))

        TAPER_RULE = ((pp.Keyword('TAPER') | pp.Keyword('TAPERRULE')) +
                      identifier('ruleName'))

        STYLE = (pp.Keyword('STYLE') + identifier('layerName') +
                 pp.OneOrMore(pt))

        regularWiring_Head = pp.Group(
            regularWiring_ids('WIRING_id') + identifier('layerName') +
            pp.Optional(TAPER_RULE)('TAPER_RULE') +
            pp.Optional(STYLE)('STYLE') +
            pp.OneOrMore(routingPoints)('routingPoints'))

        NEW_WIRING = pp.Group(
            pp.Keyword('NEW')('WIRING_id') + identifier('layerName') +
            pp.Optional(TAPER_RULE)('TAPER_RULE') +
            pp.Optional(STYLE)('STYLE') +
            pp.OneOrMore(routingPoints)('routingPoints'))

        regularWiring = pp.Group(
            ws_net + pp.Group(regularWiring_Head)('WIRING_Head') +
            pp.Group(pp.ZeroOrMore(NEW_WIRING))('NEW_WIRING'))('WIRING')

        # SUBNET
        SUBNET_regularWiring = pp.Group(
            pp.Group(regularWiring_Head)('WIRING_Head') +
            pp.Group(pp.ZeroOrMore(NEW_WIRING))('NEW_WIRING'))('WIRING')

        SUBNET_NONDEFAULTRULE = (pp.Keyword('NONDEFAULTRULE') +
                                 identifier('NONDEFAULTRULE_ruleName'))

        SUBNET_pin_type = (pp.Keyword('VPIN')('VPIN')
                           | pp.Keyword('PIN')('PIN')
                           | identifier('compName'))

        SUBNET = pp.Group(ws_net + pp.Keyword('SUBNET') +
                          identifier('subnetName') +
                          pp.ZeroOrMore(LPAR + SUBNET_pin_type +
                                        identifier('pinName') + RPAR) +
                          pp.Optional(SUBNET_NONDEFAULTRULE) +
                          pp.ZeroOrMore(SUBNET_regularWiring))('SUBNET')

        # XTALK
        XTALK = (ws_net + pp.Keyword('XTALK') + number('XTALK_class'))

        # NONDEFAULTRULE
        NONDEFAULTRULE = (ws_net + pp.Keyword('NONDEFAULTRULE') +
                          identifier('NONDEFAULTRULE_ruleName'))

        # SOURCE
        SOURCE = (ws_net + pp.Keyword('SOURCE') +
                  (pp.Keyword('DIST')
                   | pp.Keyword('NETLIST')
                   | pp.Keyword('TEST')
                   | pp.Keyword('TIMING')
                   | pp.Keyword('USER'))('SOURCE'))

        # FIXEDBUMP
        FIXEDBUMP = (ws_net + pp.Keyword('FIXEDBUMP')('FIXEDBUMP'))

        # FREQUENCY
        FREQUENCY = (ws_net + pp.Keyword('FREQUENCY') + number('FREQUENCY'))

        # ORIGINAL
        ORIGINAL = (ws_net + pp.Keyword('ORIGINAL') +
                    identifier('ORIGINAL_netName'))

        # USE > USE_ids
        USE_ids = (pp.Keyword('ANALOG')
                   | pp.Keyword('CLOCK')
                   | pp.Keyword('GROUND')
                   | pp.Keyword('POWER')
                   | pp.Keyword('RESET')
                   | pp.Keyword('SCAN')
                   | pp.Keyword('SIGNAL')
                   | pp.Keyword('TIEOFF'))

        # USE
        USE = ws_net + pp.Keyword('USE') + USE_ids('USE')

        # PATTERN
        PATTERN_ids = (pp.Keyword('BALANCED')
                       | pp.Keyword('STEINER')
                       | pp.Keyword('TRUNK')
                       | pp.Keyword('WIREDLOGIC'))

        PATTERN = (ws_net + pp.Keyword('PATTERN') + PATTERN_ids('PATTERN'))

        # ESTCAP
        ESTCAP = (ws_net + pp.Keyword('ESTCAP') + number('ESTCAP_wireCap'))

        # WEIGHT
        WEIGHT = (ws_net + pp.Keyword('WEIGHT') + number('WEIGHT'))

        # PROPERTY
        PROPERTY = pp.Group(ws_net + pp.Keyword('PROPERTY') + pp.OneOrMore(
            identifier('propName') + number('propVal')))('PROPERTY')

        # Refactor this!?
        # When routes are not wanted, skip the wiring text instead of parsing
        # it: stop at the next "+" clause on a new line or at the final ";".
        if self.ignore_nets_route:
            regularWiring = pp.SkipTo((EOL + ws_net) | linebreak)

        net = pp.Group(
            pp.Suppress(begin_net) + netName + pp.Optional(SHIELDNET) +
            pp.ZeroOrMore(VPIN) + pp.ZeroOrMore(SUBNET) + pp.Optional(XTALK) +
            pp.Optional(NONDEFAULTRULE) + pp.ZeroOrMore(regularWiring) +
            pp.Optional(SOURCE) + pp.Optional(FIXEDBUMP) +
            pp.Optional(FREQUENCY) + pp.Optional(ORIGINAL) + pp.Optional(USE) +
            pp.Optional(PATTERN) + pp.Optional(ESTCAP) + pp.Optional(WEIGHT) +
            pp.ZeroOrMore(PROPERTY) + linebreak).setResultsName(
                'net', listAllMatches=True)

        nets = pp.Group(
            pp.Suppress(nets_id) + number('numNets') + linebreak +
            pp.ZeroOrMore(net) +
            pp.Suppress(end_nets_id)).setResultsName('NETS')

        return nets
Example #26
0
import pyparsing as pp
import operator
import math
# Arithmetic operators.
expop = pp.Literal("**")
plus = pp.Literal("+")
minus = pp.Literal("-")
div = pp.Literal("/")
# A single "*" that is not the first half of the "**" power operator.
mult = pp.Literal("*") + pp.NotAny("*")
dot = pp.Suppress(pp.Literal(".").setName("dot"))

# Bitwise and shift operators.
bitOr = pp.Literal("|")
bitAnd = pp.Literal("&")
bitXor = pp.Literal("^")
bitNot = pp.Literal("~")
lShift = pp.Literal("<<")
rShift = pp.Literal(">>")

# Operator classes; Or (the "^" operator) keeps the longest alternative.
multop = pp.Or([div, mult])
addop = pp.Or([plus, minus])

# Brackets are matched but dropped from the results.
rPar = pp.Suppress(pp.Literal(")"))
lPar = pp.Suppress(pp.Literal("("))
lBrac = pp.Suppress(pp.Literal("["))
rBrac = pp.Suppress(pp.Literal("]"))
lCur = pp.Suppress(pp.Literal("{"))
rCur = pp.Suppress(pp.Literal("}"))

# Logical operators: keyword spelling or the bitwise symbol.
and_ = pp.Keyword("AND") ^ bitAnd
orXor_ = pp.Keyword("OR") ^ bitOr ^ bitXor
not_ = pp.Keyword("NOT")

# Double-quoted string; the quotes stay part of the token.
qStr = pp.QuotedString(quoteChar='"', unquoteResults=False)

# Comparison operators.
eq = pp.Suppress(pp.Literal("="))
neq = pp.Literal("<>")
lessEq = pp.Literal("<=")
Example #27
0
class sparc_syntax:
    """pyparsing grammar for a single line of SPARC assembly text.

    The grammar elements are class attributes built once when the class body
    runs; ``instr`` is the entry point. Its parse action returns
    ``asmhelper(i)`` for an ``instruction`` whose mnemonic and operands come
    from the parsed line.
    """

    divide = False
    noprefix = False

    # "#" starts a comment that runs to the end of the line.
    comment = pp.Regex(r'\#.*')
    # Symbols are wrapped as 32-bit env.ext objects.
    symbol = pp.Regex(r'[A-Za-z_.$][A-Za-z0-9_.$]*').setParseAction(
        lambda r: env.ext(r[0], size=32))
    # Mnemonic at line start, optionally followed by the ",a" suffix; the
    # result is the lower-cased symbol name plus the suffix text.
    mnemo = pp.LineStart() + symbol + pp.Optional(pp.Literal(',a'))
    mnemo.setParseAction(lambda r: r[0].ref.lower() + ''.join(r[1:]))

    # Numeric literals, each converted to a Python int by its parse action.
    integer = pp.Regex(r'[1-9][0-9]*').setParseAction(lambda r: int(r[0], 10))
    hexa = pp.Regex(r'0[xX][0-9a-fA-F]+').setParseAction(
        lambda r: int(r[0], 16))
    octa = pp.Regex(r'0[0-7]*').setParseAction(lambda r: int(r[0], 8))
    bina = pp.Regex(r'0[bB][01]+').setParseAction(lambda r: int(r[0], 2))
    # Character constant: a single quote followed by the character. The
    # escaped backslash form ('\\) is tried first so that it is consumed as
    # a whole, and the value is the character after the quote (calling ord()
    # on the whole match, quote included, raises TypeError).
    char = pp.Regex(r"('\\\\)|('.)").setParseAction(lambda r: ord(r[0][1]))
    # "|" takes the first alternative that matches, so bina must come before
    # octa: otherwise octa consumes the leading "0" of "0b101" and the
    # binary form is never reached.
    number = integer | hexa | bina | octa | char
    number.setParseAction(lambda r: env.cst(r[0], 32))

    term = symbol | number

    exp = pp.Forward()

    op_one = pp.oneOf("- ~")
    op_sig = pp.oneOf("+ -")
    op_mul = pp.oneOf("* /")
    op_cmp = pp.oneOf("== != <= >= < > <>")
    op_bit = pp.oneOf("^ && || & |")

    # Operator table for operatorPrecedence, in the order it is passed on.
    operators = [
        (op_one, 1, pp.opAssoc.RIGHT),
        (op_sig, 2, pp.opAssoc.LEFT),
        (op_mul, 2, pp.opAssoc.LEFT),
        (op_cmp, 2, pp.opAssoc.LEFT),
        (op_bit, 2, pp.opAssoc.LEFT),
    ]
    # "%name" is a register unless the name is "hi"/"lo", which introduce
    # the %hi(...) / %lo(...) operators instead.
    reg = pp.Suppress('%') + pp.NotAny(pp.oneOf('hi lo')) + symbol
    hilo = pp.oneOf('%hi %lo') + pp.Suppress('(') + exp + pp.Suppress(')')
    exp << pp.operatorPrecedence(term | reg | hilo, operators)

    adr = pp.Suppress('[') + exp + pp.Suppress(']')
    # mem is the same object as adr, so the action below applies to both.
    mem = adr  #+pp.Optional(symbol|imm)
    mem.setParseAction(lambda r: env.mem(r[0]))

    opd = exp | mem | reg
    opds = pp.Group(pp.delimitedList(opd))

    instr = mnemo + pp.Optional(opds) + pp.Optional(comment)

    def action_reg(toks):
        """Map a parsed register symbol to the matching object in env."""
        rname = toks[0]
        if rname.ref.startswith('asr'): return env.reg(rname.ref)
        return env.__dict__[rname.ref]

    def action_hilo(toks):
        """Apply %hi or %lo (zero-extended to 32 bits) to the operand."""
        v = toks[1]
        return env.hi(v) if toks[0] == '%hi' else env.lo(v).zeroextend(32)

    def action_exp(toks):
        """Fold a parsed unary/binary operator group into env.oper calls.

        Tokens that are already env.exp instances are returned as is; nested
        lists are folded recursively.
        """
        tok = toks[0]
        if isinstance(tok, env.exp): return tok
        if len(tok) == 2:
            op = tok[0]
            r = tok[1]
            if isinstance(r, list): r = action_exp(r)
            return env.oper(op, r)
        elif len(tok) == 3:
            op = tok[1]
            l = tok[0]
            r = tok[2]
            if isinstance(l, list): l = action_exp(l)
            if isinstance(r, list): r = action_exp(r)
            return env.oper(op, l, r)
        else:
            return tok

    def action_instr(toks):
        """Build an instruction from the mnemonic and optional operands."""
        i = instruction('')
        i.mnemonic = toks[0]
        if len(toks) > 1: i.operands = toks[1][0:]
        return asmhelper(i)

    # actions:
    reg.setParseAction(action_reg)
    hilo.setParseAction(action_hilo)
    exp.setParseAction(action_exp)
    instr.setParseAction(action_instr)
def TmxParser():
    """Build the pyparsing grammar for a state-machine description document.

    A document is an alphabet word followed by any number of state
    definitions (``<tag>:`` plus either a single-line command chain or a
    ``match ... endmatch`` block). Comments start with ``%``.

    Note: this sets pyparsing's default whitespace characters for all
    elements created afterwards, so that newlines are significant.

    Returns:
        pyparsing.ParserElement: the ``document`` grammar, with results names
        ``alphabet`` and ``states``.
    """
    def MatchKeywords(keywords):
        # Any one of the given keywords, reported under the name 'type'.
        return pp.Or(
            [pp.Keyword(x).setResultsName('type') for x in keywords])

    # Build grammar
    KEYWORDS_3ARG = 'step'
    KEYWORDS_4ARG_WRITE = 'write write2'
    KEYWORDS_4ARG = 'wind wind2'
    KEYWORDS_45ARG = 'shuffle shuffle2'
    KEYWORDS_5ARG = 'fill fill2'
    KEYWORDS_6ARG = 'copy'
    KEYWORD_MATCH = 'match'
    KEYWORD_ENDMATCH = 'endmatch'
    # These must be lists, not map() iterators: KEYWORDS is handed to pp.Or
    # twice below, and on Python 3 a map object is empty after the first use,
    # which would leave the second NotAny guard with no alternatives.
    # NOTE(review): 'step' and 'copy' are not part of this list -- confirm
    # whether that is intended.
    KEYWORDS = [pp.Keyword(k) for k in (' '.join([
        KEYWORDS_4ARG, KEYWORDS_4ARG_WRITE, KEYWORDS_45ARG, KEYWORDS_5ARG,
        KEYWORD_MATCH, KEYWORD_ENDMATCH
    ])).split()]

    RESERVED_STATES = [
        pp.Keyword(k) for k in 'ACCEPT ERROR HALT REJECT OUT'.split()
    ]

    COMMENT_CHAR = '%%'

    WHITESPACE = ' \t\r'

    STEP_DIRECTION = 'L R'

    pp.ParserElement.setDefaultWhitespaceChars(WHITESPACE)

    # Comments and blank lines
    whitespace = pp.White(ws=WHITESPACE).suppress()
    comment = pp.Word(COMMENT_CHAR) + pp.ZeroOrMore(
        pp.Word(pp.printables, excludeChars='\n'))
    newlines = pp.Group(
        pp.OneOrMore((comment + pp.LineEnd())
                     | pp.LineEnd())).setName('new line(s)').suppress()
    # State tags
    state_tag = pp.NotAny(pp.Or(KEYWORDS)) + pp.Word(
        pp.printables, min=2, excludeChars=':\n')
    state_tag_named = pp.NotAny(pp.Or(KEYWORDS)) + pp.Word(
        pp.printables, min=2, excludeChars=':\n').setResultsName('name')
    # A state being defined may additionally not use a reserved state name.
    state_definition_tag = pp.NotAny(pp.Or(RESERVED_STATES)) + state_tag_named
    # Words and characters in alphabet
    language_word = pp.Word(pp.printables, excludeChars='\n')
    language_character = pp.Word(pp.printables, exact=1, excludeChars='\n')
    # Direction indicator
    direction_character = pp.oneOf(STEP_DIRECTION, asKeyword=True).setName(
        'direction indicator').setResultsName('direction')
    # Direction arrow for match
    match_arrow = pp.oneOf('-> -<', asKeyword=True)

    # 3-argument single line commands
    single_line_command_keyword_3 = MatchKeywords(KEYWORDS_3ARG.split())

    # 4-argument single line commands
    single_line_command_keyword_4 = MatchKeywords(KEYWORDS_4ARG.split())

    # 4-argument single line commands which accept a WORD as 3rd arg
    single_line_command_keyword_4_write = MatchKeywords(
        KEYWORDS_4ARG_WRITE.split())

    single_line_command_keyword_45 = MatchKeywords(KEYWORDS_45ARG.split())

    # 5-argument single line commands
    single_line_command_keyword_5 = MatchKeywords(KEYWORDS_5ARG.split())

    # 6-argument single line commands
    single_line_command_keyword_6 = MatchKeywords(KEYWORDS_6ARG.split())

    # Forward declaration: a command ends in a jump target, which may itself
    # be another single-line command.
    jump_target = pp.Forward().setResultsName("jump_target")

    # Single line expression
    single_line_expr = pp.Group((
        (single_line_command_keyword_3 + direction_character)
        | (single_line_command_keyword_4 + direction_character +
           language_character.setResultsName("stop"))
        | (single_line_command_keyword_4_write + direction_character +
           language_word.setResultsName("write") + whitespace)
        | (single_line_command_keyword_45 + direction_character +
           language_character.setResultsName("stop") + whitespace +
           pp.Optional(
               language_character.setResultsName("write") + whitespace))
        | (single_line_command_keyword_5 + direction_character +
           language_character.setResultsName("stop") + whitespace +
           language_character.setResultsName("write") + whitespace)
        | (single_line_command_keyword_6 + direction_character +
           language_character.setResultsName("start") + whitespace +
           language_character.setResultsName("target") + whitespace +
           language_character.setResultsName("stop") + whitespace)) +
        jump_target)

    jump_target << (single_line_expr | state_tag)

    # One line inside a match block: "<string> <arrow> [<char>;] <target>".
    match_target = pp.Group(
        language_word.setResultsName("match_string") +
        match_arrow.setResultsName("direction") +
        pp.Optional(language_character.setResultsName("write") + ';') +
        jump_target)

    match_expr = pp.Group(
        pp.Keyword(KEYWORD_MATCH).setResultsName("type") +
        direction_character + pp.Optional(jump_target) + newlines +
        pp.ZeroOrMore(match_target +
                      newlines).setResultsName("match_targets") +
        pp.Keyword(KEYWORD_ENDMATCH))

    state_definition = pp.Group(
        (newlines + state_definition_tag.setName('state tag') + ':' +
         newlines +
         (single_line_expr.setResultsName("expr")
          | match_expr.setResultsName("expr")).setName('state definition')
         )).setName('state')

    alphabet = language_word.setName('alphabet').setResultsName("alphabet")

    document = alphabet + pp.ZeroOrMore(state_definition).setResultsName(
        "states") + pp.ZeroOrMore(newlines)

    return document
Example #29
0
]

# temporary modifier
temp_modifier = dice_bonus('temp_modifier')

# verb phrases: bracketed text such as [rock star] or [woo 1d20+1]
o = L('[')
t = L(']')
# Every punctuation character except the closing bracket may be in a word.
wordchars = P.alphanums + ''.join(
    char for char in string.punctuation if char != ']')

v_word = P.Word(wordchars)
v_words = P.OneOrMore(v_word)('verbs')

# A word that is not immediately followed by the closing bracket.
v_word_nonterminal = v_word + P.NotAny(t)
v_words_nonterminal = P.OneOrMore(v_word_nonterminal)('verbs')

# FIXME - [d20 1d10] should be an error
# Either optional leading words ending in a modifier or dice expression, or
# plain words only.
v_content = (
    (P.Optional(v_words_nonterminal) + (temp_modifier | dice))
    | v_words
)
verb_phrase = (Sup(o) + v_content + Sup(t)).setResultsName('verb_phrase')

_test_verb_phrases = [
    ("[]", P.ParseException),
    ("[star]", "['star']"),
    ("[rock star]", "['rock', 'star']"),
    ("[woo 1d20+1]", "['woo', 1, 20, 1]"),
    ("[woo +2]", "['woo', 2]"),
    ("[woo -2]", "['woo', -2]"),
    ("[1d20+1]", "[1, 20, 1]"),
Example #30
0
class SQLParser(object):
  """SQL Parser"""

  def _LogStart(self, instring, loc, expr):
    """Debug action: log that a grammar element is about to be tried.

    Registered on every grammar element by __init__ via setDebugActions().

    Args:
      instring: The text being parsed (unused).
      loc: Position in instring at which the attempt starts.
      expr: The grammar element being tried; only its name is logged.
    """
    details = (self._base_loc, loc, expr.name)
    logging.debug('Start: base_loc: %d, loc: %d, expr: %s', *details)

  def _LogSuccess(self, instring, start, loc, expr, tokens):
    logging.debug('Success: base_loc: %d, loc: %d, expr: %s, tokens: %s',
                  self._base_loc, loc, expr.name, tokens)
    tokens['loc'] = self._base_loc + loc

  def _LogFailure(self, instring, start, expr, err):
    """Debug action: log that a grammar element failed to match.

    Args:
      instring: The text being parsed (unused).
      start: Position at which the attempt began (unused).
      expr: The grammar element that failed; only its name is logged.
      err: The exception describing the failure; its loc attribute and its
        string form are logged.
    """
    details = (self._base_loc, err.loc, expr.name, err)
    logging.debug('Failure: base_loc: %d, loc: %d, expr: %s, err: %s',
                  *details)

  def __init__(self, progress_callback=None):
    """Bind the keyword list and instrument the class-level grammar.

    Args:
      progress_callback: Optional callable.  When given, ParseString() calls
        it with the current line position (self._loc) after yielding each
        statement.
    """
    # Every class attribute named like _<NAME>_TOKEN counts as a keyword.
    token_attr = re.compile(r'^_([_\w])+_TOKEN$')
    class_vars = SQLParser.__dict__
    keywords = [class_vars[attr] for attr in class_vars
                if token_attr.match(attr)]
    # _KEYWORDS is declared in the class body as an empty Forward; bind it
    # here to "any one of the keyword tokens".
    class_vars['_KEYWORDS'] << pyp.MatchFirst(keywords)

    self._loc = 0  # Last yielded line end
    self._base_loc = 0  # Start of this statement
    self._callback = progress_callback

    # Label every grammar element with its attribute name and attach the
    # three logging hooks so parsing can be traced through logging.debug.
    debug_hooks = (self._LogStart, self._LogSuccess, self._LogFailure)
    for attr in dir(self):
      candidate = getattr(self, attr)
      if not isinstance(candidate, pyp.ParserElement):
        continue
      candidate.setName(attr)
      candidate.setDebugActions(*debug_hooks)

  def _OnNewLine(self, loc):
    self._loc = loc

  def ParseString(self, string):
    """Parse SQL text lazily, one statement at a time.

    The input is split on newlines by db.XSplit, regrouped into statements by
    db.XCombineSQL, and each statement is matched against self._QUERY.

    Args:
      string: Text containing the SQL statement(s) to parse.

    Yields:
      The first element of the pyparsing result for each statement.

    Raises:
      ParseError: If a statement does not match the grammar, or if the db
        helpers raise InputRemaining.  The reported location is offset by
        self._base_loc.
    """
    logging.debug('Parsing: %r', string)
    try:
      lines = db.XSplit(string, '\n', callback=self._OnNewLine)
      for statement in db.XCombineSQL(lines):
        parsed = self._QUERY.parseString(statement)
        yield parsed[0]
        if self._callback:
          self._callback(self._loc)
        # Move the base offset on so that locations reported for the next
        # statement are shifted accordingly.
        self._base_loc = self._loc + len(statement) + 1
    except pyp.ParseException as err:
      raise ParseError(err.msg, self._base_loc + err.loc)
    except db.InputRemaining as err:
      raise ParseError('Input remaining: %s' % err,
                       self._base_loc + self._loc)

  # DISCARDED
  # SQL comment syntax.  NOTE(review): the code that actually skips these
  # expressions is not in this part of the file.

  # '--' is built as a Keyword with '-' added to the identifier characters,
  # so it is only recognised when it does not directly touch an identifier
  # character or another '-'.
  _COMMENT_START = pyp.Keyword(
      '--', identChars=pyp.Keyword.DEFAULT_KEYWORD_CHARS + '-')
  # A line comment runs from '--' to the end of the line.
  _COMMENT_LINE = _COMMENT_START + pyp.restOfLine
  # A /* ... */ block.  The (?=[^!]) lookahead rejects blocks that open with
  # '/*!' (presumably so MySQL's executable comments are not discarded), and
  # the lazy '+?' stops at the first closing '*/'.
  _COMMENT_BLOCK = pyp.Regex(r'/\*(?=[^!])(?:[^*]*\*+)+?/')

  # TERMINALS
  #
  # Every keyword is a CaselessKeyword, so it matches in any letter case and
  # only as a whole word.  Keywords built with .suppress() are matched but
  # contribute nothing to the parse results.  __init__ collects every
  # attribute named _<NAME>_TOKEN into the _KEYWORDS rule.

  # Statement terminator (suppressed).
  _LINE_DELIMITER = pyp.Suppress(';').setName(';')

  # Statement verbs.
  _ALTER_TOKEN = pyp.CaselessKeyword('alter')
  _SELECT_TOKEN = pyp.CaselessKeyword('select')
  _CREATE_TOKEN = pyp.CaselessKeyword('create')
  _UPDATE_TOKEN = pyp.CaselessKeyword('update')
  _INSERT_TOKEN = pyp.CaselessKeyword('insert')
  _REPLACE_TOKEN = pyp.CaselessKeyword('replace')
  _DELETE_TOKEN = pyp.CaselessKeyword('delete')

  # ALTER sub-commands.
  _MODIFY_TOKEN = pyp.CaselessKeyword('modify')
  _ADD_TOKEN = pyp.CaselessKeyword('add')
  _CHANGE_TOKEN = pyp.CaselessKeyword('change')
  _DROP_TOKEN = pyp.CaselessKeyword('drop')
  _CONVERT_TOKEN = pyp.CaselessKeyword('convert')
  _TO_TOKEN = pyp.CaselessKeyword('to')

  # SELECT modifiers.
  _ALL_TOKEN = pyp.CaselessKeyword('all')
  _DISTINCT_TOKEN = pyp.CaselessKeyword('distinct')
  _DISTINCTROW_TOKEN = pyp.CaselessKeyword('distinctrow')

  # Clause introducers (all suppressed).
  _FROM_TOKEN = pyp.CaselessKeyword('from').suppress()
  _WHERE_TOKEN = pyp.CaselessKeyword('where').suppress()
  _ORDER_TOKEN = pyp.CaselessKeyword('order').suppress()
  _GROUP_TOKEN = pyp.CaselessKeyword('group').suppress()
  _HAVING_TOKEN = pyp.CaselessKeyword('having').suppress()
  _LIMIT_TOKEN = pyp.CaselessKeyword('limit').suppress()
  _BY_TOKEN = pyp.CaselessKeyword('by').suppress()
  _AS_TOKEN = pyp.CaselessKeyword('as').suppress()

  _INTO_TOKEN = pyp.CaselessKeyword('into').suppress()
  _VALUES_TOKEN = pyp.CaselessKeyword('values').suppress()

  # Keywords used inside expressions.
  _IS_TOKEN = pyp.CaselessKeyword('is')
  _NOT_TOKEN = pyp.CaselessKeyword('not')
  _NULL_TOKEN = pyp.CaselessKeyword('null')
  _TRUE_TOKEN = pyp.CaselessKeyword('true')
  _FALSE_TOKEN = pyp.CaselessKeyword('false')
  _UNKNOWN_TOKEN = pyp.CaselessKeyword('unknown')
  _IN_TOKEN = pyp.CaselessKeyword('in')
  _CASE_TOKEN = pyp.CaselessKeyword('case')
  _WHEN_TOKEN = pyp.CaselessKeyword('when')
  _THEN_TOKEN = pyp.CaselessKeyword('then')
  _ELSE_TOKEN = pyp.CaselessKeyword('else')
  _START_TOKEN = pyp.CaselessKeyword('start')
  _END_TOKEN = pyp.CaselessKeyword('end')

  # Join keywords.
  _JOIN_TOKEN = pyp.CaselessKeyword('join')
  _LEFT_TOKEN = pyp.CaselessKeyword('left')
  _RIGHT_TOKEN = pyp.CaselessKeyword('right')
  _CROSS_TOKEN = pyp.CaselessKeyword('cross')
  _INNER_TOKEN = pyp.CaselessKeyword('inner')
  _OUTER_TOKEN = pyp.CaselessKeyword('outer')
  _NATURAL_TOKEN = pyp.CaselessKeyword('natural')
  _ON_TOKEN = pyp.CaselessKeyword('on')
  _USING_TOKEN = pyp.CaselessKeyword('using')
  _STRAIGHT_JOIN_TOKEN = pyp.CaselessKeyword('straight_join')

  # Table options, character sets and miscellaneous keywords.
  _LIKE_TOKEN = pyp.CaselessKeyword('like')
  _ENGINE_TOKEN = pyp.CaselessKeyword('engine')
  _IF_TOKEN = pyp.CaselessKeyword('if').suppress()
  _EXISTS_TOKEN = pyp.CaselessKeyword('exists').suppress()
  _CHARSET_TOKEN = pyp.CaselessKeyword('charset')
  _CHARACTER_TOKEN = pyp.CaselessKeyword('character')
  _NAMES_TOKEN = pyp.CaselessKeyword('names')
  _COLLATE_TOKEN = pyp.CaselessKeyword('collate')
  _INTERVAL_TOKEN = pyp.CaselessKeyword('interval')

  # Schema objects, keys and constraints.
  _DATABASE_TOKEN = pyp.CaselessKeyword('database')
  _TABLE_TOKEN = pyp.CaselessKeyword('table').suppress()
  _COLUMN_TOKEN = pyp.CaselessKeyword('column').suppress()
  _INDEX_TOKEN = pyp.CaselessKeyword('index')
  _PRIMARY_TOKEN = pyp.CaselessKeyword('primary')
  _KEY_TOKEN = pyp.CaselessKeyword('key')
  _UNIQUE_TOKEN = pyp.CaselessKeyword('unique')
  _DUPLICATE_TOKEN = pyp.CaselessKeyword('duplicate').suppress()
  _AUTO_INCREMENT_TOKEN = pyp.CaselessKeyword('auto_increment')
  _DEFAULT_TOKEN = pyp.CaselessKeyword('default').suppress()
  _USE_TOKEN = pyp.CaselessKeyword('use')
  _IGNORE_TOKEN = pyp.CaselessKeyword('ignore')
  _FORCE_TOKEN = pyp.CaselessKeyword('force')
  _CONSTRAINT_TOKEN = pyp.CaselessKeyword('constraint')
  _FOREIGN_TOKEN = pyp.CaselessKeyword('foreign')
  _RESTRICT_TOKEN = pyp.CaselessKeyword('restrict')
  _CASCADE_TOKEN = pyp.CaselessKeyword('cascade')
  _NO_TOKEN = pyp.CaselessKeyword('no')
  _ACTION_TOKEN = pyp.CaselessKeyword('action')
  _REFERENCES_TOKEN = pyp.CaselessKeyword('references')

  # Integer type names.
  _TINYINT_TOKEN = pyp.CaselessKeyword('tinyint')
  _SMALLINT_TOKEN = pyp.CaselessKeyword('smallint')
  _MEDIUMINT_TOKEN = pyp.CaselessKeyword('mediumint')
  _INT_TOKEN = pyp.CaselessKeyword('int')
  _INTEGER_TOKEN = pyp.CaselessKeyword('integer')
  _BIGINT_TOKEN = pyp.CaselessKeyword('bigint')

  _UNSIGNED_TOKEN = pyp.CaselessKeyword('unsigned')

  # Fixed- and floating-point type names.
  _DECIMAL_TOKEN = pyp.CaselessKeyword('decimal')
  _DEC_TOKEN = pyp.CaselessKeyword('dec')
  _FIXED_TOKEN = pyp.CaselessKeyword('fixed')
  _FLOAT_TOKEN = pyp.CaselessKeyword('float')
  _DOUBLE_TOKEN = pyp.CaselessKeyword('double')
  _PRECISION_TOKEN = pyp.CaselessKeyword('precision')

  # Date and time type names.
  _DATE_TOKEN = pyp.CaselessKeyword('date')
  _DATETIME_TOKEN = pyp.CaselessKeyword('datetime')
  _TIMESTAMP_TOKEN = pyp.CaselessKeyword('timestamp')
  _TIME_TOKEN = pyp.CaselessKeyword('time')
  _YEAR_TOKEN = pyp.CaselessKeyword('year')

  # Character and binary string type names.
  _CHAR_TOKEN = pyp.CaselessKeyword('char')
  _VARCHAR_TOKEN = pyp.CaselessKeyword('varchar')
  _BINARY_TOKEN = pyp.CaselessKeyword('binary')
  _VARBINARY_TOKEN = pyp.CaselessKeyword('varbinary')

  # BLOB and TEXT type names.
  _TINYBLOB_TOKEN = pyp.CaselessKeyword('tinyblob')
  _BLOB_TOKEN = pyp.CaselessKeyword('blob')
  _MEDIUMBLOB_TOKEN = pyp.CaselessKeyword('mediumblob')
  _LONGBLOB_TOKEN = pyp.CaselessKeyword('longblob')
  _TINYTEXT_TOKEN = pyp.CaselessKeyword('tinytext')
  _TEXT_TOKEN = pyp.CaselessKeyword('text')
  _MEDIUMTEXT_TOKEN = pyp.CaselessKeyword('mediumtext')
  _LONGTEXT_TOKEN = pyp.CaselessKeyword('longtext')

  # SET is both a type name and the keyword of SET / UPDATE ... SET.
  _ENUM_TOKEN = pyp.CaselessKeyword('enum')
  _SET_TOKEN = pyp.CaselessKeyword('set')

  _BIT_TOKEN = pyp.CaselessKeyword('bit')

  # Column positioning keywords.
  _FIRST_TOKEN = pyp.CaselessKeyword('first')
  _BEFORE_TOKEN = pyp.CaselessKeyword('before')
  _AFTER_TOKEN = pyp.CaselessKeyword('after')

  _CURRENT_TIMESTAMP_TOKEN = pyp.CaselessKeyword('current_timestamp')

  # Transaction keywords.
  _BEGIN_TOKEN = pyp.CaselessKeyword('begin')
  _TRANSACTION_TOKEN = pyp.CaselessKeyword('transaction')
  _COMMIT_TOKEN = pyp.CaselessKeyword('commit')
  _ROLLBACK_TOKEN = pyp.CaselessKeyword('rollback')

  # Variable scope keywords.
  _LOCAL_TOKEN = pyp.CaselessKeyword('local')
  _SESSION_TOKEN = pyp.CaselessKeyword('session')
  _GLOBAL_TOKEN = pyp.CaselessKeyword('global')

  ## IDENTIFIER

  # Placeholder that __init__ binds to MatchFirst(<all _*_TOKEN attributes>).
  _KEYWORDS = pyp.Forward()  # list of keywords, defined by __init__()

  # Either a bare name (a letter followed by letters, digits, '_' or '$') or
  # a backtick-quoted name, which may span lines and uses backslash escapes.
  # Note that a bare name is not checked against _KEYWORDS here.
  _IDENTIFIER = pyp.Group(pyp.Word(pyp.alphas, pyp.alphanums + '_$')
                          | pyp.QuotedString('`', multiline=True, escChar='\\'))

  # An underscore followed by an alphanumeric name, exposed as
  # 'character_set' (presumably a charset introducer such as _utf8'...').
  _CHARSET = '_' + pyp.Word(pyp.alphanums).setResultsName('character_set')

  # A single- or double-quoted string, optionally preceded by _CHARSET.
  # Strings may span lines and use backslash as the escape character.
  _STRING = (pyp.Optional(_CHARSET)
             + (pyp.QuotedString('\'', multiline=True, escChar='\\')
                | pyp.QuotedString('\"', multiline=True, escChar='\\')))

  # One or more decimal digits.
  _NUMBER = pyp.Word(pyp.nums)

  # Exactly one '+' or '-'.
  _ARITH_SIGN = pyp.Word('+-', exact=1)
  _E = pyp.CaselessLiteral('E')

  # [sign][digits].digits[E[sign]digits], combined into a single token.  The
  # decimal point and the digits after it are mandatory.
  _REAL_NUMBER = pyp.Combine(pyp.Optional(_ARITH_SIGN)
                             + pyp.Optional(_NUMBER) + '.' + _NUMBER
                             + pyp.Optional(_E
                                            + pyp.Optional(_ARITH_SIGN)
                                            + _NUMBER))
  # [sign]digits[E[+]digits], combined into a single token.  Unlike
  # _REAL_NUMBER, the exponent accepts only an optional '+', never '-'.
  _INT_NUMBER = pyp.Combine(pyp.Optional(_ARITH_SIGN)
                            + _NUMBER
                            + pyp.Optional(_E
                                           + pyp.Optional('+')
                                           + _NUMBER))

  # Hex literal: either 0x<hexdigits> (the '0x' prefix is suppressed) or
  # x'<hexdigits>' (kept as one token, including the x and the quotes).
  _HEX = ((pyp.CaselessLiteral('0x').suppress()
           + pyp.Word(pyp.hexnums))
          | pyp.Regex(r"x'(?:[0-9a-fA-F])+'"))

  # Any literal value, grouped and exposed as 'val'.  Alternatives are tried
  # in the order listed; OneOrMore(_STRING) lets several adjacent string
  # literals form one value.
  _VAL = pyp.Group(
          _HEX
          | pyp.OneOrMore(_STRING)
          | _REAL_NUMBER
          | _INT_NUMBER
          | _NULL_TOKEN
          | _TRUE_TOKEN
          | _FALSE_TOKEN).setResultsName('val')

  ## TYPES

  # Parenthesised, comma-separated identifiers, grouped as 'fields'.  The
  # parentheses themselves are suppressed.
  _FIELD_LIST = pyp.Group(pyp.Suppress('(')
                          + pyp.delimitedList(_IDENTIFIER)
                          + pyp.Suppress(')')
                          ).setResultsName('fields')

  # Parenthesised, comma-separated string literals, grouped as 'values'
  # (used below for the members of ENUM and SET types).
  _STRING_LIST = pyp.Group(pyp.Suppress('(')
                           + pyp.delimitedList(_STRING)
                           + pyp.Suppress(')')
                           ).setResultsName('values')

  # "(N)" following a type name.  The parentheses are suppressed and the
  # digits are exposed under the results name 'type_size'.
  #
  # setResultsName() is used rather than setName(): setName() only relabels
  # the shared _NUMBER element in place (a label that __init__ overwrites
  # again) and leaves the parsed value without any name in the results.
  # setResultsName() works on a copy, so _NUMBER itself is left untouched.
  _TYPE_SIZE = (pyp.Suppress('(')
                + _NUMBER.setResultsName('type_size')
                + pyp.Suppress(')'))

  # "(P,S)" following a type name: two numbers exposed as 'type_precision'
  # and 'type_scale'; the parentheses and the comma are suppressed.
  _TYPE_PRECISION = (pyp.Suppress('(')
                     + _NUMBER.setResultsName('type_precision')
                     + pyp.Suppress(',')
                     + _NUMBER.setResultsName('type_scale')
                     + pyp.Suppress(')'))

  # Types that don't take arguments.
  # The matched keyword is exposed as 'type_type', as it is for every type
  # family below.
  _SIMPLE_TYPE = (_DATE_TOKEN
                  | _DATETIME_TOKEN
                  | _TIMESTAMP_TOKEN
                  | _TIME_TOKEN
                  | _YEAR_TOKEN
                  | _TINYTEXT_TOKEN
                  | _TEXT_TOKEN
                  | _MEDIUMTEXT_TOKEN
                  | _LONGTEXT_TOKEN
                  | _TINYBLOB_TOKEN
                  | _BLOB_TOKEN
                  | _MEDIUMBLOB_TOKEN
                  | _LONGBLOB_TOKEN).setResultsName('type_type')

  # BIT with an optional "(N)" size.
  _BIT = (_BIT_TOKEN.setResultsName('type_type')
          + pyp.Optional(_TYPE_SIZE))

  # ENUM followed by its mandatory list of string members.
  _ENUM = (_ENUM_TOKEN.setResultsName('type_type')
           + _STRING_LIST)

  # SET used as a column type, followed by its mandatory list of members.
  _SET_TYPE = (_SET_TOKEN.setResultsName('type_type')
               + _STRING_LIST)

  # Integer types: optional "(N)" size, then optional UNSIGNED.
  _INTS = ((_TINYINT_TOKEN
            | _SMALLINT_TOKEN
            | _MEDIUMINT_TOKEN
            | _INT_TOKEN
            | _INTEGER_TOKEN
            | _BIGINT_TOKEN).setResultsName('type_type')
           + pyp.Optional(_TYPE_SIZE)
           + pyp.Optional(_UNSIGNED_TOKEN))

  # Fixed/floating point types with an optional "(P,S)".  DOUBLE may be
  # followed by PRECISION.
  _REALS = ((_DECIMAL_TOKEN
             | _DEC_TOKEN
             | _FIXED_TOKEN
             | _FLOAT_TOKEN
             | _DOUBLE_TOKEN + pyp.Optional(_PRECISION_TOKEN)
             ).setResultsName('type_type')
            + pyp.Optional(_TYPE_PRECISION))

  # Character/binary string types: optional "(N)" size, then an optional
  # trailing BINARY keyword.
  _CHARS = ((_VARCHAR_TOKEN
             | _CHAR_TOKEN
             | _BINARY_TOKEN
             | _VARBINARY_TOKEN).setResultsName('type_type')
            + pyp.Optional(_TYPE_SIZE)
            + pyp.Optional(_BINARY_TOKEN))

  # Any column type, grouped and exposed as 'type'.  Alternatives are tried
  # in the order listed.
  _TYPE = pyp.Group(_BIT
                    | _ENUM
                    | _SET_TYPE
                    | _INTS
                    | _REALS
                    | _CHARS
                    | _SIMPLE_TYPE
                    ).setResultsName('type')

  ## GRAMMAR

  # COMMONS
  # Named views of _IDENTIFIER.  setResultsName() returns a copy, so each of
  # these is an independent element carrying its own results name.

  _DB_NAME = _IDENTIFIER.setResultsName('database')

  _TABLE_NAME_ONLY = _IDENTIFIER.setResultsName('table')

  # "db.table" or just "table", grouped as 'table_spec'.  The qualified form
  # is tried first.
  _TABLE_NAME = pyp.Group((_DB_NAME + '.' + _TABLE_NAME_ONLY)
                          | _TABLE_NAME_ONLY).setResultsName('table_spec')

  # A column identifier or the '*' wildcard, exposed as 'column'.
  _COLUMN_NAME_WILD = (_IDENTIFIER | '*').setResultsName('column')

  # "db.table.column", "table.column" or "column", grouped as 'column_spec'.
  # The longest form is tried first.
  _COLUMN_NAME = pyp.Group(
      (_DB_NAME + '.' + _TABLE_NAME_ONLY + '.' + _COLUMN_NAME_WILD)
      | (_TABLE_NAME_ONLY + '.' + _COLUMN_NAME_WILD)
      | _COLUMN_NAME_WILD).setResultsName('column_spec')

  _INDEX_NAME = _IDENTIFIER.setResultsName('index')


  # Parenthesised, comma-separated column names, grouped as 'columns'.
  _COLUMN_LIST = pyp.Group(pyp.Suppress('(')
                           + pyp.delimitedList(_COLUMN_NAME)
                           + pyp.Suppress(')')
                           ).setResultsName('columns')

  # DATA DEFINITION COMMONS

  # DEFAULT <value>.  The DEFAULT keyword is suppressed; the value (NULL, a
  # literal, or CURRENT_TIMESTAMP) is grouped as 'default'.
  _DEFAULT_VAL = (_DEFAULT_TOKEN
                  + pyp.Group(_NULL_TOKEN
                              | _VAL
                              | _CURRENT_TIMESTAMP_TOKEN
                              ).setResultsName('default'))

  # [NOT] NULL, grouped as 'constraint'.
  _COLUMN_CONSTRAINT = pyp.Group(pyp.Optional(_NOT_TOKEN)
                                 + _NULL_TOKEN
                                 ).setResultsName('constraint')

  # FIRST, or BEFORE/AFTER <column>, grouped as 'position'.
  _POSITIONAL = pyp.Group(_FIRST_TOKEN
                          | ((_BEFORE_TOKEN | _AFTER_TOKEN) + _COLUMN_NAME)
                          ).setResultsName('position')

  # Optional column flags:
  #  - CHARSET <charset>
  #  - CHARACTER SET <charset>
  #  - COLLATE <collate name>
  #  - DEFAULT '<value>'
  #  - AUTO_INCREMENT
  #  - NOT NULL
  #  - ON UPDATE CURRENT_TIMESTAMP
  # Each flag that matches is wrapped in its own 'column_flags' group.
  _COLUMN_FLAGS = pyp.Group(
      (_CHARSET_TOKEN + _IDENTIFIER.setResultsName('charset'))
      | (_CHARACTER_TOKEN + _SET_TOKEN + _IDENTIFIER.setResultsName('charset'))
      | (_COLLATE_TOKEN + _IDENTIFIER.setResultsName('collate'))
      | _COLUMN_CONSTRAINT
      | _DEFAULT_VAL
      | _AUTO_INCREMENT_TOKEN.setResultsName('option')
      | (_ON_TOKEN + _UPDATE_TOKEN + _CURRENT_TIMESTAMP_TOKEN)
      ).setResultsName('column_flags')

  # A column type followed by any number of flags, grouped as
  # 'column_definition'.
  _COLUMN_DEFINITION = pyp.Group(_TYPE
                                 + pyp.ZeroOrMore(_COLUMN_FLAGS)
                                 ).setResultsName('column_definition')

  # A key declaration, grouped as 'key_definition'.  Two forms:
  #   [UNIQUE] INDEX|KEY [name] (fields)   or   UNIQUE [name] (fields)
  #   PRIMARY KEY (fields)
  _KEY_DEFINITION = pyp.Group(
      (((pyp.Optional(_UNIQUE_TOKEN).setResultsName('key_option')
         + (_INDEX_TOKEN | _KEY_TOKEN).setResultsName('key_type'))
        | _UNIQUE_TOKEN.setResultsName('key_type'))
       + pyp.Optional(_IDENTIFIER).setResultsName('key_name')
       + _FIELD_LIST)
      | ((_PRIMARY_TOKEN + _KEY_TOKEN).setResultsName('key_type')
         + _FIELD_LIST)
      ).setResultsName('key_definition')


  # ALTER STATEMENTS

  # ADD COLUMN columnname TYPE [BEFORE | AFTER ...]
  # ADD COLUMN (columnname TYPE, ...) [BEFORE | AFTER ...]
  # The COLUMN keyword is optional.  Trailing column flags, PRIMARY KEY and
  # a position are accepted after either form.
  _ALTER_TABLE_ADD_COLUMN = pyp.Group(
      _ADD_TOKEN + pyp.Optional(_COLUMN_TOKEN)
      + ((_COLUMN_NAME + _COLUMN_DEFINITION)
         | (pyp.Suppress('(')
            + pyp.delimitedList(_COLUMN_NAME + _COLUMN_DEFINITION)
            + pyp.Suppress(')')))
      + pyp.ZeroOrMore(_COLUMN_FLAGS)
      + pyp.Optional(_PRIMARY_TOKEN + _KEY_TOKEN)
      + pyp.Optional(_POSITIONAL)
      ).setResultsName('add_column')

  # Action of an ON DELETE / ON UPDATE clause:
  # RESTRICT | CASCADE | SET NULL | NO ACTION
  _REFERENCE_OPTION = pyp.Group(
      _RESTRICT_TOKEN
      | _CASCADE_TOKEN
      | (_SET_TOKEN + _NULL_TOKEN)
      | (_NO_TOKEN + _ACTION_TOKEN)
  ).setResultsName('reference_option')

  # [CONSTRAINT [name]] FOREIGN KEY [name] (fields)
  #   REFERENCES table (fields) [ON DELETE option] [ON UPDATE option]
  # When both ON clauses are present, ON DELETE must come first.  The group
  # itself carries no results name.
  _CONSTRAINT_DEFINITION = pyp.Group(
    pyp.Optional(
        _CONSTRAINT_TOKEN
        + pyp.Optional(_IDENTIFIER).setResultsName('constraint_name')
        )
    + _FOREIGN_TOKEN + _KEY_TOKEN
    + pyp.Optional(_IDENTIFIER).setResultsName('key_name')
    + _FIELD_LIST
    + _REFERENCES_TOKEN
    + _TABLE_NAME
    + _FIELD_LIST
    + pyp.Optional(_ON_TOKEN
                   + _DELETE_TOKEN
                   + _REFERENCE_OPTION)
    + pyp.Optional(_ON_TOKEN
                   + _UPDATE_TOKEN
                   + _REFERENCE_OPTION)
  )

  # ADD <foreign key constraint>
  _ALTER_TABLE_ADD_CONSTRAINT = pyp.Group(
      _ADD_TOKEN
      + _CONSTRAINT_DEFINITION
      ).setResultsName('add_constraint')

  # DROP FOREIGN KEY constraintname
  _ALTER_TABLE_DROP_FOREIGN_KEY = pyp.Group(
      _DROP_TOKEN
      + _FOREIGN_TOKEN
      + _KEY_TOKEN
      + _IDENTIFIER.setResultsName('constraint_name')
      ).setResultsName('drop_foreign_key')

  # ADD [UNIQUE] INDEX | KEY ...
  # ADD UNIQUE ...
  # NOTE(review): unlike _KEY_DEFINITION, the INDEX/KEY keyword here carries
  # no 'key_type' results name; only the bare UNIQUE form does.  Confirm
  # whether that difference is intended.
  _ALTER_TABLE_ADD_INDEX = pyp.Group(
      _ADD_TOKEN
      + ((pyp.Optional(_UNIQUE_TOKEN).setResultsName('key_option')
          + (_INDEX_TOKEN | _KEY_TOKEN))
         | (_UNIQUE_TOKEN).setResultsName('key_type'))
      + pyp.Optional(_IDENTIFIER).setResultsName('key_name')
      + _FIELD_LIST
      ).setResultsName('add_index')

  # ADD PRIMARY KEY (fields)
  _ALTER_TABLE_ADD_PRIMARY_KEY = pyp.Group(
      _ADD_TOKEN + _PRIMARY_TOKEN + _KEY_TOKEN
      + _FIELD_LIST
      ).setResultsName('add_primary_key')

  # ALTER [COLUMN] columnname SET DEFAULT value
  # ALTER [COLUMN] columnname DROP DEFAULT
  # _DEFAULT_VAL already starts with the (suppressed) DEFAULT keyword.
  _ALTER_TABLE_ALTER = pyp.Group(
      _ALTER_TOKEN + pyp.Optional(_COLUMN_TOKEN)
      + _COLUMN_NAME
      + ((_SET_TOKEN + _DEFAULT_VAL)
         | (_DROP_TOKEN + _DEFAULT_TOKEN))
      ).setResultsName('alter_column')

  # MODIFY [COLUMN] columnname TYPE [FIRST | BEFORE ... | AFTER ...]
  _ALTER_TABLE_MODIFY = pyp.Group(
      _MODIFY_TOKEN + pyp.Optional(_COLUMN_TOKEN)
      + (_COLUMN_NAME + _COLUMN_DEFINITION)
      + pyp.Optional(_POSITIONAL)
      ).setResultsName('modify_column')

  # CHANGE [COLUMN] oldname newname TYPE
  # The new name is exposed as 'column_spec_new'.
  _ALTER_TABLE_CHANGE = pyp.Group(
      _CHANGE_TOKEN + pyp.Optional(_COLUMN_TOKEN)
      + _COLUMN_NAME
      + _COLUMN_NAME.setResultsName('column_spec_new')
      + _COLUMN_DEFINITION
      ).setResultsName('change_column')

  # DROP [COLUMN] columnname
  _ALTER_TABLE_DROP_COLUMN = pyp.Group(
      _DROP_TOKEN + pyp.Optional(_COLUMN_TOKEN)
      + _COLUMN_NAME
      ).setResultsName('drop_column')

  # DROP PRIMARY KEY
  _ALTER_TABLE_DROP_PRIMARY_KEY = pyp.Group(
      _DROP_TOKEN + _PRIMARY_TOKEN + _KEY_TOKEN
      ).setResultsName('drop_primary_key')

  # DROP INDEX | KEY keyname
  _ALTER_TABLE_DROP_INDEX = pyp.Group(
      _DROP_TOKEN + (_INDEX_TOKEN | _KEY_TOKEN)
      + _IDENTIFIER.setResultsName('key_name')
      ).setResultsName('drop_index')

  # CONVERT TO CHARACTER SET charset
  _ALTER_TABLE_CONVERT = pyp.Group(
      _CONVERT_TOKEN + _TO_TOKEN + _CHARACTER_TOKEN + _SET_TOKEN
      + _IDENTIFIER.setResultsName('character_set')
      ).setResultsName('convert')

  # CHARACTER SET charset (shared by ALTER TABLE and ALTER DATABASE)
  _ALTER_CHARACTER_SET = pyp.Group(
      _CHARACTER_TOKEN + _SET_TOKEN
      + _IDENTIFIER.setResultsName('character_set')
      ).setResultsName('alter_charset')

  # The various ALTER TABLE operations supported:
  # - MODIFY
  # - ADD PRIMARY KEY
  # - ADD CONSTRAINT / DROP FOREIGN KEY
  # - ADD INDEX
  # - ADD COLUMN
  # - CHANGE
  # - DROP PRIMARY KEY / DROP INDEX / DROP COLUMN
  # - ALTER
  # - CONVERT TO CHARACTER SET / CHARACTER SET
  # Alternatives are tried in the order listed, so the more specific ADD and
  # DROP forms come before the generic ADD COLUMN and DROP COLUMN.
  _ALTER_TABLE_OPERATIONS = pyp.Group(
      _ALTER_TABLE_MODIFY
      | _ALTER_TABLE_ADD_PRIMARY_KEY
      | _ALTER_TABLE_ADD_CONSTRAINT
      | _ALTER_TABLE_DROP_FOREIGN_KEY
      | _ALTER_TABLE_ADD_INDEX
      | _ALTER_TABLE_ADD_COLUMN
      | _ALTER_TABLE_CHANGE
      | _ALTER_TABLE_DROP_PRIMARY_KEY
      | _ALTER_TABLE_DROP_INDEX
      | _ALTER_TABLE_DROP_COLUMN
      | _ALTER_TABLE_ALTER
      | _ALTER_TABLE_CONVERT
      | _ALTER_CHARACTER_SET
      ).setResultsName('operations')

  # ALTER TABLE table operation [, operation ...]
  _ALTER_TABLE_SQL = pyp.Group(_ALTER_TOKEN
                               + _TABLE_TOKEN
                               + _TABLE_NAME
                               + pyp.delimitedList(_ALTER_TABLE_OPERATIONS)
                               ).setResultsName('alter')

  # The only ALTER DATABASE operation supported is CHARACTER SET.
  _ALTER_DATABASE_OPERATIONS = pyp.Group(
      _ALTER_CHARACTER_SET
      ).setResultsName('operations')

  # ALTER DATABASE dbname operation [, operation ...]
  _ALTER_DATABASE_SQL = pyp.Group(
      _ALTER_TOKEN
      + _DATABASE_TOKEN
      + _DB_NAME
      + pyp.delimitedList(_ALTER_DATABASE_OPERATIONS)
      ).setResultsName('alter_db')

  # CREATE STATEMENTS

  # One entry inside CREATE TABLE's parentheses: a key definition, a foreign
  # key constraint, or "columnname TYPE ...".  Grouped as 'operation'.
  _CREATE_DEFINITION = pyp.Group(_KEY_DEFINITION
                                 | _CONSTRAINT_DEFINITION
                                 | (_COLUMN_NAME
                                    + _COLUMN_DEFINITION)
                                 ).setResultsName('operation')

  # Match on IF NOT EXISTS
  # IF and EXISTS are suppressed tokens, so only NOT reaches the results.
  _CREATE_NO_OVERWRITE = _IF_TOKEN + _NOT_TOKEN + _EXISTS_TOKEN

  # Comma-separated create definitions, grouped as 'operations'.
  _CREATE_OPERATIONS = pyp.Group(pyp.delimitedList(_CREATE_DEFINITION)
                                 ).setResultsName('operations')

  # CREATE TABLE table options can come in any order.  There may be
  # zero or many of them
  # Recognised option names: ENGINE, [DEFAULT] CHARSET,
  # [DEFAULT] CHARACTER SET and COLLATE.
  _TABLE_FLAGS = pyp.Group(_ENGINE_TOKEN
                           | (_DEFAULT_TOKEN + _CHARSET_TOKEN)
                           | _CHARSET_TOKEN
                           | (_CHARACTER_TOKEN + _SET_TOKEN)
                           | (_DEFAULT_TOKEN + _CHARACTER_TOKEN + _SET_TOKEN)
                           | _COLLATE_TOKEN
                           ).setResultsName('table_flags_type')

  # CREATE TABLE table options have the format: FLAG [=] VALUE
  # The '=' is optional and suppressed; VALUE is an identifier.
  _TABLE_FLAGS_DEF = pyp.Group(
      _TABLE_FLAGS
      + pyp.Optional(pyp.Suppress('='))
      + _IDENTIFIER.setResultsName('table_flags_identifier')
      ).setResultsName('table_flags_definition')

  # CREATE TABLE [IF NOT EXISTS] table (definitions) [table options]
  _CREATE_TABLE_SQL = pyp.Group(
      _CREATE_TOKEN
      + _TABLE_TOKEN
      + pyp.Optional(_CREATE_NO_OVERWRITE)
      + _TABLE_NAME
      + pyp.Suppress('(')
      + _CREATE_OPERATIONS
      + pyp.Suppress(')')
      + pyp.ZeroOrMore(_TABLE_FLAGS_DEF).setResultsName('table_flags')
      ).setResultsName('create_table')

  # CREATE TABLE [IF NOT EXISTS] table LIKE othertable
  _CREATE_TABLE_LIKE_SQL = pyp.Group(
      _CREATE_TOKEN
      + _TABLE_TOKEN
      + pyp.Optional(_CREATE_NO_OVERWRITE)
      + _TABLE_NAME
      + _LIKE_TOKEN
      + _TABLE_NAME
      ).setResultsName('create_table_like')

  # DROP TABLE [IF EXISTS] table
  # Several comma-separated tables are accepted.
  _DROP_TABLE_SQL = pyp.Group(_DROP_TOKEN
                              + _TABLE_TOKEN
                              + pyp.Optional(_IF_TOKEN + _EXISTS_TOKEN)
                              + pyp.delimitedList(_TABLE_NAME)
                              ).setResultsName('drop_table')

  # CREATE DATABASE dbname
  # An optional IF NOT EXISTS is accepted before the name.
  _CREATE_DATABASE_SQL = pyp.Group(_CREATE_TOKEN
                                   + _DATABASE_TOKEN
                                   + pyp.Optional(_CREATE_NO_OVERWRITE)
                                   + _DB_NAME
                                   ).setResultsName('create_database')

  # DROP DATABASE dbname
  # An optional IF EXISTS is accepted before the name.
  _DROP_DATABASE_SQL = pyp.Group(_DROP_TOKEN
                                 + _DATABASE_TOKEN
                                 + pyp.Optional(_IF_TOKEN + _EXISTS_TOKEN)
                                 + _DB_NAME
                                 ).setResultsName('drop_database')

  # CREATE INDEX idx ON table (column, ...)
  # UNIQUE may precede INDEX.  Unlike the other statements in this section,
  # this one is neither wrapped in a Group nor given a results name.
  _CREATE_INDEX_SQL = (
      _CREATE_TOKEN
      + pyp.Optional(_UNIQUE_TOKEN).setResultsName('key_option')
      + _INDEX_TOKEN
      + _INDEX_NAME.setResultsName('key_name')
      + _ON_TOKEN
      + _TABLE_NAME
      + _COLUMN_LIST)

  # EXPRESSIONS

  # Binary operators, grouped by precedence level.  _BINOP1 binds tightest
  # and _BINOP6 loosest; see _EXPRESSION1 .. _EXPRESSION below.
  _BINOP1 = pyp.oneOf("* / %")
  _BINOP2 = pyp.oneOf("+ - << >> | &")
  _BINOP3 = pyp.oneOf(":= = != <> < > >= <=")
  _BINOP4 = pyp.oneOf("like between regexp", caseless=True)  # optional "NOT"
  _BINOP5 = pyp.oneOf("and", caseless=True)
  _BINOP6 = pyp.oneOf("or", caseless=True)

  _EXPRESSION = pyp.Forward()  # _EXPRESSION is recursive

  _DATE_FUNCTION_NAME = pyp.oneOf("date_add date_sub", caseless=True
                                  ).setResultsName('function_name')

  # Units accepted after "INTERVAL <expr>", matched case-insensitively.
  _INTERVAL_UNIT = pyp.oneOf(
      "microsecond second minute hour day week month quarter year "
      "second_microsecond minute_microsecond minute_second hour_microsecond "
      "hour_second hour_minute day_microsecond day_second day_minute "
      "day_hour year_month", caseless=True
      ).setResultsName('interval_unit')

  # DATE_ADD(expr, INTERVAL expr unit) / DATE_SUB(expr, INTERVAL expr unit)
  _DATE_FUNCTION = pyp.Group(
      _DATE_FUNCTION_NAME
      + pyp.Suppress('(')
      + _EXPRESSION.setResultsName('arg')
      + pyp.Suppress(',')
      + _INTERVAL_TOKEN
      + _EXPRESSION.setResultsName('interval_val')
      + _INTERVAL_UNIT
      + pyp.Suppress(')')
      ).setResultsName('function')

  # Any identifier may serve as a function name.
  _FUNCTION_NAME = (_IDENTIFIER
                    ).setResultsName('function_name')

  # Parenthesised, possibly empty, comma-separated argument expressions,
  # grouped as 'args'.
  _ARG_LIST = pyp.Group(
      pyp.Suppress('(')
      + pyp.Optional(pyp.delimitedList(_EXPRESSION.setResultsName('arg')))
      + pyp.Suppress(')')
      ).setResultsName('args')

  # name(args...), grouped as 'function'.
  _FUNCTION = pyp.Group(
      _FUNCTION_NAME
      + _ARG_LIST
      ).setResultsName('function')

  # @name or @@name; the prefix is grouped as 'scope', the name is exposed
  # as 'variable'.
  _VARIABLE = pyp.Group(
      pyp.Group(pyp.Literal('@@')
                | pyp.Literal('@')
                ).setResultsName('scope')
      + _IDENTIFIER.setResultsName('variable'))

  # An operand: a parenthesised expression, a literal, a function call, a
  # date function, a column (optionally followed by COLLATE name), or a
  # variable.  Alternatives are tried in the order listed.
  _LVAL = ((pyp.Suppress('(') + _EXPRESSION + pyp.Suppress(')'))
           | _VAL
           | _FUNCTION
           | _DATE_FUNCTION
           | _COLUMN_NAME + pyp.Optional(
               _COLLATE_TOKEN + _IDENTIFIER.setResultsName('collate'))
           | _VARIABLE)

  # operand [NOT] IN (value, ...).  Only literal values are accepted inside
  # the parentheses.
  _IN_EXPRESSION = pyp.Group(
      _LVAL
      + pyp.Optional(_NOT_TOKEN)
      + _IN_TOKEN
      + pyp.Suppress('(')
      + pyp.delimitedList(_VAL)
      + pyp.Suppress(')')
      ).setResultsName('in')

  # operand IS [NOT] NULL | TRUE | FALSE | UNKNOWN
  _IS_EXPRESSION = pyp.Group(
      _LVAL
      + _IS_TOKEN
      + pyp.Optional(_NOT_TOKEN)
      + (_NULL_TOKEN | _TRUE_TOKEN | _FALSE_TOKEN | _UNKNOWN_TOKEN)
      ).setResultsName('is')

  # One or more "WHEN expr THEN expr" pairs followed by an optional
  # "ELSE expr".
  _CASES_LIST = (
      pyp.OneOrMore(_WHEN_TOKEN
                    + _EXPRESSION
                    + _THEN_TOKEN
                    + _EXPRESSION)
      + pyp.Optional(_ELSE_TOKEN
                     + _EXPRESSION))

  # CASE [expr] WHEN ... THEN ... [ELSE ...] END
  _CASE_EXPRESSION = pyp.Group(
      _CASE_TOKEN
      + (_CASES_LIST
         | (_EXPRESSION + _CASES_LIST))
      + _END_TOKEN).setResultsName('case')

  # Prefix operators: NOT, '!' and unary minus.
  _UNARY = (
      _NOT_TOKEN
      | '!'
      | '-')

  # Level 0: IS / IN / CASE expressions, or an optionally prefixed operand.
  # The IS and IN forms are tried first, so a plain operand is only accepted
  # after those two have failed to match.
  _EXPRESSION0 = (
      _IS_EXPRESSION
      | _IN_EXPRESSION
      | _CASE_EXPRESSION
      | (pyp.Optional(_UNARY) + _LVAL))

  # Levels 1-6 all have the same shape: one operand from the level below,
  # followed by any number of (operator, operand) pairs, grouped as 'ex'.
  # Level 1: * / %
  _EXPRESSION1 = (
      pyp.Group(_EXPRESSION0
                + pyp.ZeroOrMore(_BINOP1 + _EXPRESSION0)).setResultsName('ex'))

  # Level 2: + - << >> | &
  _EXPRESSION2 = (
      pyp.Group(_EXPRESSION1
                + pyp.ZeroOrMore(_BINOP2 + _EXPRESSION1)).setResultsName('ex'))

  # Level 3: assignment and comparison operators.
  _EXPRESSION3 = (
      pyp.Group(_EXPRESSION2
                + pyp.ZeroOrMore(_BINOP3 + _EXPRESSION2)).setResultsName('ex'))
  # Level 4: [NOT] LIKE / BETWEEN / REGEXP.
  _EXPRESSION4 = (
      pyp.Group(_EXPRESSION3
                + pyp.ZeroOrMore(
                    pyp.Optional(_NOT_TOKEN) + _BINOP4 + _EXPRESSION3)
                ).setResultsName('ex'))

  # Level 5: AND
  _EXPRESSION5 = (
      pyp.Group(_EXPRESSION4
                + pyp.ZeroOrMore(_BINOP5 + _EXPRESSION4)).setResultsName('ex'))

  # Level 6 (top): OR.  This binds the Forward declared above, closing the
  # recursion through _LVAL, _ARG_LIST and the CASE rules.
  _EXPRESSION << (
      pyp.Group(_EXPRESSION5
                + pyp.ZeroOrMore(_BINOP6 + _EXPRESSION5)).setResultsName('ex'))

  # SET STATEMENT

  # [LOCAL | SESSION | GLOBAL | @@ | @] name = expr
  # The optional prefix is exposed as 'scope' and the name as 'variable'.
  # The '=' is a plain Literal here, so it is kept in the results.
  _SET_VARIABLE = (
      pyp.Optional(
          _LOCAL_TOKEN
          | _SESSION_TOKEN
          | _GLOBAL_TOKEN
          | pyp.Literal('@@')
          | pyp.Literal('@')
          ).setResultsName('scope')
      + _IDENTIFIER.setResultsName('variable')
      + pyp.Literal('=')
      + _EXPRESSION)

  # CHARACTER SET expr
  _SET_CHARSET = (
      _CHARACTER_TOKEN
      + _SET_TOKEN
      + _EXPRESSION)

  # NAMES expr
  _SET_NAMES = (
      _NAMES_TOKEN
      + _EXPRESSION)

  # SET item [, item ...] where each item is one of the three forms above.
  # The group carries no results name.
  _SET_SQL = pyp.Group(
      _SET_TOKEN
      + pyp.delimitedList(_SET_VARIABLE
                          | _SET_CHARSET
                          | _SET_NAMES))

  # TABLE REFERENCE

  # USE | IGNORE | FORCE  INDEX | KEY  (name, ...)
  _INDEX_HINT = ((_USE_TOKEN | _IGNORE_TOKEN | _FORCE_TOKEN)
                 + (_INDEX_TOKEN | _KEY_TOKEN)
                 + pyp.Suppress('(')
                 + pyp.delimitedList(_IDENTIFIER)
                 + pyp.Suppress(')'))

  # [AS] alias.  NotAny(_KEYWORDS) refuses a keyword in the alias position,
  # so the keyword that starts the next clause is not consumed as an alias.
  _ALIAS = (pyp.Optional(_AS_TOKEN)
            + pyp.NotAny(_KEYWORDS)
            + _IDENTIFIER.setResultsName('alias'))

  # table [alias] [index hint, ...]; the table and its alias are grouped as
  # 'table_alias'.
  _TABLE = (pyp.Group(_TABLE_NAME
                      + pyp.Optional(_ALIAS)).setResultsName('table_alias')
            + pyp.Optional(pyp.delimitedList(_INDEX_HINT)))

  # ON expr, or USING (columns) grouped as 'using'.
  _JOIN_CONDITION = ((_ON_TOKEN + _EXPRESSION)
                     | pyp.Group(_USING_TOKEN
                                 + _COLUMN_LIST).setResultsName('using'))

  # LEFT | RIGHT [OUTER]
  _JOIN_LEFT_RIGHT = ((_LEFT_TOKEN | _RIGHT_TOKEN)
                      + pyp.Optional(_OUTER_TOKEN))

  # INNER | CROSS | NATURAL [LEFT | RIGHT [OUTER]] | LEFT | RIGHT [OUTER],
  # grouped as 'join_side'.
  _JOIN_SIDE = pyp.Group((_INNER_TOKEN | _CROSS_TOKEN)
                         |(_NATURAL_TOKEN
                           + pyp.Optional(_JOIN_LEFT_RIGHT))
                         | _JOIN_LEFT_RIGHT
                         ).setResultsName('join_side')

  # [join side] JOIN | STRAIGHT_JOIN table [join condition], grouped as
  # 'tablejoin'.
  _TABLE_JOIN = pyp.Group(
      pyp.Optional(_JOIN_SIDE)
      + (_JOIN_TOKEN | _STRAIGHT_JOIN_TOKEN)
      + _TABLE
      + pyp.Optional(_JOIN_CONDITION)).setResultsName('tablejoin')

  # A table followed by any number of joins; several such references may be
  # separated by commas.
  _TABLE_REFERENCE = _TABLE + pyp.ZeroOrMore(_TABLE_JOIN)
  _TABLE_REFERENCES = pyp.Group(pyp.delimitedList(_TABLE_REFERENCE))


  # DATA MANIPULATION COMMONS
  # The clause keywords used here (WHERE, ORDER, GROUP, BY, HAVING, LIMIT)
  # are suppressed tokens, so only the named payload reaches the results.

  # Comma-separated expressions in one group.
  _EXPRESSION_LIST = pyp.Group(pyp.delimitedList(_EXPRESSION))

  # WHERE expr [, expr ...] -> 'where'
  _WHERE = (_WHERE_TOKEN
            + _EXPRESSION_LIST.setResultsName('where'))


  # ORDER BY expr [, expr ...] -> 'order_by'
  _ORDER_BY = (_ORDER_TOKEN
               + _BY_TOKEN
               + _EXPRESSION_LIST.setResultsName('order_by'))

  # GROUP BY expr [, expr ...] -> 'group_by'
  _GROUP_BY = (_GROUP_TOKEN
               + _BY_TOKEN
               + _EXPRESSION_LIST.setResultsName('group_by'))

  # HAVING expr [, expr ...] -> 'having'
  _HAVING = (_HAVING_TOKEN
             + _EXPRESSION_LIST.setResultsName('having'))

  # LIMIT n -> 'limit'.  Only a single number is accepted.
  _LIMIT = (_LIMIT_TOKEN
            + _NUMBER.setResultsName('limit'))

  # column = expr, grouped as 'set'; the '=' is suppressed and the
  # expression is exposed as 'set_value'.
  _SET_VALUE = pyp.Group(_COLUMN_NAME
                         + pyp.Suppress('=')
                         + _EXPRESSION.setResultsName('set_value')
                         ).setResultsName('set')

  # Comma-separated assignments, grouped as 'sets'.
  _SET_VALUE_LIST = pyp.Group(pyp.delimitedList(_SET_VALUE)
                              ).setResultsName('sets')

  # SET assignment [, assignment ...]; the SET keyword itself is suppressed.
  _SET = (_SET_TOKEN.suppress()
          + _SET_VALUE_LIST)

  # SELECT STATEMENTS

  # One item of the select list: "expr [AS alias]" in a group, or a bare
  # '*', which is suppressed.  The expression alternative is tried first.
  _SELECT_EXPRESSION = (pyp.Group(
      _EXPRESSION.setResultsName('select_expression')
      + pyp.Optional(_AS_TOKEN
                     + _IDENTIFIER.setResultsName('alias')))
                        | pyp.Suppress('*'))

  # FROM table references, grouped as 'select_from'.
  _SELECT_FROM = pyp.Group(_FROM_TOKEN
                           + _TABLE_REFERENCES).setResultsName('select_from')

  # Everything from FROM onwards.  The optional clauses must appear in this
  # fixed order: WHERE, GROUP BY, HAVING, ORDER BY, LIMIT.
  _SELECT_SQL_2 = (_SELECT_FROM
                   + pyp.Optional(_WHERE)
                   + pyp.Optional(_GROUP_BY)
                   + pyp.Optional(_HAVING)
                   + pyp.Optional(_ORDER_BY)
                   + pyp.Optional(_LIMIT))

  # ALL | DISTINCT | DISTINCTROW
  _SELECT_OPTIONS = (_ALL_TOKEN
                     | _DISTINCT_TOKEN
                     | _DISTINCTROW_TOKEN)

  # SELECT [option] item [, item ...] [FROM ...], grouped as 'select'.  The
  # FROM part is optional, so a SELECT of plain expressions also parses.
  _SELECT_SQL = pyp.Group(_SELECT_TOKEN
                          + pyp.Optional(_SELECT_OPTIONS)
                          + pyp.delimitedList(_SELECT_EXPRESSION)
                          .setResultsName('select_expressions')
                          + pyp.Optional(_SELECT_SQL_2)
                          ).setResultsName('select')


  # UPDATE STATEMENTS

  # Single-table form: table SET ... [WHERE ...] [ORDER BY ...] [LIMIT ...]
  _UPDATE_TABLE = (_TABLE_NAME
                   + _SET
                   + pyp.Optional(_WHERE)
                   + pyp.Optional(_ORDER_BY)
                   + pyp.Optional(_LIMIT))

  # Table-references form (aliases, joins, several tables):
  # references SET ... [WHERE ...]
  _UPDATE_TABLE_REFERENCE = (_TABLE_REFERENCES
                             + _SET
                             + pyp.Optional(_WHERE))

  # UPDATE followed by either form, grouped as 'update'.  The single-table
  # form is tried first.
  _UPDATE_SQL = pyp.Group(_UPDATE_TOKEN
                          + (_UPDATE_TABLE
                             | _UPDATE_TABLE_REFERENCE)
                          ).setResultsName('update')
  # INSERT/REPLACE STATEMENTS

  # One parenthesised row of expressions, grouped as 'vals'.
  _VALUES = pyp.Group(pyp.Suppress('(')
                      + pyp.delimitedList(_EXPRESSION)
                      + pyp.Suppress(')')
                      ).setResultsName('vals')

  # [(columns)] VALUES (row) [, (row) ...]
  _INSERT_VALUES = (pyp.Optional(_COLUMN_LIST)
                    + _VALUES_TOKEN
                    + pyp.delimitedList(_VALUES))

  # SET column = expr [, ...]
  _INSERT_SET = _SET

  # [(columns)] SELECT ...; the select is grouped as 'source_select'.
  # NOTE(review): the surrounding parentheses are each optional on their
  # own, so an unbalanced "(" or ")" is accepted too.
  _INSERT_SELECT = (pyp.Optional(_COLUMN_LIST)
                    + pyp.Optional(pyp.Suppress('('))
                    + pyp.Group(_SELECT_SQL).setResultsName('source_select')
                    + pyp.Optional(pyp.Suppress(')')))

  # ON DUPLICATE KEY UPDATE column = expr [, ...]
  _ON_DUPLICATE_KEY_UPDATE = (_ON_TOKEN
                              + _DUPLICATE_TOKEN
                              + _KEY_TOKEN
                              + _UPDATE_TOKEN
                              + _SET_VALUE_LIST)

  # INSERT [IGNORE] [INTO] table <values | set | select>
  #   [ON DUPLICATE KEY UPDATE ...], grouped as 'insert'.
  _INSERT_SQL = pyp.Group(_INSERT_TOKEN
                          + pyp.Optional(_IGNORE_TOKEN)
                          + pyp.Optional(_INTO_TOKEN)
                          + _TABLE_NAME
                          + (_INSERT_VALUES
                             | _INSERT_SET
                             | _INSERT_SELECT)
                          + pyp.Optional(_ON_DUPLICATE_KEY_UPDATE)
                          ).setResultsName('insert')

  # REPLACE [INTO] table <values | set | select>, grouped as 'replace'.
  # Unlike INSERT there is no IGNORE and no ON DUPLICATE KEY UPDATE.
  _REPLACE_SQL = pyp.Group(_REPLACE_TOKEN
                           + pyp.Optional(_INTO_TOKEN)
                           + _TABLE_NAME
                           + (_INSERT_VALUES
                              | _INSERT_SET
                              | _INSERT_SELECT)
                           ).setResultsName('replace')

  # DELETE STATEMENTS
  #
  # All three forms below are grouped under the same results name 'delete'.

  # DELETE FROM table [WHERE ...] [ORDER BY ...] [LIMIT ...]
  # NOTE(review): this comment used to state that WHERE is not optional
  # because sql.par demands its existence in this statement type. The grammar
  # below wraps _WHERE in pyp.Optional, so a DELETE without WHERE is accepted
  # here. Confirm which behaviour is intended before changing either side.
  _DELETE_SIMPLE_SQL = pyp.Group(_DELETE_TOKEN
                                 + _FROM_TOKEN
                                 + _TABLE_NAME
                                 + pyp.Optional(_WHERE)
                                 + pyp.Optional(_ORDER_BY)
                                 + pyp.Optional(_LIMIT)
                                 ).setResultsName('delete')

  # DELETE table[.*] [, table[.*] ...] FROM table_references [WHERE ...]
  # The optional '.*' suffix is matched as a literal and is not suppressed,
  # so it stays in the parse results when present.
  # NOTE(review): the table references and the (possibly empty) WHERE group
  # are both given the results name 'exclude'; presumably a lookup by that
  # name yields only the later one - verify this is intended.
  _DELETE_MULTI_SQL = pyp.Group(_DELETE_TOKEN
                                + pyp.delimitedList(_TABLE_NAME
                                                    + pyp.Optional('.*'))
                                + _FROM_TOKEN
                                + _TABLE_REFERENCES.setResultsName('exclude')
                                + (pyp.Group(pyp.Optional(_WHERE))
                                   .setResultsName('exclude'))
                                ).setResultsName('delete')

  # DELETE FROM table[.*] [, table[.*] ...] USING table_references [WHERE ...]
  # Same structure as _DELETE_MULTI_SQL, with the table list placed after
  # _FROM_TOKEN and the table references introduced by _USING_TOKEN. The same
  # duplicated 'exclude' results name applies here.
  _DELETE_MULTI_SQL2 = pyp.Group(_DELETE_TOKEN
                                 + _FROM_TOKEN
                                 + pyp.delimitedList(_TABLE_NAME
                                                     + pyp.Optional('.*'))
                                 + _USING_TOKEN
                                 + _TABLE_REFERENCES.setResultsName('exclude')
                                 + (pyp.Group(pyp.Optional(_WHERE))
                                    .setResultsName('exclude'))
                                 ).setResultsName('delete')

  # TRANSACTIONS
  # Start of a transaction: either the two-token sequence
  # _START_TOKEN _TRANSACTION_TOKEN or the single _BEGIN_TOKEN. Grouped under
  # the results name 'start_transaction'.
  _START_TRANSACTION_SQL = pyp.Group((_START_TOKEN + _TRANSACTION_TOKEN)
                                     | _BEGIN_TOKEN
                                     ).setResultsName('start_transaction')

  # End of a transaction: _COMMIT_TOKEN or _ROLLBACK_TOKEN. Both alternatives
  # share the results name 'end_transaction'; which one matched can only be
  # told from the token inside the group.
  _END_TRANSACTION_SQL = pyp.Group(_COMMIT_TOKEN
                                   | _ROLLBACK_TOKEN
                                   ).setResultsName('end_transaction')

  # UNSUPPORTED QUERIES
  #
  # These statements are recognised but not analysed: the leading keyword is
  # matched case-insensitively and kept in the results, while everything up
  # to the next _LINE_DELIMITER match is skipped and dropped from the output.

  _RENAME_TABLE_SQL = pyp.And([
      pyp.CaselessKeyword('rename'),
      pyp.Suppress(pyp.SkipTo(_LINE_DELIMITER)),
  ])

  _TRUNCATE_SQL = pyp.And([
      pyp.CaselessKeyword('truncate'),
      pyp.Suppress(pyp.SkipTo(_LINE_DELIMITER)),
  ])

  # VERSIONED COMMENTS
  # _STATEMENT is declared as a Forward placeholder because the grammar is
  # recursive: a versioned comment contains a statement, and a versioned
  # comment is itself one of the statement alternatives bound below.
  _STATEMENT = pyp.Forward()
  # '/*!' [number] statement '*/'
  # The optional number is named 'min_version'. Both delimiters are plain
  # Literals (not suppressed), so they remain in the parse results.
  _VERSIONED_COMMENT = (pyp.Literal('/*!')
                        + pyp.Optional(_NUMBER.setResultsName('min_version'))
                        + _STATEMENT
                        + pyp.Literal('*/'))

  # MAIN

  # Bind the Forward to the full set of statement alternatives, grouped under
  # the results name 'statement'. The alternatives are joined with '|', so
  # they are tried top to bottom and the first one that matches wins; the
  # order of this list is therefore part of the grammar's behaviour (for
  # example, both multi-table DELETE forms are attempted before the simple
  # one).
  _STATEMENT << pyp.Group(_ALTER_TABLE_SQL
                          | _ALTER_DATABASE_SQL
                          | _CREATE_TABLE_SQL
                          | _CREATE_TABLE_LIKE_SQL
                          | _DROP_TABLE_SQL
                          | _RENAME_TABLE_SQL
                          | _SELECT_SQL
                          | _UPDATE_SQL
                          | _INSERT_SQL
                          | _REPLACE_SQL
                          | _DELETE_MULTI_SQL
                          | _DELETE_MULTI_SQL2
                          | _DELETE_SIMPLE_SQL
                          | _TRUNCATE_SQL
                          | _START_TRANSACTION_SQL
                          | _END_TRANSACTION_SQL
                          | _CREATE_DATABASE_SQL
                          | _DROP_DATABASE_SQL
                          | _CREATE_INDEX_SQL
                          | _SET_SQL
                          | _VERSIONED_COMMENT
                          ).setResultsName('statement')

  # A query is one statement followed by _LINE_DELIMITER, grouped under the
  # results name 'query'. Matches of _COMMENT_LINE and _COMMENT_BLOCK are
  # registered as ignorable, so they are skipped while parsing a query.
  _QUERY = pyp.Group(_STATEMENT
                     + _LINE_DELIMITER).setResultsName('query')
  _QUERY.ignore(_COMMENT_LINE)
  _QUERY.ignore(_COMMENT_BLOCK)