コード例 #1
0

def setBotName(newname):
    """Re-target the forward-declared ``botname`` grammar to *newname*.

    Args:
        newname: the bot's new name; wrapped in ``CL`` so it matches
            case-insensitively.
    """
    # Deliberately uses the expression form ``forward << expr`` rather than
    # ``<<=``: an augmented assignment to the module-level ``botname`` would
    # rebind the name locally and raise UnboundLocalError without a
    # ``global`` declaration.
    botname << CL(newname)


# Command word, e.g. "hello" in ".hello": starts with a letter or
# underscore, continues with word characters.
identifier = P.Word(P.alphas + "_",
                    P.alphanums + "_").setResultsName('identifier')
# Commands may be introduced by a leading "." ...
command_leader = L(".")
# ... or by hailing the bot by name: "Bot: cmd", "Bot, cmd" or "Bot cmd".
hail = (botname + P.oneOf(": ,")) | (botname + P.White())
# Everything after the command word is captured verbatim as its arguments.
command_args = P.restOfLine.setResultsName('command_args')

# Full command line: anchored at start of string, leader/hail suppressed,
# optional whitespace between the command word and its arguments.
command = (
    P.StringStart() + Sup(command_leader | hail) +
    identifier.setResultsName('command_identifier') +
    Sup(P.Optional(P.White())) +
    command_args.setResultsName('command_args')).setResultsName('command')

# Test fixtures: (input line, expected repr of [identifier, args]) pairs.
# A P.ParseException value marks inputs that must be rejected (double
# leader, bot name not followed by a separator).  Bot-name matching is
# case-insensitive ("tesTBot").
_test_commands = [
    (".hello", "['hello', '']"),  # {{{
    (".foo bar", "['foo', 'bar']"),
    (". foo", "['foo', '']"),
    ("..foo", P.ParseException),
    ("TestBot:foo", "['foo', '']"),
    ("tesTBot,foo", "['foo', '']"),
    ("TestBot foo", "['foo', '']"),
    ("TestBot: foo", "['foo', '']"),
    ("tesTBot, foo", "['foo', '']"),
    ("tesTBotfoo", P.ParseException),
]  # }}}
コード例 #2
0
# possible operands:
#   - hexadecimal number
#   - decimal number
#   - identifier
#   - macro function, which is basically expanded via #define
#     to an expression

# C integer-literal suffixes ('l'/'L', 'u'/'U'); matched but dropped.
__numlitl = pypa.Literal('l').suppress() | pypa.Literal('L').suppress()
__numlitu = pypa.Literal('u').suppress() | pypa.Literal('U').suppress()

# Single-quoted literal with backslash as the escape character.
__string = pypa.QuotedString('\'', '\\')

# Hexadecimal literal, normalized to its decimal string form by the parse
# action.  The doubled optional __numlitl allows 'll'/'LL' suffixes.
__hexadec = \
    pypa.Literal('0x').suppress() + \
    pypa.Word(pypa.hexnums). \
        setParseAction(lambda t: str(int(t[0], 16))) + \
    pypa.Optional(__numlitu) + \
    pypa.Optional(__numlitl) + \
    pypa.Optional(__numlitl)

# Decimal literal with optional '~' prefix and U/L/LL suffixes.
# NOTE(review): Word(nums + '-') accepts '-' at any position, so a token
# such as '1-2' would reach int() and raise ValueError — confirm that
# inputs can only contain a leading sign.
__integer = \
    pypa.Optional('~') + \
    pypa.Word(pypa.nums + '-').setParseAction(lambda t: str(int(t[0]))) + \
    pypa.Optional(pypa.Suppress(pypa.Literal('U'))) + \
    pypa.Optional(pypa.Suppress(pypa.Literal('L'))) + \
    pypa.Optional(pypa.Suppress(pypa.Literal('L')))

# Identifier; every match is routed through _collectDefines (defined
# elsewhere in this module) as a side effect of parsing.
__identifier = \
    pypa.Word(pypa.alphanums + '_' + '-' + '@' + '$').setParseAction(_collectDefines)
# Comma-separated macro argument list (commas suppressed).
__arg = pypa.Word(pypa.alphanums + '_')
__args = __arg + pypa.ZeroOrMore(pypa.Literal(',').suppress() + \
                                 __arg)
コード例 #3
0
    return pypar.Regex(r"[^\s\n" + re.escape(disallowed_delimiter) + r"]+")


# Only spaces/tabs are skippable whitespace: newlines are structural in
# table parsing.
pypar.ParserElement.setDefaultWhitespaceChars(" \t")
table_parser = pypar.NoMatch()
table_cell_separators = ["|", "/", ","]
for separator in table_cell_separators:
    # One cell value: up to 10 word tokens joined by single spaces.
    value = pypar.Combine(word_token_regex(separator) * (0, 10),
                          joinString=' ',
                          adjacent=False)
    # Two-argument parse actions receive (location, tokens) in pyparsing,
    # so each cell is recorded together with its character offset.
    value.setParseAction(lambda start, tokens: (start, tokens[0]))
    # An Empty match captures the offset just past the cell.
    empty = pypar.Empty()
    empty.setParseAction(lambda start, tokens: (start, tokens))
    value = pypar.Group(value + empty)
    # A row: optional leading separator, one or more separator-terminated
    # cells, an optional trailing cell, ended by EOL or end of input.
    row = pypar.Group(
        pypar.Optional(separator).suppress() +
        (value + pypar.Literal(separator).suppress()) * (1, None) +
        pypar.Optional(value) +
        (pypar.StringEnd() | pypar.Literal("\n")).suppress() +
        pypar.Optional("\n").suppress())
    # '^=' accumulates each separator's grammar as an Or (longest match).
    table_parser ^= (
        (pypar.LineStart() + pypar.Optional(pypar.White())).suppress() +
        # Allow line breaks for table headings
        row + pypar.Optional(
            pypar.Regex(r"[\-_=]{3,}") + pypar.Literal("\n") *
            (1, 2)).suppress() + row * (0, None)).setResultsName("delimiter:" +
                                                                 separator)
table_parser.parseWithTabs()

# Key/value lists (e.g. "name: value") are built the same way below.
key_value_separators = [":", "-", ">"]
key_value_list_parser = pypar.NoMatch()
コード例 #4
0
import sys

import numpy as np
import pyparsing

import configuration_space

# Build pyparsing expressions for params
pp_param_name = pyparsing.Word(pyparsing.alphanums + "_" + "-" + "@" + "." +
                               ":" + ";" + "\\" + "/" + "?" + "!" + "$" + "%" +
                               "&" + "*" + "+" + "<" + ">")
# Digits spelled out explicitly (equivalent to pyparsing.nums).
pp_digits = "0123456789"
pp_plusorminus = pyparsing.Literal('+') | pyparsing.Literal('-')
# Optionally signed integer, e.g. "-3".
pp_int = pyparsing.Combine(
    pyparsing.Optional(pp_plusorminus) + pyparsing.Word(pp_digits))
# Decimal float, e.g. "-0.5" or ".5"; the fractional part is mandatory.
pp_float = pyparsing.Combine(
    pyparsing.Optional(pp_plusorminus) + pyparsing.Optional(pp_int) + "." +
    pp_int)
pp_eorE = pyparsing.Literal('e') | pyparsing.Literal('E')
# Scientific notation, e.g. "1.0e-3".
pp_e_notation = pyparsing.Combine(pp_float + pp_eorE + pp_int)
# Most-specific first, so "1.0e-3" is not consumed as just "1.0" or "1".
pp_number = pp_e_notation | pp_float | pp_int
pp_numberorname = pp_number | pp_param_name
# NOTE(review): Word("il") also matches "i", "l", "li", "ill", ... —
# presumably only the flags "i"/"l"/"il" are intended; confirm inputs.
pp_il = pyparsing.Word("il")
# Comma-separated list of choice names.
pp_choices = pp_param_name + pyparsing.Optional(
    pyparsing.OneOrMore("," + pp_param_name))

# Continuous parameter: name [lower, upper] [default] plus optional flags.
pp_cont_param = pp_param_name + "[" + pp_number + "," + pp_number + "]" + \
    "[" + pp_number + "]" + pyparsing.Optional(pp_il)
# Categorical parameter: name {choice, ...} [default].
pp_cat_param = pp_param_name + "{" + pp_choices + "}" + "[" + pp_param_name + "]"
# Condition line: child | parent in {value, ...}.
pp_condition = pp_param_name + "|" + pp_param_name + "in" + "{" + pp_choices + "}"
コード例 #5
0
class MacOSSecuritydLogParser(text_parser.PyparsingSingleLineTextParser):
  """Parses the securityd file that contains logs from the security daemon."""

  NAME = 'mac_securityd'
  DESCRIPTION = 'Parser for MacOS securityd log files.'

  _ENCODING = 'utf-8'
  # Fallback year; securityd log lines do not carry a year themselves.
  _DEFAULT_YEAR = 2012

  # Leading date/time, e.g. "Dec  1 12:34:56" (month name, day, time).
  DATE_TIME = pyparsing.Group(
      text_parser.PyparsingConstants.THREE_LETTERS.setResultsName('month') +
      text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName('day') +
      text_parser.PyparsingConstants.TIME_ELEMENTS)

  # Full line layout:
  #   <date> <sender>[<pid>] <<level>> [<facility>{<security_api>} <caller>]:
  #   <message>
  # security_api and caller are optional within their brackets.
  SECURITYD_LINE = (
      DATE_TIME.setResultsName('date_time') +
      pyparsing.CharsNotIn('[').setResultsName('sender') +
      pyparsing.Literal('[').suppress() +
      text_parser.PyparsingConstants.PID.setResultsName('sender_pid') +
      pyparsing.Literal(']').suppress() +
      pyparsing.Literal('<').suppress() +
      pyparsing.CharsNotIn('>').setResultsName('level') +
      pyparsing.Literal('>').suppress() +
      pyparsing.Literal('[').suppress() +
      pyparsing.CharsNotIn('{').setResultsName('facility') +
      pyparsing.Literal('{').suppress() +
      pyparsing.Optional(pyparsing.CharsNotIn(
          '}').setResultsName('security_api')) +
      pyparsing.Literal('}').suppress() +
      pyparsing.Optional(pyparsing.CharsNotIn(']:').setResultsName(
          'caller')) + pyparsing.Literal(']:').suppress() +
      pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('message'))

  # Coalesced-repeat marker, e.g. "<date> --- last message repeated 3 time ---".
  REPEATED_LINE = (
      DATE_TIME.setResultsName('date_time') +
      pyparsing.Literal('--- last message repeated').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName('times') +
      pyparsing.Literal('time ---').suppress())

  LINE_STRUCTURES = [
      ('logline', SECURITYD_LINE),
      ('repeated', REPEATED_LINE)]

  def __init__(self):
    """Initializes a parser object."""
    super(MacOSSecuritydLogParser, self).__init__()
    # NOTE(review): the comparison in _GetTimeElementsTuple assumes this is
    # an int; VerifyStructure resets it to 0 before parsing — confirm that
    # VerifyStructure always runs first.
    self._last_month = None
    self._previous_structure = None
    self._year_use = 0

  def _GetTimeElementsTuple(self, structure):
    """Retrieves a time elements tuple from the structure.

    Args:
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Returns:
      tuple: containing:
        year (int): year.
        month (int): month, where 1 represents January.
        day_of_month (int): day of month, where 1 is the first day of the month.
        hours (int): hours.
        minutes (int): minutes.
        seconds (int): seconds.
    """
    time_elements_tuple = self._GetValueFromStructure(structure, 'date_time')
    # TODO: what if time_elements_tuple is None.
    month, day, hours, minutes, seconds = time_elements_tuple

    # Note that dfdatetime_time_elements.TimeElements will raise ValueError
    # for an invalid month.
    month = timelib.MONTH_DICT.get(month.lower(), 0)

    # Log lines carry no year: a month smaller than the previously seen one
    # implies a year rollover.
    if month != 0 and month < self._last_month:
      # Gap detected between years.
      self._year_use += 1

    return (self._year_use, month, day, hours, minutes, seconds)

  def _ParseLogLine(self, parser_mediator, structure, key):
    """Parse a single log line and produce an event object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
      key (str): name of the parsed structure.
    """
    time_elements_tuple = self._GetTimeElementsTuple(structure)

    try:
      date_time = dfdatetime_time_elements.TimeElements(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'invalid date time value: {0!s}'.format(time_elements_tuple))
      return

    self._last_month = time_elements_tuple[1]

    if key == 'logline':
      # Remember the full structure so a following "repeated" line can
      # reuse its message and attributes.
      self._previous_structure = structure
      message = self._GetValueFromStructure(structure, 'message')
    else:
      repeat_count = self._GetValueFromStructure(structure, 'times')
      previous_message = self._GetValueFromStructure(
          self._previous_structure, 'message')
      message = 'Repeated {0:d} times: {1:s}'.format(
          repeat_count, previous_message)
      structure = self._previous_structure

    # It uses CharsNotIn structure which leaves whitespaces
    # at the beginning of the sender and the caller.
    caller = self._GetValueFromStructure(structure, 'caller')
    if caller:
      caller = caller.strip()

    # TODO: move this to formatter.
    if not caller:
      caller = 'unknown'

    sender = self._GetValueFromStructure(structure, 'sender')
    if sender:
      sender = sender.strip()

    event_data = MacOSSecuritydLogEventData()
    event_data.caller = caller
    event_data.facility = self._GetValueFromStructure(structure, 'facility')
    event_data.level = self._GetValueFromStructure(structure, 'level')
    event_data.message = message
    event_data.security_api = self._GetValueFromStructure(
        structure, 'security_api', default_value='unknown')
    event_data.sender_pid = self._GetValueFromStructure(structure, 'sender_pid')
    event_data.sender = sender

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_ADDED)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in ('logline', 'repeated'):
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    self._ParseLogLine(parser_mediator, structure, key)

  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is a securityd log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): line from a text file.

    Returns:
      bool: True if the line is in the expected format, False if not.
    """
    # Reset year tracking before any line is parsed.
    self._last_month = 0
    self._year_use = parser_mediator.GetEstimatedYear()

    try:
      structure = self.SECURITYD_LINE.parseString(line)
    except pyparsing.ParseException:
      logger.debug('Not a MacOS securityd log file')
      return False

    time_elements_tuple = self._GetTimeElementsTuple(structure)

    try:
      dfdatetime_time_elements.TimeElements(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      logger.debug(
          'Not a MacOS securityd log file, invalid date and time: {0!s}'.format(
              time_elements_tuple))
      return False

    self._last_month = time_elements_tuple[1]

    return True
コード例 #6
0
ファイル: parser.py プロジェクト: vascotenner/holoviews
class OptsSpec(Parser):
    """
    An OptsSpec is a string specification that describes an
    OptionTree. It is a list of tree path specifications (using dotted
    syntax) separated by keyword lists for any of the style, plotting
    or normalization options. These keyword lists are denoted
    'plot(..)', 'style(...)' and 'norm(...)'  respectively.  These
    three groups may be specified even more concisely using keyword
    lists delimited by square brackets, parentheses and braces
    respectively.  All these sets are optional and may be supplied in
    any order.

    For instance, the following string:

    Image (interpolation=None) plot(show_title=False) Curve style(color='r')

    Would specify an OptionTree where Image has "interpolation=None"
    for style and 'show_title=False' for plot options. The Curve has a
    style set such that color='r'.

    The parser is fairly forgiving; commas between keywords are
    optional and additional spaces are often allowed. The only
    restriction is that keywords *must* be immediately followed by the
    '=' sign (no space).
    """

    # Plot options: '[...]' or the explicit 'plot[...]' form.
    plot_options_short = pp.nestedExpr(
        '[', ']',
        content=pp.OneOrMore(pp.Word(allowed)
                             ^ pp.quotedString)).setResultsName('plot_options')

    plot_options_long = pp.nestedExpr(
        opener='plot[',
        closer=']',
        content=pp.OneOrMore(pp.Word(allowed)
                             ^ pp.quotedString)).setResultsName('plot_options')

    plot_options = (plot_options_short | plot_options_long)

    # Style options: '(...)' or the explicit 'style(...)' form.
    style_options_short = pp.nestedExpr(
        opener='(', closer=')',
        ignoreExpr=None).setResultsName("style_options")

    style_options_long = pp.nestedExpr(
        opener='style(', closer=')',
        ignoreExpr=None).setResultsName("style_options")

    style_options = (style_options_short | style_options_long)

    # Normalization options: '{...}' or the explicit 'norm{...}' form.
    norm_options_short = pp.nestedExpr(
        opener='{', closer='}', ignoreExpr=None).setResultsName("norm_options")

    norm_options_long = pp.nestedExpr(
        opener='norm{', closer='}',
        ignoreExpr=None).setResultsName("norm_options")

    norm_options = (norm_options_short | norm_options_long)

    # Names of compositor operations registered elsewhere.
    compositor_ops = pp.MatchFirst(
        [pp.Literal(el.group) for el in Compositor.definitions])

    # Dotted path such as 'Image.Channel': one leading capital, then
    # alphanumerics, dots and underscores.
    dotted_path = pp.Combine(
        pp.Word(ascii_uppercase, exact=1) + pp.Word(pp.alphanums + '._'))

    pathspec = (dotted_path | compositor_ops).setResultsName("pathspec")

    # A path followed by any of the three option groups, in any order.
    spec_group = pp.Group(pathspec + (pp.Optional(norm_options)
                                      & pp.Optional(plot_options)
                                      & pp.Optional(style_options)))

    opts_spec = pp.OneOrMore(spec_group)

    # Aliases that map to the current option name for backward compatibility
    aliases = {
        'horizontal_spacing': 'hspace',
        'vertical_spacing': 'vspace',
        # BUG FIX: value previously contained leading spaces
        # ('    fig_alpha'), which would have produced an invalid
        # keyword name when the alias was applied.
        'figure_alpha': 'fig_alpha',
        'figure_bounds': 'fig_bounds',
        'figure_inches': 'fig_inches',
        'figure_latex': 'fig_latex',
        'figure_rcparams': 'fig_rcparams',
        'figure_size': 'fig_size',
        'show_xaxis': 'xaxis',
        'show_yaxis': 'yaxis'
    }

    @classmethod
    def process_normalization(cls, parse_group):
        """
        Given a normalization parse group (i.e. the contents of the
        braces), validate the option list and compute the appropriate
        axiswise/framewise boolean values for the normalization
        plotting options.

        Returns None when no normalization options were supplied;
        raises SyntaxError for unknown, repeated or contradictory
        options.
        """
        if 'norm_options' not in parse_group:
            return None
        opts = parse_group['norm_options'][0].asList()
        if opts == []:
            return None

        options = ['+framewise', '-framewise', '+axiswise', '-axiswise']

        for normopt in options:
            if opts.count(normopt) > 1:
                raise SyntaxError("Normalization specification must not"
                                  " contain repeated %r" % normopt)

        if not all(opt in options for opt in opts):
            raise SyntaxError("Normalization option not one of %s" %
                              ", ".join(options))

        # '+x' and '-x' together are contradictory.
        excluded = [('+framewise', '-framewise'), ('+axiswise', '-axiswise')]
        for pair in excluded:
            if all(exclude in opts for exclude in pair):
                raise SyntaxError("Normalization specification cannot"
                                  " contain both %s and %s" %
                                  (pair[0], pair[1]))

        # If unspecified, default is -axiswise and -framewise
        if len(opts) == 1 and opts[0].endswith('framewise'):
            axiswise = False
            framewise = '+framewise' in opts
        elif len(opts) == 1 and opts[0].endswith('axiswise'):
            framewise = False
            axiswise = '+axiswise' in opts
        else:
            axiswise = '+axiswise' in opts
            framewise = '+framewise' in opts

        return dict(axiswise=axiswise, framewise=framewise)

    @classmethod
    def parse(cls, line, ns=None):
        """
        Parse an options specification, returning a dictionary with
        path keys and {'plot':<options>, 'style':<options>} values.

        The optional *ns* namespace is forwarded to keyword evaluation
        and defaults to an empty namespace.
        """
        # BUG FIX: use a None sentinel instead of a mutable default
        # argument ('ns={}'), which would be shared between calls.
        if ns is None:
            ns = {}

        # scanString yields (tokens, start, end); require exactly one
        # match that covers the entire (stripped) line.
        parses = [p for p in cls.opts_spec.scanString(line)]
        if len(parses) != 1:
            raise SyntaxError("Invalid specification syntax.")
        else:
            e = parses[0][2]
            processed = line[:e]
            if (processed.strip() != line.strip()):
                raise SyntaxError("Failed to parse remainder of string: %r" %
                                  line[e:])

        parse = {}
        for group in cls.opts_spec.parseString(line):
            options = {}

            normalization = cls.process_normalization(group)
            if normalization is not None:
                options['norm'] = Options(**normalization)

            if 'plot_options' in group:
                plotopts = group['plot_options'][0]
                opts = cls.todict(plotopts, 'brackets', ns=ns)
                options['plot'] = Options(
                    **{cls.aliases.get(k, k): v
                       for k, v in opts.items()})

            if 'style_options' in group:
                styleopts = group['style_options'][0]
                opts = cls.todict(styleopts, 'parens', ns=ns)
                options['style'] = Options(
                    **{cls.aliases.get(k, k): v
                       for k, v in opts.items()})

            if group['pathspec'] in parse:
                # Update in case same pathspec accidentally repeated by the user.
                parse[group['pathspec']].update(options)
            else:
                parse[group['pathspec']] = options
        return parse
コード例 #7
0
class SkyDriveOldLogParser(text_parser.PyparsingSingleLineTextParser):
  """Parse SkyDrive old log files."""

  NAME = u'skydrive_log_old'
  DESCRIPTION = u'Parser for OneDrive (or SkyDrive) old log files.'

  _ENCODING = u'UTF-8-SIG'

  # Common SDOL (SkyDriveOldLog) pyparsing objects.
  SDOL_COLON = pyparsing.Literal(u':')
  SDOL_EXCLAMATION = pyparsing.Literal(u'!')

  # Timestamp (08-01-2013 21:22:28.999).
  SDOL_TIMESTAMP = (
      text_parser.PyparsingConstants.DATE_REV +
      text_parser.PyparsingConstants.TIME_MSEC).setResultsName(
          u'sdol_timestamp')

  # SkyDrive source code pyparsing structures.
  # Shape: <chars>:<integer>!<word>, combined into a single token.
  SDOL_SOURCE_CODE = pyparsing.Combine(
      pyparsing.CharsNotIn(u':') +
      SDOL_COLON +
      text_parser.PyparsingConstants.INTEGER +
      SDOL_EXCLAMATION +
      pyparsing.Word(pyparsing.printables)).setResultsName(u'source_code')

  # SkyDriveOldLogLevel pyparsing structures.
  SDOL_LOG_LEVEL = (
      pyparsing.Literal(u'(').suppress() +
      pyparsing.SkipTo(u')').setResultsName(u'log_level') +
      pyparsing.Literal(u')').suppress())

  # SkyDrive line pyparsing structure.
  SDOL_LINE = (
      SDOL_TIMESTAMP + SDOL_SOURCE_CODE + SDOL_LOG_LEVEL +
      SDOL_COLON + pyparsing.SkipTo(pyparsing.lineEnd).setResultsName(u'text'))

  # Sometimes the timestamped log line is followed by an empty line,
  # then by a file name plus other data and finally by another empty
  # line. It could happen that a logline is split in two parts.
  # These lines will not be discarded and an event will be generated
  # ad-hoc (see source), based on the last one if available.
  SDOL_NO_HEADER_SINGLE_LINE = (
      pyparsing.Optional(pyparsing.Literal(u'->').suppress()) +
      pyparsing.SkipTo(pyparsing.lineEnd).setResultsName(u'text'))

  # Define the available log line structures.
  LINE_STRUCTURES = [
      (u'logline', SDOL_LINE),
      (u'no_header_single_line', SDOL_NO_HEADER_SINGLE_LINE),
  ]

  def __init__(self):
    """Initializes a parser object."""
    super(SkyDriveOldLogParser, self).__init__()
    # Remembered so header-less continuation lines can reuse the timestamp
    # and offset of the previous timestamped event.
    self._last_event_object = None
    self.offset = 0

  def _ConvertToTimestamp(self, sdol_timestamp):
    """Converts the given parsed date and time to a timestamp.

    This is a sdol_timestamp object as returned by using
    text_parser.PyparsingConstants structures:
    [[month, day, year], [hours, minutes, seconds], milliseconds], for example
    [[8, 1, 2013], [21, 22, 28], 999].

    Args:
      sdol_timestamp: The pyparsing ParseResults object.

    Returns:
      The timestamp which is an integer containing the number of micro seconds
      since January 1, 1970, 00:00:00 UTC.
    """
    month, day, year = sdol_timestamp[0]
    hour, minute, second = sdol_timestamp[1]
    millisecond = sdol_timestamp[2]
    return timelib.Timestamp.FromTimeParts(
        year, month, day, hour, minute, second,
        microseconds=millisecond * 1000)

  def _ParseLogline(self, parser_mediator, structure):
    """Parse a logline and store appropriate attributes.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      structure: A pyparsing.ParseResults object from a line in the log file.
    """
    try:
      timestamp = self._ConvertToTimestamp(structure.sdol_timestamp)
    except errors.TimestampError as exception:
      parser_mediator.ProduceParseError(
          u'unable to determine timestamp with error: {0:s}'.format(
              exception))
      return

    event_object = SkyDriveOldLogEvent(
        timestamp, self.offset, structure.source_code, structure.log_level,
        structure.text)
    parser_mediator.ProduceEvent(event_object)

    self._last_event_object = event_object

  def _ParseNoHeaderSingleLine(self, parser_mediator, structure):
    """Parse an isolated header line and store appropriate attributes.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      structure: A pyparsing.ParseResults object from an header line in the
                 log file.
    """
    # A continuation line without a preceding timestamped line cannot be
    # attributed to any event; drop it.
    if not self._last_event_object:
      logging.debug(u'SkyDrive, found isolated line with no previous events')
      return

    event_object = SkyDriveOldLogEvent(
        self._last_event_object.timestamp, self._last_event_object.offset, None,
        None, structure.text)
    parser_mediator.ProduceEvent(event_object)

    # TODO think to a possible refactoring for the non-header lines.
    self._last_event_object = None

  def ParseRecord(self, parser_mediator, key, structure):
    """Parse each record structure and return an EventObject if applicable.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      key: An identification string indicating the name of the parsed
           structure.
      structure: A pyparsing.ParseResults object from a line in the
                 log file.
    """
    if key == u'logline':
      self._ParseLogline(parser_mediator, structure)

    elif key == u'no_header_single_line':
      self._ParseNoHeaderSingleLine(parser_mediator, structure)

    else:
      logging.warning(
          u'Unable to parse record, unknown structure: {0:s}'.format(key))

  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is a SkyDrive old log file.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      line: A single line from the text file.

    Returns:
      True if this is the correct parser, False otherwise.
    """
    try:
      parsed_structure = self.SDOL_LINE.parseString(line)
    except pyparsing.ParseException:
      logging.debug(u'Not a SkyDrive old log file')
      return False

    try:
      self._ConvertToTimestamp(parsed_structure.sdol_timestamp)
    except errors.TimestampError:
      logging.debug(
          u'Not a SkyDrive old log file, invalid timestamp {0:s}'.format(
              parsed_structure.sdol_timestamp))
      return False

    return True
コード例 #8
0
                       pyparsing.alphanums + "_$.").setName("identifier")
# Column and table names are dot-delimited identifier chains, kept as a
# single combined token; their lists are comma-delimited.
columnName = pyparsing.delimitedList(ident, ".", combine=True)
columnNameList = pyparsing.Group(pyparsing.delimitedList(columnName))
tableName = pyparsing.delimitedList(ident, ".", combine=True)
tableNameList = pyparsing.Group(pyparsing.delimitedList(tableName))

# Forward declaration so the WHERE grammar can be defined recursively.
whereExpression = pyparsing.Forward()
and_ = pyparsing.Keyword("and", caseless=True)
or_ = pyparsing.Keyword("or", caseless=True)
in_ = pyparsing.Keyword("in", caseless=True)

# Exponent marker for numeric literals.
E = pyparsing.CaselessLiteral("E")
binop = pyparsing.oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
arithSign = pyparsing.Word("+-", exact=1)
# Real number: a decimal point is mandatory ("1.", "1.5" or ".5"), with
# an optional signed exponent part.
realNum = pyparsing.Combine(
    pyparsing.Optional(arithSign) +
    (pyparsing.Word(pyparsing.nums) + "." +
     pyparsing.Optional(pyparsing.Word(pyparsing.nums))
     | ("." + pyparsing.Word(pyparsing.nums))) +
    pyparsing.Optional(E + pyparsing.Optional(arithSign) +
                       pyparsing.Word(pyparsing.nums)))
# Integer, optionally in exponent form (e.g. "1E3").
intNum = pyparsing.Combine(
    pyparsing.Optional(arithSign) + pyparsing.Word(pyparsing.nums) +
    pyparsing.Optional(E + pyparsing.Optional("+") +
                       pyparsing.Word(pyparsing.nums)))

# Right-hand side of a comparison; realNum must be tried before intNum.
columnRval = realNum | intNum | pyparsing.quotedString | columnName  # need to add support for
# alg expressions
whereCondition = pyparsing.Group((columnName + binop + columnRval)
                                 | (columnName + in_ + "(" +
                                    pyparsing.delimitedList(columnRval) + ")")
コード例 #9
0
                            key='original_document',
                            value=content)
            extra.save()

            extra = HOExtra(object=harvest_object,
                            key='original_format',
                            value=document_format)
            extra.save()

        return True


# Apache-style directory-index HTML: skip to the first '<a href=',
# capture the quoted link target as 'url', then optionally capture the
# adjacent "date time" table cell (two tokens joined by a single space).
apache  = parse.SkipTo(parse.CaselessLiteral("<a href="), include=True).suppress() \
        + parse.quotedString.setParseAction(parse.removeQuotes).setResultsName('url') \
        + parse.SkipTo("</a>", include=True).suppress() \
        + parse.Optional(parse.Literal('</td><td align="right">')).suppress() \
        + parse.Optional(parse.Combine(
            parse.Word(parse.alphanums+'-') +
            parse.Word(parse.alphanums+':')
        ,adjacent=False, joinString=' ').setResultsName('date')
        )

iis =      parse.SkipTo("<br>").suppress() \
         + parse.OneOrMore("<br>").suppress() \
         + parse.Optional(parse.Combine(
           parse.Word(parse.alphanums+'/') +
           parse.Word(parse.alphanums+':') +
           parse.Word(parse.alphas)
         , adjacent=False, joinString=' ').setResultsName('date')
         ) \
         + parse.Word(parse.nums).suppress() \
コード例 #10
0
class SkyDriveLogErrorParser(text_parser.PyparsingMultiLineTextParser):
    """Parse SkyDrive error log files."""

    NAME = u'skydrive_log_error'
    DESCRIPTION = u'Parser for OneDrive (or SkyDrive) error log files.'

    ENCODING = u'utf-8'

    # Common SDE (SkyDriveError) structures.
    INTEGER_CAST = text_parser.PyParseIntCast
    HYPHEN = text_parser.PyparsingConstants.HYPHEN
    TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS
    TIME_MSEC = text_parser.PyparsingConstants.TIME_MSEC
    MSEC = pyparsing.Word(pyparsing.nums, max=3).setParseAction(INTEGER_CAST)
    COMMA = pyparsing.Literal(u',').suppress()
    DOT = pyparsing.Literal(u'.').suppress()
    IGNORE_FIELD = pyparsing.CharsNotIn(u',').suppress()

    # Header line timestamp (2013-07-25-160323.291).
    SDE_HEADER_TIMESTAMP = pyparsing.Group(
        text_parser.PyparsingConstants.DATE.setResultsName(u'date') + HYPHEN +
        TWO_DIGITS.setResultsName(u'hh') + TWO_DIGITS.setResultsName(u'mm') +
        TWO_DIGITS.setResultsName(u'ss') + DOT +
        MSEC.setResultsName(u'ms')).setResultsName(u'hdr_timestamp')

    # Line timestamp (07-25-13,16:06:31.820).
    SDE_TIMESTAMP = (
        TWO_DIGITS.setResultsName(u'month') + HYPHEN +
        TWO_DIGITS.setResultsName(u'day') + HYPHEN +
        TWO_DIGITS.setResultsName(u'year_short') + COMMA +
        TIME_MSEC.setResultsName(u'time')).setResultsName(u'timestamp')

    # Header start.
    SDE_HEADER_START = (
        pyparsing.Literal(u'######').suppress() +
        pyparsing.Literal(u'Logging started.').setResultsName(u'log_start'))

    # Multiline entry end marker, matched from right to left.
    SDE_ENTRY_END = pyparsing.StringEnd() | SDE_HEADER_START | SDE_TIMESTAMP

    # SkyDriveError line pyparsing structure.
    # Comma-separated fields; only module, source_code, text and the
    # multi-line detail are kept.
    SDE_LINE = (SDE_TIMESTAMP + COMMA + IGNORE_FIELD + COMMA + IGNORE_FIELD +
                COMMA + IGNORE_FIELD + COMMA +
                pyparsing.CharsNotIn(u',').setResultsName(u'module') + COMMA +
                pyparsing.CharsNotIn(u',').setResultsName(u'source_code') +
                COMMA + IGNORE_FIELD + COMMA + IGNORE_FIELD + COMMA +
                IGNORE_FIELD + COMMA + pyparsing.Optional(
                    pyparsing.CharsNotIn(u',').setResultsName(u'text')) +
                COMMA +
                pyparsing.SkipTo(SDE_ENTRY_END).setResultsName(u'detail') +
                pyparsing.lineEnd())

    # SkyDriveError header pyparsing structure.
    SDE_HEADER = (
        SDE_HEADER_START +
        pyparsing.Literal(u'Version=').setResultsName(u'ver_str') +
        pyparsing.Word(pyparsing.nums + u'.').setResultsName(u'ver_num') +
        pyparsing.Literal(u'StartSystemTime:').suppress() +
        SDE_HEADER_TIMESTAMP +
        pyparsing.Literal(u'StartLocalTime:').setResultsName(u'lt_str') +
        pyparsing.SkipTo(pyparsing.lineEnd()).setResultsName(u'details') +
        pyparsing.lineEnd())

    # Define the available log line structures.
    LINE_STRUCTURES = [(u'logline', SDE_LINE), (u'header', SDE_HEADER)]

    def __init__(self):
        """Initializes a parser object."""
        super(SkyDriveLogErrorParser, self).__init__()
        self.use_local_zone = False

    def _GetTimestampFromHeader(self, structure):
        """Gets a timestamp from the header structure.

        The following is an example of the timestamp structure expected
        [[2013, 7, 25], 16, 3, 23, 291]

        Args:
          structure: The parsed structure, which should be a timestamp.

        Returns:
          timestamp: A plaso timelib timestamp event or 0.
        """
        year, month, day = structure.date
        hour = structure.get(u'hh', 0)
        minute = structure.get(u'mm', 0)
        second = structure.get(u'ss', 0)
        microsecond = structure.get(u'ms', 0) * 1000

        return timelib.Timestamp.FromTimeParts(year,
                                               month,
                                               day,
                                               hour,
                                               minute,
                                               second,
                                               microseconds=microsecond)

    def _GetTimestampFromLine(self, structure):
        """Gets a timestamp from string from the structure.

        The following is an example of the timestamp structure expected
        [7, 25, 13, [16, 3, 24], 649]

        Args:
          structure: The parsed structure.

        Returns:
          timestamp: A plaso timelib timestamp event or 0.
        """
        hour, minute, second = structure.time[0]
        microsecond = structure.time[1] * 1000
        # TODO: Verify if timestamps are locale dependent.
        year = structure.get(u'year_short', 0)
        month = structure.get(u'month', 0)
        day = structure.get(u'day', 0)
        if year < 0 or not month or not day:
            return 0

        # Two-digit year relative to 2000.
        year += 2000

        return timelib.Timestamp.FromTimeParts(year,
                                               month,
                                               day,
                                               hour,
                                               minute,
                                               second,
                                               microseconds=microsecond)

    def _ParseHeader(self, structure):
        """Parse header lines and store appropriate attributes.

        [u'Logging started.', u'Version=', u'17.0.2011.0627',
        [2013, 7, 25], 16, 3, 23, 291, u'StartLocalTime', u'<details>']

        Args:
          structure: The parsed structure.

        Returns:
          timestamp: The event or none.
        """
        timestamp = self._GetTimestampFromHeader(structure.hdr_timestamp)
        if not timestamp:
            # NOTE(review): '{0:d}' applied to a ParseResults would raise —
            # presumably this path is rarely hit; confirm the format spec.
            logging.debug(u'SkyDriveLogError invalid timestamp {0:d}'.format(
                structure.hdr_timestamp))
            return
        text = u'{0:s} {1:s} {2:s}'.format(structure.log_start,
                                           structure.ver_str,
                                           structure.ver_num)
        detail = u'{0:s} {1:s}'.format(structure.lt_str, structure.details)
        return SkyDriveLogErrorEvent(timestamp, None, None, text, detail)

    def _ParseLine(self, structure):
        """Parse a logline and store appropriate attributes."""
        timestamp = self._GetTimestampFromLine(structure.timestamp)
        if not timestamp:
            logging.debug(u'SkyDriveLogError invalid timestamp {0:s}'.format(
                structure.timestamp))
            return
        # Replace newlines with spaces in structure.detail to preserve output.
        return SkyDriveLogErrorEvent(timestamp, structure.module,
                                     structure.source_code, structure.text,
                                     structure.detail.replace(u'\n', u' '))

    def ParseRecord(self, parser_mediator, key, structure):
        """Parse each record structure and return an EventObject if applicable.

        Args:
          parser_mediator: A parser mediator object (instance of
                           ParserMediator).
          key: An identification string indicating the name of the parsed
               structure.
          structure: A pyparsing.ParseResults object from a line in the
                     log file.

        Returns:
          An event object (instance of EventObject) or None.
        """
        if key == u'logline':
            return self._ParseLine(structure)
        elif key == u'header':
            return self._ParseHeader(structure)
        else:
            logging.warning(
                u'Unable to parse record, unknown structure: {0:s}'.format(
                    key))

    def VerifyStructure(self, parser_mediator, line):
        """Verify that this file is a SkyDrive Error log file.

        Verification is done against the header line only.

        Args:
          parser_mediator: A parser mediator object (instance of
                           ParserMediator).
          line: A single line from the text file.

        Returns:
          True if this is the correct parser, False otherwise.
        """
        try:
            parsed_structure = self.SDE_HEADER.parseString(line)
        except pyparsing.ParseException:
            logging.debug(u'Not a SkyDrive Error log file')
            return False
        timestamp = self._GetTimestampFromHeader(
            parsed_structure.hdr_timestamp)
        if not timestamp:
            # NOTE(review): header structures expose 'hdr_timestamp', not
            # 'timestamp' — this log statement likely formats an empty
            # value; confirm intended attribute.
            logging.debug(
                u'Not a SkyDrive Error log file, invalid timestamp {0:s}'.
                format(parsed_structure.timestamp))
            return False
        return True
コード例 #11
0

def build_num(strnum):
    """Convert a numeric literal string to a Python number.

    A decimal point marks the literal as floating point; otherwise it is
    parsed as an integer.
    """
    return float(strnum) if "." in strnum else int(strnum)


# JSON-style number grammar: an optional minus sign, an integer part that
# disallows leading zeros (a lone "0" is allowed), and an optional fraction.
onenine = pp.Word("123456789", exact=1)
digit = pp.Word(pp.nums, exact=1)
digits = pp.Word(pp.nums)
fraction = pp.Literal(".") + digits
# Either a multi-digit integer starting with 1-9, or a single digit.
integer = pp.Combine(onenine + digits) | digit
# Combine the pieces into one token and convert it via build_num, so the
# parse result is an int or float rather than a string.
number = pp.Combine(
    pp.Optional(pp.Literal("-")) + integer +
    pp.Optional(fraction)).setParseAction(lambda t: build_num(t[0]))

# Import-time smoke tests; "01.0" should be rejected because the integer
# rule does not allow a leading zero before other digits.
number.runTests("""
    1
    1.0
    0.1
    911
    01.0
    -119
    """)


class Null:
    """Marker class named for a null value; instances carry no state."""

    def __init__(self):
        """Nothing to initialize."""
コード例 #12
0
ファイル: xchatlog.py プロジェクト: Team-Firebugs/plaso
class XChatLogParser(text_parser.PyparsingSingleLineTextParser):
    """Parse XChat log files."""

    NAME = u'xchatlog'
    DESCRIPTION = u'Parser for XChat log files.'

    # Codec used to decode the log file lines.
    _ENCODING = u'UTF-8'

    # Common (header/footer/body) pyparsing structures.
    # TODO: Only English ASCII timestamp supported ATM, add support for others.

    _WEEKDAY = pyparsing.Group(
        pyparsing.Keyword(u'Sun') | pyparsing.Keyword(u'Mon')
        | pyparsing.Keyword(u'Tue') | pyparsing.Keyword(u'Wed')
        | pyparsing.Keyword(u'Thu') | pyparsing.Keyword(u'Fri')
        | pyparsing.Keyword(u'Sat'))

    # Header/footer pyparsing structures.
    # Sample: "**** BEGIN LOGGING AT Mon Dec 31 21:11:55 2011".
    # Note that "BEGIN LOGGING" text is localized (default, English) and can be
    # different if XChat locale is different.

    _HEADER_SIGNATURE = pyparsing.Keyword(u'****')
    _HEADER_DATE_TIME = pyparsing.Group(
        _WEEKDAY.setResultsName(u'weekday') +
        text_parser.PyparsingConstants.THREE_LETTERS.setResultsName(u'month') +
        text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName(
            u'day') + text_parser.PyparsingConstants.TIME_ELEMENTS +
        text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName(u'year'))
    # Three space-separated words, e.g. "BEGIN LOGGING AT" (possibly
    # localized, hence matched loosely as any printable words).
    _LOG_ACTION = pyparsing.Group(
        pyparsing.Word(pyparsing.printables) +
        pyparsing.Word(pyparsing.printables) +
        pyparsing.Word(pyparsing.printables))
    _HEADER = (_HEADER_SIGNATURE.suppress() +
               _LOG_ACTION.setResultsName(u'log_action') +
               _HEADER_DATE_TIME.setResultsName(u'date_time'))

    # Body (nickname, text and/or service messages) pyparsing structures.
    # Sample: "dec 31 21:11:58 <fpi> ola plas-ing guys!".

    _DATE_TIME = pyparsing.Group(
        text_parser.PyparsingConstants.THREE_LETTERS.setResultsName(u'month') +
        text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName(
            u'day') + text_parser.PyparsingConstants.TIME_ELEMENTS)
    _NICKNAME = pyparsing.QuotedString(
        u'<', endQuoteChar=u'>').setResultsName(u'nickname')
    _LOG_LINE = (_DATE_TIME.setResultsName(u'date_time') +
                 pyparsing.Optional(_NICKNAME) +
                 pyparsing.SkipTo(pyparsing.lineEnd).setResultsName(u'text'))

    # NOTE: the order matters; ParseRecord relies on 'header_signature' only
    # matching after 'logline' and 'header' have failed.
    LINE_STRUCTURES = [
        (u'logline', _LOG_LINE),
        (u'header', _HEADER),
        (u'header_signature', _HEADER_SIGNATURE),
    ]

    def __init__(self):
        """Initializes a parser object."""
        super(XChatLogParser, self).__init__()
        # Month (1-12) of the most recently parsed line; 0 when none seen.
        self._last_month = 0
        # Year taken from the last header; body lines carry no year. None (or
        # 0 after an unknown localized header) disables body-line parsing.
        self._xchat_year = None
        self.offset = 0

    def _GetTimeElementsTuple(self, structure):
        """Retrieves a time elements tuple from the structure.

    Args:
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Returns:
      tuple: contains:
        year (int): year.
        month (int): month, where 1 represents January.
        day_of_month (int): day of month, where 1 is the first day of the month.
        hours (int): hours.
        minutes (int): minutes.
        seconds (int): seconds.
    """
        month, day, hours, minutes, seconds = structure.date_time

        # Map the three-letter month name to 1-12; 0 means unknown.
        month = timelib.MONTH_DICT.get(month.lower(), 0)

        if month != 0 and month < self._last_month:
            # Gap detected between years: a smaller month than the previous
            # line means the log rolled over into a new year.
            self._xchat_year += 1

        return (self._xchat_year, month, day, hours, minutes, seconds)

    def _ParseHeader(self, parser_mediator, structure):
        """Parses a log header.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
        # The first element is the weekday name, which is not used.
        _, month, day, hours, minutes, seconds, year = structure.date_time

        month = timelib.MONTH_DICT.get(month.lower(), 0)

        time_elements_tuple = (year, month, day, hours, minutes, seconds)

        try:
            date_time = dfdatetime_time_elements.TimeElements(
                time_elements_tuple=time_elements_tuple)
            # Timestamps are recorded in local time; the event below is
            # created with the parser mediator's timezone.
            date_time.is_local_time = True
        except ValueError:
            parser_mediator.ProduceExtractionError(
                u'invalid date time value: {0!s}'.format(structure.date_time))
            return

        self._last_month = month

        event_data = XChatLogEventData()

        if structure.log_action[0] == u'BEGIN':
            # A BEGIN header provides the year used for subsequent body lines.
            self._xchat_year = year
            event_data.text = u'XChat start logging'

        elif structure.log_action[0] == u'END':
            # An END header stops body-line parsing until the next BEGIN.
            self._xchat_year = None
            event_data.text = u'XChat end logging'

        else:
            logging.debug(u'Unknown log action: {0:s}.'.format(u' '.join(
                structure.log_action)))
            return

        event = time_events.DateTimeValuesEvent(
            date_time,
            definitions.TIME_DESCRIPTION_ADDED,
            time_zone=parser_mediator.timezone)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def _ParseLogLine(self, parser_mediator, structure):
        """Parses a log line.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
        # Without a year from a valid header the timestamp cannot be built.
        if not self._xchat_year:
            return

        time_elements_tuple = self._GetTimeElementsTuple(structure)

        try:
            date_time = dfdatetime_time_elements.TimeElements(
                time_elements_tuple=time_elements_tuple)
            date_time.is_local_time = True
        except ValueError:
            parser_mediator.ProduceExtractionError(
                u'invalid date time value: {0!s}'.format(structure.date_time))
            return

        self._last_month = time_elements_tuple[1]

        event_data = XChatLogEventData()
        event_data.nickname = structure.nickname
        # The text string contains multiple unnecessary whitespaces that need to
        # be removed, thus the split and re-join.
        event_data.text = u' '.join(structure.text.split())

        event = time_events.DateTimeValuesEvent(
            date_time,
            definitions.TIME_DESCRIPTION_ADDED,
            time_zone=parser_mediator.timezone)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a log record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): identifier of the structure of tokens.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
        if key not in (u'header', u'header_signature', u'logline'):
            raise errors.ParseError(
                u'Unable to parse record, unknown structure: {0:s}'.format(
                    key))

        if key == u'logline':
            self._ParseLogLine(parser_mediator, structure)

        elif key == u'header':
            self._ParseHeader(parser_mediator, structure)

        elif key == u'header_signature':
            # If this key is matched (after others keys failed) we got a different
            # localized header and we should stop parsing until a new good header
            # is found. Stop parsing is done setting xchat_year to 0.
            # Note that the code assumes that LINE_STRUCTURES will be used in the
            # exact order as defined!
            logging.warning(u'Unknown locale header.')
            self._xchat_year = 0

    def VerifyStructure(self, parser_mediator, line):
        """Verify that this file is a XChat log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (bytes): line from a text file.

    Returns:
      bool: True if the line is in the expected format, False if not.
    """
        try:
            structure = self._HEADER.parseString(line)
        except pyparsing.ParseException:
            logging.debug(u'Not a XChat log file')
            return False

        # The first element is the weekday name, which is not used.
        _, month, day, hours, minutes, seconds, year = structure.date_time

        month = timelib.MONTH_DICT.get(month.lower(), 0)

        time_elements_tuple = (year, month, day, hours, minutes, seconds)

        try:
            dfdatetime_time_elements.TimeElements(
                time_elements_tuple=time_elements_tuple)
        except ValueError:
            logging.debug(
                u'Not a XChat log file, invalid date and time: {0!s}'.format(
                    structure.date_time))
            return False

        return True
コード例 #13
0
ファイル: popcontest.py プロジェクト: olivierh59500/plaso
class PopularityContestParser(text_parser.PyparsingSingleLineTextParser):
  """Parse popularity contest log files."""

  NAME = u'popularity_contest'
  DESCRIPTION = u'Parser for popularity contest log files.'

  # Character sets used to match package and MRU tokens below.
  _ASCII_PRINTABLES = pyparsing.printables
  if sys.version_info[0] < 3:
    # Build all non-whitespace BMP characters; unichr/xrange only exist on
    # Python 2, chr/range cover the same code points on Python 3.
    _UNICODE_PRINTABLES = u''.join(
        unichr(character) for character in xrange(65536)
        if not unichr(character).isspace())
  else:
    _UNICODE_PRINTABLES = u''.join(
        chr(character) for character in range(65536)
        if not chr(character).isspace())

  # Most-recently-used entry; any run of non-whitespace unicode characters
  # (presumably a file path — confirm against the log format).
  MRU = pyparsing.Word(_UNICODE_PRINTABLES).setResultsName(u'mru')
  PACKAGE = pyparsing.Word(_ASCII_PRINTABLES).setResultsName(u'package')
  # Angle-bracketed tag, e.g. "<NOFILES>".
  TAG = pyparsing.QuotedString(u'<', endQuoteChar=u'>').setResultsName(u'tag')
  TIMESTAMP = text_parser.PyparsingConstants.INTEGER.setResultsName(
      u'timestamp')

  # Header sample: "POPULARITY-CONTEST-<session> TIME:<ts> ID:<32 hex> ...".
  HEADER = (
      pyparsing.Literal(u'POPULARITY-CONTEST-').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName(u'session') +
      pyparsing.Literal(u'TIME:').suppress() + TIMESTAMP +
      pyparsing.Literal(u'ID:').suppress() +
      pyparsing.Word(pyparsing.alphanums, exact=32).setResultsName(u'id') +
      pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName(u'details'))

  # Footer sample: "END-POPULARITY-CONTEST-<session> TIME:<ts>".
  FOOTER = (
      pyparsing.Literal(u'END-POPULARITY-CONTEST-').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName(u'session') +
      pyparsing.Literal(u'TIME:').suppress() + TIMESTAMP)

  # Body line: "<atime> <ctime> <package> (<tag> | <mru> [<tag>])".
  LOG_LINE = (
      TIMESTAMP.setResultsName(u'atime') + TIMESTAMP.setResultsName(u'ctime') +
      (PACKAGE + TAG | PACKAGE + MRU + pyparsing.Optional(TAG)))

  LINE_STRUCTURES = [
      (u'logline', LOG_LINE),
      (u'header', HEADER),
      (u'footer', FOOTER),
  ]

  _ENCODING = u'UTF-8'

  def _ParseLogLine(self, parser_mediator, structure):
    """Parses an event object from the log line.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      structure: the log line structure object (instance of
                 pyparsing.ParseResults).
    """
    # Required fields are <mru> and <atime> and we are not interested in
    # log lines without <mru>.
    if not structure.mru:
      return

    # The <atime> field (as <ctime>) is always present but could be 0.
    # In case of <atime> equal to 0, we are in <NOFILES> case, safely return
    # without logging.
    if structure.atime:
      # TODO: not doing any check on <tag> fields, even if only informative
      # probably it could be better to check for the expected values.
      event_object = PopularityContestEvent(
          structure.atime, eventdata.EventTimestamp.ACCESS_TIME,
          structure.package, structure.mru, tag=structure.tag)
      parser_mediator.ProduceEvent(event_object)

    if structure.ctime:
      event_object = PopularityContestEvent(
          structure.ctime, eventdata.EventTimestamp.ENTRY_MODIFICATION_TIME,
          structure.package, structure.mru, tag=structure.tag)
      parser_mediator.ProduceEvent(event_object)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      key: An identification string indicating the name of the parsed
           structure.
      structure: A pyparsing.ParseResults object from a line in the
                 log file.
    """
    # TODO: Add anomaly objects for abnormal timestamps, such as when the log
    # timestamp is greater than the session start.
    if key == u'logline':
      self._ParseLogLine(parser_mediator, structure)

    elif key == u'header':
      if not structure.timestamp:
        logging.debug(
            u'PopularityContestParser, header with invalid timestamp.')
        return

      # A header produces a session "start" event carrying the details line.
      session = u'{0!s}'.format(structure.session)
      event_object = PopularityContestSessionEvent(
          structure.timestamp, session, u'start', details=structure.details,
          hostid=structure.id)
      parser_mediator.ProduceEvent(event_object)

    elif key == u'footer':
      if not structure.timestamp:
        logging.debug(
            u'PopularityContestParser, footer with invalid timestamp.')
        return

      # A footer produces the matching session "end" event.
      session = u'{0!s}'.format(structure.session)
      event_object = PopularityContestSessionEvent(
          structure.timestamp, session, u'end')
      parser_mediator.ProduceEvent(event_object)

    else:
      logging.warning(
          u'PopularityContestParser, unknown structure: {0:s}.'.format(key))

  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is a Popularity Contest log file.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      line: A single line from the text file.

    Returns:
      True if this is the correct parser, False otherwise.
    """
    try:
      header_struct = self.HEADER.parseString(line)
    except pyparsing.ParseException:
      logging.debug(u'Not a Popularity Contest log file, invalid header')
      return False

    if not timelib.Timestamp.FromPosixTime(header_struct.timestamp):
      logging.debug(u'Invalid Popularity Contest log file header timestamp.')
      return False
    return True
コード例 #14
0
class StructDefine(object):
    """
    StructDefine is a decorator class used for defining structures
    by parsing a simple intermediate language input decorating
    a StructFormatter class.
    """

    # Registry of all decorated classes, keyed by class name.
    All = {}
    # Type codes understood directly; these mirror the Python struct
    # module's format characters.
    rawtypes = (
        "x",
        "c",
        "b",
        "B",
        "h",
        "H",
        "i",
        "I",
        "l",
        "L",
        "f",
        "d",
        "s",
        "n",
        "N",
        "p",
        "P",
        "q",
        "Q",
    )
    # Default alignment in bytes for each raw type code.
    alignments = {
        "x": 1,
        "c": 1,
        "b": 1,
        "B": 1,
        "s": 1,
        "h": 2,
        "H": 2,
        "i": 4,
        "I": 4,
        "l": 4,
        "L": 4,
        "f": 4,
        "q": 8,
        "Q": 8,
        "d": 8,
        "P": 8,
    }
    # Grammar for the intermediate language, one field per group:
    #   <type>[*<length>] : [<|>]<name> [; comment]
    integer = pp.Regex(r"[0-9][0-9]*")
    integer.setParseAction(lambda r: int(r[0]))
    # Bit-slice length of the form "#<int>.<int>".
    bitslen = pp.Group(pp.Suppress("#") + integer + pp.Suppress(".") + integer)
    symbol = pp.Regex(r"[A-Za-z_][A-Za-z0-9_]*")
    # Everything after ';' up to end of line is a field comment.
    comment = pp.Suppress(";") + pp.restOfLine
    # Field name, optionally preceded by a byte-order marker '>' or '<'.
    fieldname = pp.Suppress(":") + pp.Group(
        pp.Optional(pp.Literal(">") | pp.Literal("<"), default=None) + symbol)
    # "~" marks a variable-length field, optionally with a count-prefix type.
    inf = pp.Regex(r"~[bBhHiI]?")
    length = integer | symbol | inf | bitslen
    typename = pp.Group(symbol +
                        pp.Optional(pp.Suppress("*") + length, default=0))
    structfmt = pp.OneOrMore(
        pp.Group(typename + fieldname + pp.Optional(comment, default="")))

    def __init__(self, fmt, **kargs):
        """Parses the format string fmt into a list of field objects.

        Recognized keyword arguments:
          packed: when True, fields are not alignment-padded (default False).
          alignments: overrides the class-level type alignment table.
          order: default byte order for fields without an explicit
              '<' or '>' marker.
          any other keyword: maps a type name used in fmt to a concrete type.
        """
        self.fields = []
        self.source = fmt
        self.packed = kargs.get("packed", False)
        if "alignments" in kargs:
            self.alignments = kargs["alignments"]
        # parseString(..., True) requires the whole string to match.
        for l in self.structfmt.parseString(fmt, True).asList():
            f_type, f_name, f_comment = l
            f_order, f_name = f_name
            f_type, f_count = f_type
            if f_order is None and "order" in kargs:
                f_order = kargs["order"]
            if f_type in self.rawtypes:
                f_cls = RawField
                # A "~" length means variable length; "~b", "~B", etc. mean
                # the length is prefixed by a count of that integer type.
                if isinstance(f_count, str) and f_count.startswith("~"):
                    f_cls = VarField
                    if f_count[1:] in "bBhHiI":
                        f_cls = CntField
                f_align = self.alignments[f_type]
            else:
                # Non-raw type: resolve it via kargs if a mapping was given.
                f_cls = Field
                f_type = kargs.get(f_type, f_type)
                f_align = 0
            self.fields.append(
                f_cls(f_type, f_count, f_name, f_order, f_align, f_comment))

    def __call__(self, cls):
        """Decorates cls: registers it and attaches the parsed definition."""
        self.All[cls.__name__] = cls
        cls.fields = self.fields
        cls.source = self.source
        cls.packed = self.packed
        cls.fkeys = defaultdict(default_formatter)
        return cls
コード例 #15
0
ファイル: pathspec.py プロジェクト: vadimostanin/bob
    def __init__(self, cacheKey, aliases, stringFunctions, packageGenerator):
        """Builds the XPath-like package path grammar.

        Args:
          cacheKey: opaque cache key, stored as-is (not interpreted here).
          aliases: alias mapping, stored as-is — presumably package name
              aliases; confirm against callers.
          stringFunctions: functions made available to string literals and
              function calls inside predicates.
          packageGenerator: generator/factory for packages, stored as-is.
        """
        self.__cacheKey = cacheKey
        self.__aliases = aliases
        self.__stringFunctions = stringFunctions
        self.__generator = packageGenerator
        self.__root = None
        self.__graph = None

        # Create the parsing grammar. Two forward declarations are needed
        # because paths and predicates are mutually recursive.
        locationPath = pyparsing.Forward()
        relativeLocationPath = pyparsing.Forward()

        # Supported XPath-style axes.
        axisName = \
              pyparsing.Keyword("descendant-or-self") \
            | pyparsing.Keyword("child") \
            | pyparsing.Keyword("descendant") \
            | pyparsing.Keyword("self")

        nodeTest = pyparsing.Word(pyparsing.alphanums + "_.:+-*")
        # An axis is written as "<axis>@" before the node test.
        axisSpecifier = axisName + '@'
        abbreviatedStep = pyparsing.Keyword('.')

        # Single-quoted strings take no escapes; double-quoted ones use '\'.
        sQStringLiteral = pyparsing.QuotedString("'")
        sQStringLiteral.setParseAction(lambda s, loc, toks: StringLiteral(
            s, loc, toks, False, self.__stringFunctions, self.__getGraphIter))
        dQStringLiteral = pyparsing.QuotedString('"', '\\')
        dQStringLiteral.setParseAction(lambda s, loc, toks: StringLiteral(
            s, loc, toks, True, self.__stringFunctions, self.__getGraphIter))
        stringLiteral = sQStringLiteral | dQStringLiteral

        # Function calls may nest: name(arg, ...) where an arg is a string
        # literal or another function call.
        functionCall = pyparsing.Forward()
        functionArg = stringLiteral | functionCall
        functionCall << pyparsing.Word(pyparsing.alphas, pyparsing.alphanums+'-') + \
            pyparsing.Suppress('(') + \
            pyparsing.Optional(functionArg +
                pyparsing.ZeroOrMore(pyparsing.Suppress(',') + functionArg)) + \
            pyparsing.Suppress(')')
        functionCall.setParseAction(lambda s, loc, toks: FunctionCall(
            s, loc, toks, self.__stringFunctions, self.__getGraphIter))

        # Predicate expressions combine paths, strings and function calls
        # with boolean/comparison operators by precedence (highest first).
        predExpr = pyparsing.infixNotation(
            locationPath ^ stringLiteral ^ functionCall,
            [('!', 1, pyparsing.opAssoc.RIGHT, lambda s, loc, toks:
              NotOperator(s, loc, toks, self.__getGraphRoot)),
             ('<', 2, pyparsing.opAssoc.LEFT, lambda s, loc, toks:
              BinaryStrOperator(s, loc, toks, self.__getGraphIter)),
             ('<=', 2, pyparsing.opAssoc.LEFT, lambda s, loc, toks:
              BinaryStrOperator(s, loc, toks, self.__getGraphIter)),
             ('>', 2, pyparsing.opAssoc.LEFT, lambda s, loc, toks:
              BinaryStrOperator(s, loc, toks, self.__getGraphIter)),
             ('>=', 2, pyparsing.opAssoc.LEFT, lambda s, loc, toks:
              BinaryStrOperator(s, loc, toks, self.__getGraphIter)),
             ('==', 2, pyparsing.opAssoc.LEFT, lambda s, loc, toks:
              BinaryStrOperator(s, loc, toks, self.__getGraphIter)),
             ('!=', 2, pyparsing.opAssoc.LEFT, lambda s, loc, toks:
              BinaryStrOperator(s, loc, toks, self.__getGraphIter)),
             ('&&', 2, pyparsing.opAssoc.LEFT,
              lambda s, loc, toks: BinaryBoolOperator(s, loc, toks)),
             ('||', 2, pyparsing.opAssoc.LEFT,
              lambda s, loc, toks: BinaryBoolOperator(s, loc, toks))])
        predicate = '[' + predExpr + ']'
        step = abbreviatedStep | (pyparsing.Optional(axisSpecifier) +
                                  nodeTest + pyparsing.Optional(predicate))
        step.setParseAction(lambda s, loc, toks: LocationStep(s, loc, toks))
        # "//" abbreviates a descendant search, as in XPath.
        abbreviatedRelativeLocationPath = step + '//' + relativeLocationPath
        relativeLocationPath << (abbreviatedRelativeLocationPath |
                                 (step + '/' + relativeLocationPath) | step)
        abbreviatedAbsoluteLocationPath = '//' + relativeLocationPath
        absoluteLocationPath = abbreviatedAbsoluteLocationPath | \
                               ('/' + relativeLocationPath)
        locationPath << (absoluteLocationPath | relativeLocationPath)
        locationPath.setParseAction(lambda s, loc, toks: LocationPath(
            s, loc, toks, self.__getGraphRoot))

        self.__pathGrammer = locationPath
コード例 #16
0
ファイル: parser.py プロジェクト: dbilli/exprparser
    def __create(self):

        START = pp.StringStart().suppress()
        END = pp.StringEnd().suppress()

        #----------------------------------------------------------------------#
        # LANGUAGE TOKENS
        #----------------------------------------------------------------------#

        TRUE = pp.Literal('True').setParseAction(lambda s, loc, toks: toks[0])
        FALSE = pp.Literal('False').setParseAction(
            lambda s, loc, toks: toks[0])

        AND = pp.Literal('and').setParseAction(lambda s, loc, toks: toks[0])
        OR = pp.Literal('or').setParseAction(lambda s, loc, toks: toks[0])
        NOT = pp.Literal('not').setParseAction(lambda s, loc, toks: toks[0])

        #
        # Expression's elements
        #
        LEFT_PAREN = pp.Literal('(')
        RIGHT_PAREN = pp.Literal(')')
        LEFT_SPAREN = pp.Literal('[')
        RIGHT_SPAREN = pp.Literal(']')
        COMMA = pp.Literal(',')
        SEMICOLON = pp.Literal(';')

        # OID's syntax elements
        COLUMN = pp.Literal(':')
        TYPE_NEW = pp.Literal('@')
        TYPE_OLD = pp.Literal('#')

        # Unescaped String prefix
        UNESCAPE_STR = pp.Literal('r')

        #
        # Operators
        #

        ASSIGN = pp.Literal('=')
        # OIDs concat operator
        DOT = pp.Literal('.')

        PLUS_PLUS = pp.Literal('++')
        MINUS_MINUS = pp.Literal('--')

        POWER = pp.Literal('**')

        PLUS = pp.Literal('+')
        MINUS = pp.Literal('-')
        MULTI = pp.Literal('*')
        DIV = pp.Literal('/')
        MOD = pp.Literal('%')

        EQ = pp.Literal('eq')
        EQUAL = pp.Literal('==')
        NEQUAL = pp.Literal('!=')

        REGEXPQUAL = pp.Literal('=~')

        GT = pp.Literal('>')
        LT = pp.Literal('<')
        GEQ = pp.Literal('>=')
        LEQ = pp.Literal('<=')

        LOGIC_NOT = pp.Literal('!')
        LOGIC_AND = pp.Literal('&&')
        LOGIC_OR = pp.Literal('||')

        BITAND = pp.Literal('&')
        BITOR = pp.Literal('|')
        BITXOR = pp.Literal('^')

        # One's complement operator
        BITONE = pp.Literal('~')

        IF = pp.Literal('if')
        THEN = pp.Literal('then')
        ELSE = pp.Literal('else')

        TRY = pp.Literal('try')
        CATCH = pp.Literal('catch')

        #---------------------------------------------------------------------------*/
        #  Language Types
        #---------------------------------------------------------------------------*/

        #
        # Literals
        #

        QUOTED = pp.QuotedString('"', escChar='\\') | pp.QuotedString(
            "'", escChar='\\')

        STRING = pp.originalTextFor(QUOTED)

        RSTRING = pp.originalTextFor(UNESCAPE_STR + QUOTED)

        #
        # Variable identifiers ($a, $a1, $_a,  $a_a123)
        #
        VAR_ID = pp.Word('$', pp.alphanums + '_', min=2)

        #
        # Function identifiers
        #
        FUNCTION_ID = pp.Word(pp.alphas, pp.alphanums + '_', min=1)

        #
        # Numbers
        #
        HEX = pp.originalTextFor(pp.Regex('[0][xX][0-9a-fA-F]+'))

        DEC = pp.originalTextFor(pp.Word('0') | pp.Regex('[1-9][0-9]*'))

        OCTAL = pp.originalTextFor(pp.Regex('[0][0-7]+'))

        FLOAT1 = pp.Regex('[0-9]+[\.][0-9]+([eE][+-]?[0-9]+)*')

        FLOAT2 = pp.Regex('[0-9]+[\.]([eE][+-]?[0-9]+)*')

        FLOAT = pp.originalTextFor(FLOAT1 | FLOAT2)

        #
        # Special identifiers  { <name> (@|#) }
        #
        DATA_ID = pp.originalTextFor(
            pp.Combine(
                pp.Word('{') + pp.Word(pp.alphas, pp.alphanums + '_-.') +
                pp.Word('@#') + pp.Word('}')))

        #----------------------------------------------------------------------#
        #----------------------------------------------------------------------#
        #
        # GRAMMAR SYNTAX
        #
        #----------------------------------------------------------------------#
        #----------------------------------------------------------------------#

        #----------------------------------------------------------------------#
        #  variabile
        #  constants    (1, 1.0, 'c', "foo", ecc...)
        #  ( ... )
        #----------------------------------------------------------------------#

        OID_SEQUENCE = pp.Regex('[0-9]+[\.][0-9]+([\.][0-9]+)+')

        constant = (
            TRUE.setParseAction(lambda s, loc, toks: self.f.createBool(True))
            |
            FALSE.setParseAction(lambda s, loc, toks: self.f.createBool(False))
            | HEX.setParseAction(
                lambda s, loc, toks: self.f.createInteger(int(toks[1], 16)))
            | (~(OID_SEQUENCE) + FLOAT).setParseAction(
                lambda s, loc, toks: self.f.createFloat(float(toks[0])))
            | OCTAL.setParseAction(
                lambda s, loc, toks: self.f.createInteger(int(toks[1], 8)))
            | DEC.setParseAction(
                lambda s, loc, toks: self.f.createInteger(int(toks[1], 10)))
            | STRING.setParseAction(
                lambda s, loc, toks: self.f.createString(toks, True))
            | RSTRING.setParseAction(
                lambda s, loc, toks: self.f.createString(toks[1:], True)))

        cond_expr = pp.Forward()

        #----------------------------------------------------------------------#
        # Primary Expr
        #----------------------------------------------------------------------#

        primary_expr = (
            (LEFT_PAREN.suppress() + cond_expr + RIGHT_PAREN.suppress()
             ).setParseAction(lambda s, loc, toks: toks[0])
            | VAR_ID.setParseAction(
                lambda s, loc, toks: self.f.createIdentifier(toks[0]))
            | DATA_ID.setParseAction(
                lambda s, loc, toks: self.f.createDataIdentifier(toks[1]))
            | constant)

        #----------------------------------------------------------------------#
        # POSTFIX EXPRESSION
        #----------------------------------------------------------------------#
        # foo()
        # for(a,b,...)
        # $id()
        # $id
        # $id(a,b,...)
        #----------------------------------------------------------------------#

        #
        # Named argument
        #
        named_argument_value = pp.Forward()

        name_argument = (
            FUNCTION_ID + ASSIGN.suppress() + named_argument_value
        ).setParseAction(
            lambda s, loc, toks: self.f.createNamedArgument(toks[0], toks[1]))

        #
        # Simple argument
        #
        simple_argument_value = pp.Forward()

        #
        # 1, 2, 3, foo=10, bar=10234
        #
        argument = name_argument | simple_argument_value

        argument_expr_list = (argument +
                              pp.ZeroOrMore(COMMA.suppress() + argument))

        #----------------------------------------------------------------------#
        #  ( ), (a,b,c,...)
        #----------------------------------------------------------------------#

        def _call_expr_callback(s, loc, toks):
            args = toks.get('args')
            if args is None: args = []
            else: args = list(args)
            return ('CALL', args)

        call_expr = (
            LEFT_PAREN.suppress() + pp.Optional(argument_expr_list('args')) +
            RIGHT_PAREN.suppress()).setParseAction(_call_expr_callback)

        #----------------------------------------------------------------------#
        # [], [;], [i], [i;], [;j]   [i;j]
        #----------------------------------------------------------------------#

        def _range_expr_callback(s, loc, toks):
            args = []
            start = toks.get('start')
            args.append(start)
            if 'end' in toks:
                end = toks.get('end')
                args.append(end)
            return ('RANGE', args)

        range_value = pp.Forward()

        range_expr = (
            LEFT_SPAREN.suppress() + pp.Optional(range_value)('start') +
            pp.Optional(SEMICOLON.suppress() + pp.Optional(range_value)('end'))
            + RIGHT_SPAREN.suppress()).setParseAction(_range_expr_callback)

        #----------------------------------------------------------------------#

        call_or_range = range_expr | call_expr

        def _func_callback(s, loc, toks):

            if len(toks) == 1:
                return toks[0]

            current_t = toks[0]

            for t in toks[1:]:
                f_type, args = t

                if f_type == 'CALL':
                    current_t = self.f.createCallOp(current_t, args)
                elif f_type == 'RANGE':
                    current_t = self.f.createRangeOp(current_t, args)
                else:
                    raise Exception("ERROR")

            return current_t

        # A function name must carry at least one call/range suffix; any
        # other primary expression may carry zero or more of them.
        postfix_expr = (
            (FUNCTION_ID +
             pp.OneOrMore(call_or_range)).setParseAction(_func_callback)
            | (primary_expr +
               pp.ZeroOrMore(call_or_range)).setParseAction(_func_callback))

        #----------------------------------------------------------------------#
        #  UNARY EXPRESSION
        #----------------------------------------------------------------------#
        #  <expr>
        #  <expr>()
        #  <expr>[]
        #  + <expr>
        #  - <expr>
        #  ~ <expr>
        #  ! <expr>
        #---------------------------------------------------------------------------*/

        # Forward-declared: the full unary rule is completed further below
        # (it also covers if/try/SNMP expressions).
        unary_expr = pp.Forward()

        # Prefix operators; each parse action unwraps the single operand and
        # builds the corresponding AST node via the factory `self.f`.
        # Unary '+' is a no-op and simply returns its operand.
        calc_expr = (
            postfix_expr
            | (PLUS_PLUS.suppress() + unary_expr).setParseAction(
                lambda s, loc, toks: self.f.createAddAddOp(toks[0]))
            | (MINUS_MINUS.suppress() + unary_expr).setParseAction(
                lambda s, loc, toks: self.f.createSubSubOp(toks[0]))
            | (PLUS.suppress() +
               unary_expr).setParseAction(lambda s, loc, toks: toks[0])
            | (MINUS.suppress() + unary_expr).setParseAction(
                lambda s, loc, toks: self.f.createMinusOp(toks[0]))
            |
            ((LOGIC_NOT | NOT).suppress() + unary_expr
             ).setParseAction(lambda s, loc, toks: self.f.createNotOp(toks[0]))
            | (BITONE.suppress() + unary_expr).setParseAction(
                lambda s, loc, toks: self.f.createBitOneOp(toks[0])))

        #---------------------------------------------------------------------------*/
        # OID Expressions
        #---------------------------------------------------------------------------*/
        # These expressions represent SNMP OID values:
        #
        #    <oid expression>  [':' <community-expr>] '@' [ <host-expr> [':' <port-expr>] ]
        #
        # where <oid expression> is:
        #
        #    n.n.n '.' <exp-1> '.' <exp-2> '.' <exp-n>
        #
        #---------------------------------------------------------------------------*/

        #
        #  The DOT ('.') operator is a bit tricky: expressions are converted
        #  into strings and concatenated.
        #
        #  This means that if i concatenate OID  1.2.3.4  with the float
        #  literal 5.6  the result is  1.2.3.4.5.6
        #

        def _oid_compositon_callback(s, loc, toks):
            toks = list(toks)

            expr = toks.pop(0)
            while toks:
                expr = self.f.createConcatOID(expr, toks.pop(0))
            return expr

        def _oid_callback(s, loc, toks):
            # Build an OID node from the matched text; toks[1] is presumably
            # where originalTextFor places the raw string — TODO confirm
            # against the pyparsing version in use.
            return self.f.createOID(toks[1])

        # An OID literal optionally extended with '.'-joined pieces; each
        # piece becomes a node and the callback concatenates them in order.
        oid_compositon = (
            pp.originalTextFor(OID_SEQUENCE).setParseAction(_oid_callback) +
            pp.ZeroOrMore(DOT.suppress() + (
                pp.originalTextFor(OID_SEQUENCE).setParseAction(_oid_callback)
                | postfix_expr))).setParseAction(_oid_compositon_callback)

        def _snmp_single_expr_callback(s, loc, toks):
            """Assemble an SNMP value node from the named parse results.

            'community', 'node' and 'port' are optional and default to None.
            """
            def _optional(name):
                return toks[name] if name in toks else None

            return self.f.createSnmpValue(toks['oid'], _optional('community'),
                                          toks['type'], _optional('node'),
                                          _optional('port'))

        # <oid> [':' <community>] <type> [ <node> [':' <port>] ]
        snmp_single_expr = (
            oid_compositon('oid') +
            pp.Optional(COLUMN.suppress() + postfix_expr)('community') +
            pp.originalTextFor(TYPE_OLD | TYPE_NEW)('type') + pp.Optional(
                postfix_expr('node') +
                pp.Optional(COLUMN.suppress() + postfix_expr)('port'))
        ).setParseAction(_snmp_single_expr_callback)

        #----------------------------------------------------------------------#
        # 1.3.6.1.2.1.1@ [ ]
        #----------------------------------------------------------------------#

        def _func_callback_x(s, loc, toks):
            toks = list(toks)
            if len(toks) == 1: return toks[0]
            expr = toks[0]
            range_args = toks[1][1]
            return self.f.createRangeOp(expr, range_args)

        # An SNMP value optionally followed by a single range suffix.
        snmp_value_expr = (
            snmp_single_expr +
            pp.Optional(range_expr)).setParseAction(_func_callback_x)

        #----------------------------------------------------------------------#
        # IF <expr> THEN <expr> ELSE <expr>
        #----------------------------------------------------------------------#

        def _if_callback(s, loc, toks):
            """Build an if/then/else node from the three labelled branches."""
            return self.f.createIf(toks.get('e1'), toks.get('e2'),
                                   toks.get('e3'))

        # All three branches (condition, then, else) are mandatory.
        if_expr = (IF.suppress() + cond_expr("e1") + THEN.suppress() +
                   cond_expr("e2") + ELSE.suppress() +
                   cond_expr("e3")).setParseAction(_if_callback)

        #----------------------------------------------------------------------#
        # try <expr> catch [ <id> ] ( <expr> ) [ catch <id> ( <expr> ) ....]
        #----------------------------------------------------------------------#

        def _catch_expr_callback(s, loc, toks):
            ex_name = toks.get('exception')
            expr = toks.get('expr')

            return (ex_name, expr)

        def _try_expr_callback(s, loc, toks):
            """Build a try/catch node from the body and its catch clauses."""
            handlers = list(toks['catch_list'])
            return self.f.createTry(toks['body'], handlers)

        #
        # catch [ <expr> ] ( <expr> )
        #
        # NOTE(review): catch_expr_body is declared but never referenced in
        # the visible code — possibly dead; confirm before removing.
        catch_expr_body = pp.Forward()

        # Both the exception name and the handler expression are optional.
        catch_expr = (
            pp.Optional(FUNCTION_ID)('exception') + LEFT_PAREN.suppress() +
            pp.Optional(cond_expr)('expr') +
            RIGHT_PAREN.suppress()).setParseAction(_catch_expr_callback)

        #
        # try <expr> [ catch <expr> ( <expr> ) .... ]
        #
        catch_list = CATCH.suppress() + pp.OneOrMore(catch_expr)

        try_expr = (
            TRY.suppress() + cond_expr('body') +
            catch_list('catch_list')).setParseAction(_try_expr_callback)

        #----------------------------------------------------------------------#
        # UNARY EXPRESSION
        #----------------------------------------------------------------------#

        unary_expr <<= (if_expr | try_expr | snmp_value_expr | calc_expr)

        #----------------------------------------------------------------------#
        # OPERATORS
        #----------------------------------------------------------------------#

        # Maps the literal text of each binary operator token to the factory
        # method that builds its AST node (consumed by _op_callback below).
        # Word-style and symbol-style operators (AND/&&, OR/||, EQUAL/EQ)
        # intentionally share the same factory.
        OP_MAP = {
            str(POWER.match): self.f.createPowerOp,
            str(MULTI.match): self.f.createMultiOp,
            str(DIV.match): self.f.createDivOp,
            str(MOD.match): self.f.createModOp,
            str(PLUS.match): self.f.createAddOp,
            str(MINUS.match): self.f.createSubOp,
            str(LT.match): self.f.createLtOp,
            str(GT.match): self.f.createGtOp,
            str(LEQ.match): self.f.createLEqOp,
            str(GEQ.match): self.f.createGEqOp,
            str(EQUAL.match): self.f.createEqOp,
            str(EQ.match): self.f.createEqOp,
            str(NEQUAL.match): self.f.createNotEqOp,
            str(REGEXPQUAL.match): self.f.createRegExpEqOp,
            str(BITAND.match): self.f.createBitAndOp,
            str(BITXOR.match): self.f.createBitXOrOp,
            str(BITOR.match): self.f.createBitOrOp,
            str(AND.match): self.f.createAndOp,
            str(LOGIC_AND.match): self.f.createAndOp,
            str(OR.match): self.f.createOrOp,
            str(LOGIC_OR.match): self.f.createOrOp,
        }

        def _op_callback(s, loc, toks):
            l = list(toks)
            if len(l) == 1: return l

            expr = l.pop(0)
            while l:
                op, expr2 = l.pop(0), l.pop(0)
                op_callback = OP_MAP[op]
                expr = op_callback(expr, expr2)
            return expr

        # Precedence ladder: each rebinding of `expr` adds one (lower)
        # precedence level on top of the previous one, from ** down to ||.
        # NOTE(review): _op_callback folds left-to-right, so '**' parses
        # left-associatively here (most languages make it right-assoc) —
        # confirm this is intended.
        expr = unary_expr

        #// a ** b
        expr = (expr +
                pp.ZeroOrMore(POWER + expr)).setParseAction(_op_callback)

        #// a * b
        #// a / c
        #// a % c
        expr = (expr + pp.ZeroOrMore((MULTI | DIV | MOD) +
                                     expr)).setParseAction(_op_callback)

        #// a + b
        #// a - b
        expr = (
            expr +
            pp.ZeroOrMore((PLUS | MINUS) + expr)).setParseAction(_op_callback)

        #// a < b
        #// a > b
        #// a <= b
        #// a >= b
        expr = (expr + pp.ZeroOrMore((LT | GT | LEQ | GEQ) +
                                     expr)).setParseAction(_op_callback)

        #// a == b
        #// a != b
        #// a ~= b
        expr = (expr + pp.ZeroOrMore((EQUAL | EQ | NEQUAL | REGEXPQUAL) +
                                     expr)).setParseAction(_op_callback)

        #// a & b
        expr = (expr +
                pp.ZeroOrMore(BITAND + expr)).setParseAction(_op_callback)

        #// a ^ b
        expr = (expr +
                pp.ZeroOrMore(BITXOR + expr)).setParseAction(_op_callback)

        #// a | b
        expr = (expr +
                pp.ZeroOrMore(BITOR + expr)).setParseAction(_op_callback)

        #// a && b
        expr = (expr + pp.ZeroOrMore((LOGIC_AND | AND) +
                                     expr)).setParseAction(_op_callback)

        #//  a || b
        expr = (
            expr +
            pp.ZeroOrMore((LOGIC_OR | OR) + expr)).setParseAction(_op_callback)

        #----------------------------------------------------------------------#
        # Recursive rules
        #----------------------------------------------------------------------#

        # Close the recursion: conditions, argument values and range bounds
        # are all full expressions.
        cond_expr <<= expr

        simple_argument_value <<= cond_expr
        named_argument_value <<= cond_expr
        range_value <<= cond_expr

        #----------------------------------------------------------------------#
        # Initial RULE
        #----------------------------------------------------------------------#

        lang_expr = (START + cond_expr + END)

        return lang_expr
コード例 #17
0
def act_parser_end(token):
    """Debug hook fired when the parser reaches the end of a statement."""
    print(f"parser_end: {token}")


# Grammar for a toy "hoge fuga;" language; the act_* callbacks (defined
# elsewhere in this example) trace the parser's progress.
comment_parser = pp.Group((pp.Literal("//") + pp.restOfLine)
                          | pp.cStyleComment).setParseAction(act_comment)

pp_key1 = pp.Keyword("hoge")
pp_key2 = pp.Keyword("fuga")
pp_sc = pp.Literal(";")

# pp.Empty() matches nothing but fires its parse action, giving start/end
# hooks; .ignore() lets comments appear between the two keywords.
statement = pp.Group(
    pp.Empty().setParseAction(act_parser_start) +
    (pp_key1.setParseAction(act_keyword) +
     pp_key2.setParseAction(act_keyword)).ignore(comment_parser) +
    (pp_sc.setParseAction(act_sc) + pp.Optional(comment_parser)) +
    pp.Empty().setParseAction(act_parser_end))
# statement[1, ...] is pyparsing 3 shorthand for OneOrMore(statement).
parser = statement[1, ...]

test_text = """\
hoge fuga;	// comment1
hoge /* comment2-1 */ fuga;	/* comment2-2 */
// comment3
hoge fuga;	// comment4
"""

ret = parser.parseString(test_text)
print(ret)
"""
[result]
parser_start: []
コード例 #18
0
# list(<identifier>) type; the tag()/kw()/lit()/s() helpers come from
# earlier in this example.  '-' (instead of '+') disables backtracking
# past that point so errors are reported where they occur.
list_type = tag("list") + P.Combine(
    kw('list') - lit('(') - identifier - lit(')'))
any_type = P.Group(enum_type | array_type | list_type
                   | scalar_type).setName("type name")

# Structs
pad_member = P.Group(kw('pad') - s('(') - integer - s(')'))
discriminator_member = P.Group(
    tag('discriminator') + any_type + identifier + s('==') + s('?'))
type_member = P.Group(tag('type') + any_type + identifier + s('==') + integer)
data_member = P.Group(tag('data') + any_type - identifier)

struct_param_name = kw("align")
struct_param = P.Group(struct_param_name - s('=') - word)
# Right-recursive comma-separated list; tolerates a trailing comma.
struct_param_list = P.Forward()
struct_param_list << struct_param + P.Optional(
    s(',') - P.Optional(struct_param_list))

struct_member = pad_member | type_member | discriminator_member | data_member
# A struct may optionally name a parent after ':'; tag(None) keeps the
# result shape uniform when there is none.
parent = (s(':') - identifier) | tag(None)
struct = kw('struct') - identifier - P.Group(P.Optional(s('(') - struct_param_list - s(')'))) - parent - s('{') + \
         P.Group(P.ZeroOrMore(struct_member - s(';'))) + \
         s('}') - s(';')

# Enums
enum_param_name = kw("wire_type") | kw("bitmask") | kw("complete")
enum_param = P.Group(enum_param_name - s('=') - word)
enum_param_list = P.Forward()
enum_param_list << enum_param + P.Optional(
    s(',') + P.Optional(enum_param_list))

enum_member_param_name = kw("virtual")
コード例 #19
0
File: parser.py  Project: vascotenner/holoviews
class CompositorSpec(Parser):
    """
    The syntax for defining a set of compositors is as follows:

    [ mode op(spec) [settings] value ]+

    The components are:

    mode      : Operation mode, either 'data' or 'display'.
    group     : Value identifier with capitalized initial letter.
    op        : The name of the operation to apply.
    spec      : Overlay specification of form (A * B) where A and B are
                 dotted path specifications.
    settings  : Optional list of keyword arguments to be used as
                parameters to the operation (in square brackets).
    """

    # Operation mode token ('data' or 'display'; validated in parse()).
    mode = pp.Word(pp.alphas + pp.nums + '_').setResultsName("mode")

    # Name of the operation to apply.
    op = pp.Word(pp.alphas + pp.nums + '_').setResultsName("op")

    # Parenthesized overlay specification, e.g. (A * B).
    overlay_spec = pp.nestedExpr(opener='(', closer=')',
                                 ignoreExpr=None).setResultsName("spec")

    # Output value identifier.
    value = pp.Word(pp.alphas + pp.nums + '_').setResultsName("value")

    # Optional [key=value, ...] settings passed to the operation.
    op_settings = pp.nestedExpr(opener='[', closer=']',
                                ignoreExpr=None).setResultsName("op_settings")

    compositor_spec = pp.OneOrMore(
        pp.Group(mode + op + overlay_spec + value + pp.Optional(op_settings)))

    @classmethod
    def parse(cls, line, ns={}):
        """
        Parse compositor specifications, returning a list of Compositors.

        Raises SyntaxError if the line is not a single valid specification
        or references an unknown operation.
        """
        definitions = []
        parses = [p for p in cls.compositor_spec.scanString(line)]
        if len(parses) != 1:
            raise SyntaxError("Invalid specification syntax.")
        else:
            e = parses[0][2]
            processed = line[:e]
            if (processed.strip() != line.strip()):
                raise SyntaxError("Failed to parse remainder of string: %r" %
                                  line[e:])

        opmap = {op.__name__: op for op in Compositor.operations}
        for group in cls.compositor_spec.parseString(line):

            if ('mode'
                    not in group) or group['mode'] not in ['data', 'display']:
                raise SyntaxError(
                    "Either data or display mode must be specified.")
            mode = group['mode']

            # Bug fix: validate the operation name BEFORE indexing opmap.
            # Previously `opmap[group['op']]` ran first, so an unknown op
            # raised a bare KeyError and this SyntaxError was unreachable.
            if group['op'] not in opmap:
                raise SyntaxError(
                    "Operation %s not available for use with compositors." %
                    group['op'])

            kwargs = {}
            operation = opmap[group['op']]
            spec = ' '.join(group['spec'].asList()[0])

            if 'op_settings' in group:
                kwargs = cls.todict(group['op_settings'][0], 'brackets', ns=ns)

            definition = Compositor(str(spec), operation, str(group['value']),
                                    mode, **kwargs)
            definitions.append(definition)
        return definitions
コード例 #20
0
def _build_tgrep_parser(set_parse_actions=True):
    '''
    Builds a pyparsing-based parser object for tokenizing and
    interpreting tgrep search strings.

    :param set_parse_actions: when False, returns the bare grammar with no
        semantic actions attached (useful for tokenization-only tests).
    :return: the top-level ``tgrep_exprs`` parser element, with '#'
        line comments ignored.
    '''
    # A relation operator, optionally negated with '!'.
    tgrep_op = (pyparsing.Optional('!') +
                pyparsing.Regex('[$%,.<>][%,.<>0-9-\':]*'))
    tgrep_qstring = pyparsing.QuotedString(quoteChar='"',
                                           escChar='\\',
                                           unquoteResults=False)
    tgrep_node_regex = pyparsing.QuotedString(quoteChar='/',
                                              escChar='\\',
                                              unquoteResults=False)
    # i@"..." / i@/.../ are case-insensitive variants.
    tgrep_qstring_icase = pyparsing.Regex(
        'i@\\"(?:[^"\\n\\r\\\\]|(?:\\\\.))*\\"')
    tgrep_node_regex_icase = pyparsing.Regex(
        'i@\\/(?:[^/\\n\\r\\\\]|(?:\\\\.))*\\/')
    tgrep_node_literal = pyparsing.Regex('[^][ \r\t\n;:.,&|<>()$!@%\'^=]+')
    tgrep_expr = pyparsing.Forward()
    tgrep_relations = pyparsing.Forward()
    tgrep_parens = pyparsing.Literal('(') + tgrep_expr + ')'
    # N(i,j,...) addresses a node by its NLTK tree position.
    tgrep_nltk_tree_pos = (pyparsing.Literal('N(') + pyparsing.Optional(
        pyparsing.Word(pyparsing.nums) + ',' + pyparsing.Optional(
            pyparsing.delimitedList(pyparsing.Word(pyparsing.nums), delim=',')
            + pyparsing.Optional(','))) + ')')
    tgrep_node_label = pyparsing.Regex('[A-Za-z0-9]+')
    tgrep_node_label_use = pyparsing.Combine('=' + tgrep_node_label)
    # see _tgrep_segmented_pattern_action
    tgrep_node_label_use_pred = tgrep_node_label_use.copy()
    # setWhitespaceChars('') makes these tokens adjacency-sensitive.
    macro_name = pyparsing.Regex('[^];:.,&|<>()[$!@%\'^=\r\t\n ]+')
    macro_name.setWhitespaceChars('')
    macro_use = pyparsing.Combine('@' + macro_name)
    tgrep_node_expr = (tgrep_node_label_use_pred | macro_use
                       | tgrep_nltk_tree_pos | tgrep_qstring_icase
                       | tgrep_node_regex_icase | tgrep_qstring
                       | tgrep_node_regex | '*' | tgrep_node_literal)
    # A node expression optionally bound to a label with '=label'.
    tgrep_node_expr2 = (
        (tgrep_node_expr + pyparsing.Literal('=').setWhitespaceChars('') +
         tgrep_node_label.copy().setWhitespaceChars('')) | tgrep_node_expr)
    tgrep_node = (tgrep_parens | (pyparsing.Optional("'") + tgrep_node_expr2 +
                                  pyparsing.ZeroOrMore("|" + tgrep_node_expr)))
    tgrep_brackets = pyparsing.Optional('!') + '[' + tgrep_relations + ']'
    tgrep_relation = tgrep_brackets | (tgrep_op + tgrep_node)
    # Conjunction binds tighter than the '|' disjunction below; '&' is
    # optional between conjuncts.
    tgrep_rel_conjunction = pyparsing.Forward()
    tgrep_rel_conjunction << (
        tgrep_relation +
        pyparsing.ZeroOrMore(pyparsing.Optional('&') + tgrep_rel_conjunction))
    tgrep_relations << tgrep_rel_conjunction + pyparsing.ZeroOrMore(
        "|" + tgrep_relations)
    tgrep_expr << tgrep_node + pyparsing.Optional(tgrep_relations)
    tgrep_expr_labeled = tgrep_node_label_use + pyparsing.Optional(
        tgrep_relations)
    tgrep_expr2 = tgrep_expr + pyparsing.ZeroOrMore(':' + tgrep_expr_labeled)
    # '@name pattern' defines a macro for later '@name' use.
    macro_defn = (pyparsing.Literal('@') + pyparsing.White().suppress() +
                  macro_name + tgrep_expr2)
    tgrep_exprs = (
        pyparsing.Optional(macro_defn +
                           pyparsing.ZeroOrMore(';' + macro_defn) + ';') +
        tgrep_expr2 + pyparsing.ZeroOrMore(';' + (macro_defn | tgrep_expr2)) +
        pyparsing.ZeroOrMore(';').suppress())
    if set_parse_actions:
        tgrep_node_label_use.setParseAction(_tgrep_node_label_use_action)
        tgrep_node_label_use_pred.setParseAction(
            _tgrep_node_label_pred_use_action)
        macro_use.setParseAction(_tgrep_macro_use_action)
        tgrep_node.setParseAction(_tgrep_node_action)
        tgrep_node_expr2.setParseAction(_tgrep_bind_node_label_action)
        tgrep_parens.setParseAction(_tgrep_parens_action)
        tgrep_nltk_tree_pos.setParseAction(_tgrep_nltk_tree_pos_action)
        tgrep_relation.setParseAction(_tgrep_relation_action)
        tgrep_rel_conjunction.setParseAction(_tgrep_conjunction_action)
        tgrep_relations.setParseAction(_tgrep_rel_disjunction_action)
        macro_defn.setParseAction(_macro_defn_action)
        # the whole expression is also the conjunction of two
        # predicates: the first node predicate, and the remaining
        # relation predicates
        tgrep_expr.setParseAction(_tgrep_conjunction_action)
        tgrep_expr_labeled.setParseAction(_tgrep_segmented_pattern_action)
        tgrep_expr2.setParseAction(
            functools.partial(_tgrep_conjunction_action, join_char=':'))
        tgrep_exprs.setParseAction(_tgrep_exprs_action)
    return tgrep_exprs.ignore('#' + pyparsing.restOfLine)
コード例 #21
0
# Initialize non-ascii unicode code points in the Basic Multilingual Plane.
# (six.unichr keeps this working on both Python 2 and 3.)
unicode_printables = u''.join(
    six.unichr(c) for c in range(128, 65536) if not six.unichr(c).isspace())

# Does not like comma. No Literals from above allowed.
valid_identifier_chars = ((unicode_printables + pyparsing.alphanums +
                           ".-_#!$%&'*+/:;?@[\\]^`|~"))

metric_name = (pyparsing.Word(valid_identifier_chars, min=1,
                              max=255)("metric_name"))
dimension_name = pyparsing.Word(valid_identifier_chars + ' ', min=1, max=255)
dimension_value = pyparsing.Word(valid_identifier_chars + ' ', min=1, max=255)

MINUS = pyparsing.Literal('-')
integer_number = pyparsing.Word(pyparsing.nums)
decimal_number = (pyparsing.Optional(MINUS) + integer_number +
                  pyparsing.Optional("." + integer_number))
# Re-join the sign/digits/fraction tokens into one string token.
decimal_number.setParseAction(lambda tokens: "".join(tokens))

# NOTE(review): these names shadow the builtins max/min/sum/count — fine
# inside a grammar module, but avoid `from ... import *` on this file.
max = pyparsing.CaselessLiteral("max")
min = pyparsing.CaselessLiteral("min")
avg = pyparsing.CaselessLiteral("avg")
count = pyparsing.CaselessLiteral("count")
sum = pyparsing.CaselessLiteral("sum")
last = pyparsing.CaselessLiteral("last")
func = (max | min | avg | count | sum | last)("func")

# NOTE(review): wherever these two are combined, "<=" must be tried before
# "<" (MatchFirst is eager) — verify the combining expression's ordering.
less_than_op = ((pyparsing.CaselessLiteral("<")
                 | pyparsing.CaselessLiteral("lt")))
less_than_eq_op = ((pyparsing.CaselessLiteral("<=")
                    | pyparsing.CaselessLiteral("lte")))
コード例 #22
0
LPAR, RPAR = map(pp.Suppress, "()")
# Unsigned integer/decimal with an optional exponent.
numvalue = pp.Regex(r"\d+(\.\d*)?([eE][+-]?\d+)?")
term = pp.Forward()
factor = pp.Forward()

addsub = pp.oneOf('+ -')
muldiv = pp.oneOf('* /')
compare = pp.Regex(">=|<=|!=|>|<|==").setName("compare")
NOT_ = pp.Keyword("NOT").setName("NOT")
AND_ = pp.Keyword("AND").setName("AND")
OR_ = pp.Keyword("OR").setName("OR")

symbol = pp.Word(pp.alphas).setName("symbol")
# Dotted property access, e.g. obj.attr.
propsymbol = pp.Group(symbol + "." + symbol).setName("propsymbol")
formula = pp.Optional(addsub) + term + pp.ZeroOrMore(addsub + term)
term << (factor + pp.ZeroOrMore(muldiv + factor))
factor << (numvalue | propsymbol | LPAR + formula + RPAR)

# NOTE(review): rebinding `factor` here drops the parenthesized-formula
# alternative for anything built from it below — confirm this is intended.
factor = numvalue | propsymbol
# condition = pp.Group(factor + compare + factor)

formula = pp.infixNotation(factor, [
    (
        muldiv,
        2,
        pp.opAssoc.LEFT,
    ),
    (
        addsub,
        2,
コード例 #23
0
class SyslogParser(text_parser.PyparsingMultiLineTextParser):
    """Parses syslog formatted log files"""
    NAME = 'syslog'

    DESCRIPTION = 'Syslog Parser'

    _ENCODING = 'utf-8'

    _plugin_classes = {}

    # The reporter and facility fields can contain any printable character, but
    # to allow for processing of syslog formats that delimit the reporter and
    # facility with printable characters, we remove certain common delimiters
    # from the set of printable characters.
    _REPORTER_CHARACTERS = ''.join(
        [c for c in pyparsing.printables if c not in [':', '[', '<']])
    _FACILITY_CHARACTERS = ''.join(
        [c for c in pyparsing.printables if c not in [':', '>']])

    _SYSLOG_SEVERITY = [
        'EMERG', 'ALERT', 'CRIT', 'ERR', 'WARNING', 'NOTICE', 'INFO', 'DEBUG'
    ]

    _OFFSET_PREFIX = ['-', '+']

    # Message body: everything up to the next syslog or ISO 8601 timestamp
    # (or end of input).
    _BODY_CONTENT = (
        r'.*?(?=($|\n\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2})|' \
        r'($|\n\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}' \
        r'[\+|-]\d{2}:\d{2}\s))')

    _VERIFICATION_REGEX = re.compile(r'^\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}\s' +
                                     _BODY_CONTENT)

    # The Chrome OS syslog messages are of a format beginning with an
    # ISO 8601 combined date and time expression with timezone designator:
    #   2016-10-25T12:37:23.297265-07:00
    #
    # This will then be followed by the SYSLOG Severity which will be one of:
    #   EMERG,ALERT,CRIT,ERR,WARNING,NOTICE,INFO,DEBUG
    #
    # 2016-10-25T12:37:23.297265-07:00 INFO
    _CHROMEOS_VERIFICATION_REGEX = re.compile(
        r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.'
        r'\d{6}[\+|-]\d{2}:\d{2}\s'
        r'(EMERG|ALERT|CRIT|ERR|WARNING|NOTICE|INFO|DEBUG)' + _BODY_CONTENT)

    # Reusable pyparsing elements, each tagged with the results name used
    # by ParseRecord.
    _PYPARSING_COMPONENTS = {
        'year':
        text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName('year'),
        'two_digit_month':
        (text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
            'two_digit_month')),
        'month':
        text_parser.PyparsingConstants.MONTH.setResultsName('month'),
        'day':
        text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName('day'),
        'hour':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('hour'),
        'minute':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('minute'),
        'second':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('second'),
        'fractional_seconds':
        pyparsing.Word(pyparsing.nums).setResultsName('fractional_seconds'),
        'hostname':
        pyparsing.Word(pyparsing.printables).setResultsName('hostname'),
        'reporter':
        pyparsing.Word(_REPORTER_CHARACTERS).setResultsName('reporter'),
        'pid':
        text_parser.PyparsingConstants.PID.setResultsName('pid'),
        'facility':
        pyparsing.Word(_FACILITY_CHARACTERS).setResultsName('facility'),
        'severity':
        pyparsing.oneOf(_SYSLOG_SEVERITY).setResultsName('severity'),
        'body':
        pyparsing.Regex(_BODY_CONTENT, re.DOTALL).setResultsName('body'),
        'comment_body':
        pyparsing.SkipTo(' ---').setResultsName('body'),
        'iso_8601_offset':
        (pyparsing.oneOf(_OFFSET_PREFIX) +
         text_parser.PyparsingConstants.TWO_DIGITS + pyparsing.Optional(
             pyparsing.Literal(':') +
             text_parser.PyparsingConstants.TWO_DIGITS))
    }

    _PYPARSING_COMPONENTS['date'] = (
        _PYPARSING_COMPONENTS['month'] + _PYPARSING_COMPONENTS['day'] +
        _PYPARSING_COMPONENTS['hour'] + pyparsing.Suppress(':') +
        _PYPARSING_COMPONENTS['minute'] + pyparsing.Suppress(':') +
        _PYPARSING_COMPONENTS['second'] + pyparsing.Optional(
            pyparsing.Suppress('.') +
            _PYPARSING_COMPONENTS['fractional_seconds']))

    _PYPARSING_COMPONENTS['iso_8601_date'] = pyparsing.Combine(
        _PYPARSING_COMPONENTS['year'] + pyparsing.Literal('-') +
        _PYPARSING_COMPONENTS['two_digit_month'] + pyparsing.Literal('-') +
        _PYPARSING_COMPONENTS['day'] + pyparsing.Literal('T') +
        _PYPARSING_COMPONENTS['hour'] + pyparsing.Literal(':') +
        _PYPARSING_COMPONENTS['minute'] + pyparsing.Literal(':') +
        _PYPARSING_COMPONENTS['second'] + pyparsing.Literal('.') +
        _PYPARSING_COMPONENTS['fractional_seconds'] +
        _PYPARSING_COMPONENTS['iso_8601_offset'],
        joinString='',
        adjacent=True).setResultsName('iso_8601_date')

    _CHROMEOS_SYSLOG_LINE = (
        _PYPARSING_COMPONENTS['iso_8601_date'] +
        _PYPARSING_COMPONENTS['severity'] + _PYPARSING_COMPONENTS['reporter'] +
        pyparsing.Optional(pyparsing.Suppress(':')) + pyparsing.Optional(
            pyparsing.Suppress('[') + _PYPARSING_COMPONENTS['pid'] +
            pyparsing.Suppress(']')) +
        pyparsing.Optional(pyparsing.Suppress(':')) +
        _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

    _SYSLOG_LINE = (
        _PYPARSING_COMPONENTS['date'] + _PYPARSING_COMPONENTS['hostname'] +
        _PYPARSING_COMPONENTS['reporter'] + pyparsing.Optional(
            pyparsing.Suppress('[') + _PYPARSING_COMPONENTS['pid'] +
            pyparsing.Suppress(']')) + pyparsing.Optional(
                pyparsing.Suppress('<') + _PYPARSING_COMPONENTS['facility'] +
                pyparsing.Suppress('>')) +
        pyparsing.Optional(pyparsing.Suppress(':')) +
        _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

    _SYSLOG_COMMENT = (_PYPARSING_COMPONENTS['date'] +
                       pyparsing.Suppress(':') + pyparsing.Suppress('---') +
                       _PYPARSING_COMPONENTS['comment_body'] +
                       pyparsing.Suppress('---') + pyparsing.LineEnd())

    _KERNEL_SYSLOG_LINE = (
        _PYPARSING_COMPONENTS['date'] +
        pyparsing.Literal('kernel').setResultsName('reporter') +
        pyparsing.Suppress(':') + _PYPARSING_COMPONENTS['body'] +
        pyparsing.lineEnd())

    LINE_STRUCTURES = [('syslog_line', _SYSLOG_LINE),
                       ('syslog_line', _KERNEL_SYSLOG_LINE),
                       ('syslog_comment', _SYSLOG_COMMENT),
                       ('chromeos_syslog_line', _CHROMEOS_SYSLOG_LINE)]

    _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

    def __init__(self):
        """Initializes a parser."""
        super(SyslogParser, self).__init__()
        self._last_month = 0
        self._maximum_year = 0
        self._plugin_by_reporter = {}
        self._year_use = 0

    def _UpdateYear(self, mediator, month):
        """Updates the year to use for events, based on last observed month.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      month (int): month observed by the parser, where January is 1.
    """
        if not self._year_use:
            self._year_use = mediator.GetEstimatedYear()
        if not self._maximum_year:
            self._maximum_year = mediator.GetLatestYear()

        if not self._last_month:
            self._last_month = month
            return

        # Some syslog daemons allow out-of-order sequences, so allow some leeway
        # to not cause Apr->May->Apr to cause the year to increment.
        # See http://bugzilla.adiscon.com/show_bug.cgi?id=527
        if self._last_month > (month + 1):
            if self._year_use != self._maximum_year:
                self._year_use += 1
        self._last_month = month

    def EnablePlugins(self, plugin_includes):
        """Enables parser plugins.

    Args:
      plugin_includes (list[str]): names of the plugins to enable, where None
          or an empty list represents all plugins. Note that the default plugin
          is handled separately.
    """
        super(SyslogParser, self).EnablePlugins(plugin_includes)

        self._plugin_by_reporter = {}
        for plugin in self._plugins:
            self._plugin_by_reporter[plugin.REPORTER] = plugin

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a matching entry.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): elements parsed from the file.

    Raises:
      ParseError: when the structure type is unknown.
    """
        if key not in self._SUPPORTED_KEYS:
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

        if key == 'chromeos_syslog_line':
            timestamp = timelib.Timestamp.FromTimeString(
                structure.iso_8601_date[0])
        else:
            month = timelib.MONTH_DICT.get(structure.month.lower(), None)
            if not month:
                # Bug fix: `month` is None on this branch and formatting None
                # with '{0:s}' raises TypeError — report the original input
                # value instead.
                parser_mediator.ProduceParserError(
                    'Invalid month value: {0:s}'.format(structure.month))
                return

            self._UpdateYear(parser_mediator, month)
            timestamp = timelib.Timestamp.FromTimeParts(
                year=self._year_use,
                month=month,
                day=structure.day,
                hour=structure.hour,
                minutes=structure.minute,
                seconds=structure.second,
                timezone=parser_mediator.timezone)

        plugin = None
        if key == 'syslog_comment':
            event_data = SyslogCommentEventData()
            event_data.body = structure.body
            # TODO: pass line number to offset or remove.
            event_data.offset = 0

        else:
            event_data = SyslogLineEventData()
            event_data.body = structure.body
            event_data.hostname = structure.hostname or None
            # TODO: pass line number to offset or remove.
            event_data.offset = 0
            event_data.pid = structure.pid
            event_data.reporter = structure.reporter
            event_data.severity = structure.severity

            plugin = self._plugin_by_reporter.get(structure.reporter, None)
            if plugin:
                attributes = {
                    'hostname': structure.hostname,
                    'severity': structure.severity,
                    'reporter': structure.reporter,
                    'pid': structure.pid,
                    'body': structure.body
                }

                try:
                    # TODO: pass event_data instead of attributes.
                    plugin.Process(parser_mediator, timestamp, attributes)

                except errors.WrongPlugin:
                    # The reporter-matched plugin declined the entry; fall
                    # back to producing a generic syslog event below.
                    plugin = None

        if not plugin:
            event = time_events.TimestampEvent(
                timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
            parser_mediator.ProduceEventWithEventData(event, event_data)

    def VerifyStructure(self, unused_parser_mediator, lines):
        """Verifies that this is a syslog-formatted file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      lines (str): one or more lines from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
        return (re.match(self._VERIFICATION_REGEX, lines) or re.match(
            self._CHROMEOS_VERIFICATION_REGEX, lines)) is not None
コード例 #24
0
# A line comment runs from '#' to end of line and yields no tokens.
_comment = pp.Regex (r'#.*').suppress ()

_operator = _ident.copy ().setParseAction (_make_operator).setName ('operator')
_string = pp.quotedString.setParseAction (_make_string).setName ('string')
_open_brace = pp.Keyword ('{').suppress ()
_close_brace = pp.Keyword ('}').suppress ()

# '^' is pyparsing.Or: the longest-matching alternative wins.
_boolean = (pp.Keyword ('true').setParseAction (_make_true).setName ('true') ^
            pp.Keyword ('false').setParseAction (_make_false).setName ('false'))

# Procedures may nest inside procedures, hence the Forward declaration.
_procedure = pp.Forward ()
operation = pp.ZeroOrMore (_comment ^ _boolean ^ _number ^ _procedure ^ _string ^ _operator)
_procedure << pp.Group (_open_brace + operation + _close_brace).setName ('procedure').setParseAction (_make_procedure)

_named_procedure = pp.Group (_ident.copy ().setName ('Procedure name')
                             + pp.Optional (_comment)
                             + _procedure).setName ('Named procedure')

# pp.Dict turns the (name, procedure) groups into a name -> procedure map.
_grammar = pp.Dict (pp.ZeroOrMore (_comment ^ _named_procedure))



_logger = logging.getLogger (__name__)

TUType = Mapping[str, instruction.Procedure]

def front_end (options: Options) -> TUType:
    global _source_file
    _source_file = options.source_file

    global _debug_info_enabled
コード例 #25
0
    return pp.Keyword(kwd_str).setParseAction(pp.replaceWith(kwd_value))


# JSON keyword literals; each parses to the corresponding Python value.
TRUE = make_keyword("true", True)
FALSE = make_keyword("false", False)
NULL = make_keyword("null", None)

# Structural punctuation, suppressed from the parse results.
LBRACK, RBRACK, LBRACE, RBRACE, COLON = map(pp.Suppress, "[]{}:")

jsonString = pp.dblQuotedString().setParseAction(pp.removeQuotes)
jsonNumber = ppc.number()

# Objects and values are mutually recursive, hence the Forward declarations.
jsonObject = pp.Forward().setName("jsonObject")
jsonValue = pp.Forward().setName("jsonValue")
jsonElements = pp.delimitedList(jsonValue)
# An array defaults to [] when empty.
jsonArray = pp.Group(LBRACK + pp.Optional(jsonElements, []) + RBRACK)
jsonValue << (jsonString | jsonNumber | pp.Group(jsonObject) | jsonArray | TRUE
              | FALSE | NULL)
# A member is a "key": value pair; pp.Dict below turns members into a
# name-to-value mapping in the results.
memberDef = pp.Group(jsonString + COLON + jsonValue).setName("jsonMember")
jsonMembers = pp.delimitedList(memberDef)
jsonObject << pp.Dict(LBRACE + pp.Optional(jsonMembers) + RBRACE)

# Allow C/C++-style comments anywhere (a JSON extension).
jsonComment = pp.cppStyleComment
jsonObject.ignore(jsonComment)

if __name__ == "__main__":
    testdata = """
    {
        "glossary": {
            "title": "example glossary",
            "GlossDiv": {
コード例 #26
0
# parse utils

# An unsigned integer.
natural = pp.Word(pp.nums)

# A float with optional sign, optional integer/fraction parts and an
# optional exponent; the '.' is mandatory.
float_number = pp.Regex(r'(\-)?(\d+)?(\.)(\d*)?([eE][\-\+]\d+)?')

# Consume the rest of the line; skip_supress is a helper defined elsewhere
# in this module (name presumably a typo for "skip_suppress").
skipLine = pp.Suppress(skip_supress('\n'))

# A ';' comment runs to end of line.
comment = pp.Suppress(pp.Literal(';')) + skipLine

optional_comment = pp.ZeroOrMore(comment)

word = pp.Word(pp.alphanums + "*")

# One data line: numbers and/or words, with an optional trailing comment.
line = pp.Group(
    pp.OneOrMore(float_number | word) + pp.Optional(comment))

lines = pp.Group(pp.OneOrMore(line))

# A section header like "[ atoms ]"; only the name is kept.
brackets = pp.Suppress("[") + word + pp.Suppress("]")

# High level parsers
section = brackets + optional_comment + lines

many_sections = pp.Group(pp.OneOrMore(section))


# Parser for itp files
itp_parser = optional_comment + many_sections

# Parser for the atom section of mol2 files
コード例 #27
0
    'SIGMA+g': 'Σ+g',
    'PI': 'Π',
    'PIu': 'Πu',
    'PIg': 'Πg',
    'DELTA': 'Δ',
    'DELTAu': 'Δu',
    'DELTAg': 'Δg',
    'PHI': 'Φ',
    'PHIg': 'Φg',
    'PHIu': 'Φu'
}

# Grammar for molecular term symbols, e.g. "X(2PIu_1/2)".
integer = pp.Word(pp.nums)
# Spin multiplicity (the leading integer of the term symbol).
molecule_Smult = integer.setResultsName('Smult')
# Irreducible representation label; orbital_irrep_labels is defined above —
# presumably the keys of the Greek-letter mapping. TODO confirm.
molecule_irrep = pp.oneOf(orbital_irrep_labels).setResultsName('irrep')
# J value: an optionally signed integer, optionally followed by "/2" for
# half-integer values (the '/' is dropped, the '2' is kept).
molecule_Jstr = (pp.Combine(pp.Optional(pp.oneOf(('+', '-'))) + integer) +
                 pp.Optional(pp.Suppress('/') + '2')).setResultsName('Jstr')
# A term: multiplicity, irrep, and an optional '_'-separated J value.
molecule_term = (molecule_Smult + molecule_irrep +
                 pp.Optional(pp.Suppress('_') + molecule_Jstr))
# An electronic-state label such as X, A, a, b', combined with an optional
# prime/double-prime mark.
term_label = pp.Combine(
    pp.Word(pp.srange('[A-Za-z]')) +
    pp.Optional(pp.oneOf(("'", '"')))).setResultsName('term_label')
# Full symbol: optional label, then the term in optional parentheses; the
# grammar must consume the whole string.
molecule_term_with_label = (pp.Optional(term_label) +
                            pp.Suppress(pp.Optional('(')) + molecule_term +
                            pp.Suppress(pp.Optional(')')) + pp.StringEnd())


class MolecularTermSymbol(State):
    def parse_state(self, state_str):

        try:
コード例 #28
0
ファイル: rosettacode.py プロジェクト: timgates42/pyparsing
        pp.opAssoc.LEFT,
    ),
    (
        pp.oneOf("||"),
        2,
        pp.opAssoc.LEFT,
    ),
])

# Statement grammar for a small C-like language. The tokens referenced but
# not defined here (LPAR, RPAR, EQ, SEMI, LBRACE, RBRACE, WHILE, IF, ELSE,
# PRINT, PUTC, string, expr, identifier) are defined earlier in this module.

# Argument list of a print statement: comma-separated strings/expressions.
prt_list = pp.Group(pp.delimitedList(string | expr))
paren_expr = pp.Group(LPAR + expr + RPAR)

# Statements are mutually recursive (e.g. while bodies contain statements),
# hence the Forward declaration.
stmt = pp.Forward()
assignment_stmt = pp.Group(identifier + EQ + expr + SEMI)
# '-' after the keyword makes the rest of the statement mandatory, so a
# malformed statement raises an error instead of backtracking.
while_stmt = pp.Group(WHILE - paren_expr + stmt)
if_stmt = pp.Group(IF - paren_expr + stmt + pp.Optional(ELSE + stmt))
print_stmt = pp.Group(PRINT - pp.Group(LPAR + prt_list + RPAR) + SEMI)
putc_stmt = pp.Group(PUTC - paren_expr + SEMI)
# stmt[...] means zero-or-more statements.
stmt_list = pp.Group(LBRACE + stmt[...] + RBRACE)
stmt <<= (pp.Group(SEMI)
          | assignment_stmt
          | while_stmt
          | if_stmt
          | print_stmt
          | putc_stmt
          | stmt_list).setName("statement")

# A program is any number of statements; C/C++ comments are ignored.
code = stmt[...]
code.ignore(pp.cppStyleComment)

tests = [
コード例 #29
0
def _MathParser(math_stack):
  """Defines the entire math expression for BigQuery queries.

  Converts the expression into postfix notation. The stack is reversed
  (i.e. the last element acts the top of the stack).

  Actions do not occur unless parseString is called on the BNF returned.
  The actions will modify the original list that was passed when the BNF
  was generated.

  The <math_stack> will return the single expression converted to postfix.

  Arguments:
    math_stack: Returns postfix notation of one math expression.

  Returns:
    A BNF of an math/string expression.
  """

  def PushAggregation(tokens):
    """Pushes aggregation functions onto the stack.

    When the aggregation is pushed, the name is rewritten. The label is
    prepended with AGGREGATION_ to signal that an aggregation is occurring.
    Following this prefix is an integer, which represents the number of comma
    separated arguments that were provided. Finally, the name of the function
    is appended to the label. For most functions, the aggregation name is
    simply appended. However, there are special exceptions for COUNT.
    A normal count function is rewritten as AGGREGATION_i_COUNT. However,
    a count with the distinct keyword is rewritten to
    AGGREGATION_i_DISTINCTCOUNT.

    Args:
      tokens: The function name and arguments in a list object.
    """
    function_name = tokens[0]
    # Rename count with distinct keyword as distinctcount.
    if function_name == 'COUNT':
      # NOTE: 'in' on a raw ParseResults tests result *names*, not token
      # values, so tokens must be converted to a list for this test.
      if 'DISTINCT' in list(tokens):
        function_name = 'DISTINCTCOUNT'
    # Assume all aggregation functions have at least one argument.
    # If a function has n commas, then it has n + 1 arguments.
    num_args = 1 + sum(1 for token in tokens if token == ',')
    math_stack.append(util.AggregationFunctionToken(function_name, num_args))

  def PushFunction(tokens):
    """Push a function token onto the stack.

    Args:
      tokens: list of all tokens, tokens[0] is the function name str.
    """
    math_stack.append(util.BuiltInFunctionToken(tokens[0]))

  def PushSingleToken(tokens):
    """Push the topmost token onto the stack."""
    if util.IsFloat(tokens[0]):
      # Prefer an int when the literal has no fractional part.
      try:
        token = int(tokens[0])
      except ValueError:
        token = float(tokens[0])
    elif tokens[0].startswith('\'') or tokens[0].startswith('"'):
      token = util.StringLiteralToken(tokens[0])
    elif tokens[0].lower() in util.BIGQUERY_CONSTANTS:
      token = util.LiteralToken(tokens[0].lower(),
                                util.BIGQUERY_CONSTANTS[tokens[0].lower()])
    else:
      token = util.FieldToken(tokens[0])
    math_stack.append(token)

  def PushCountStar(tokens):
    """Push a COUNT(*) token onto the stack.

    Raises:
      ValueError: If the matched token is not '*'.
    """
    if tokens[0] != '*':
      raise ValueError('Not a count star argument.')
    math_stack.append(util.CountStarToken())

  def PushUnaryOperators(tokens):
    """Push unary operator tokens onto the stack.

    Args:
      tokens: the sequence of unary operator strings preceding an atom.
    """
    # The list must be reversed since unary operations are unwrapped in the
    # other direction. An example is ~-1. The negation occurs before the bit
    # inversion.
    for token in reversed(list(tokens)):
      if token == '-':
        # Unary minus is rewritten as multiplication by -1.
        math_stack.append(-1)
        math_stack.append(util.OperatorToken('*', 2))
      elif token == '~':
        math_stack.append(util.OperatorToken('~', 1))
      elif token.lower() == 'not':
        math_stack.append(util.OperatorToken('not', 1))
      # Unary '+' is a no-op and is deliberately ignored.

  def PushBinaryOperator(tokens):
    """Push a two-argument operator token onto the stack."""
    math_stack.append(util.OperatorToken(tokens[0], 2))

  # Miscellaneous symbols and keywords.
  comma = pp.Literal(',')
  decimal = pp.Literal('.')
  exponent_literal = pp.CaselessLiteral('E')
  lp = pp.Literal('(')
  rp = pp.Literal(')')
  count_star = pp.Literal('*')
  distinct_keyword = pp.CaselessKeyword('DISTINCT')

  # Any non-space containing sequence of characters that must begin with
  # an alphabetical character and contain alphanumeric characters
  # and underscores (i.e. function or variable names).
  label = pp.Word(pp.alphas, pp.alphas + pp.nums + '_' + '.')

  # A single/double quote surrounded string.
  string = pp.quotedString

  # Various number representations.
  integer = pp.Word(pp.nums)
  decimal_type1 = pp.Combine(integer + decimal + pp.Optional(integer))
  decimal_type2 = pp.Combine(decimal + integer)
  real = decimal_type1 | decimal_type2
  exponent = exponent_literal + pp.Word('+-' + pp.nums, pp.nums)
  number_without_exponent = real | integer
  number = pp.Combine(number_without_exponent + pp.Optional(exponent))
  integer_argument = pp.Word(pp.nums)
  integer_argument.setParseAction(PushSingleToken)

  # Forward declaration for recursive grammar. We assume that full_expression
  # can represent any expression that is valid.
  full_expression = pp.Forward()

  # Aggregation function definitions.
  avg_function = pp.CaselessKeyword('AVG') + lp + full_expression + rp
  count_star.setParseAction(PushCountStar)
  count_argument = ((pp.Optional(distinct_keyword) + full_expression) |
                    count_star)
  count_function = (pp.CaselessKeyword('COUNT') + lp +
                    count_argument + pp.Optional(comma + integer_argument) + rp)
  quantiles_function = (pp.CaselessKeyword('QUANTILES') + lp + full_expression +
                        pp.Optional(comma + integer_argument) + rp)
  stddev_function = pp.CaselessKeyword('STDDEV') + lp + full_expression + rp
  variance_function = pp.CaselessKeyword('VARIANCE') + lp + full_expression + rp
  last_function = pp.CaselessKeyword('LAST') + lp + full_expression + rp
  max_function = pp.CaselessKeyword('MAX') + lp + full_expression + rp
  min_function = pp.CaselessKeyword('MIN') + lp + full_expression + rp
  nth_function = (pp.CaselessKeyword('NTH') + lp + integer_argument + comma +
                  full_expression + rp)
  group_concat_function = (pp.CaselessKeyword('GROUP_CONCAT') + lp +
                           full_expression + rp)
  sum_function = pp.CaselessKeyword('SUM') + lp + full_expression + rp
  top_function = (pp.CaselessKeyword('TOP') + lp + full_expression +
                  pp.Optional(comma + integer_argument +
                              pp.Optional(comma + integer_argument)) + rp)
  aggregate_functions = (avg_function | count_function | quantiles_function |
                         stddev_function | variance_function | last_function |
                         max_function | min_function | nth_function |
                         group_concat_function | sum_function | top_function)
  aggregate_functions.setParseAction(PushAggregation)

  # A generic (non-aggregate) function call with comma-separated arguments.
  functions_arguments = pp.Optional(full_expression +
                                    pp.ZeroOrMore(comma.suppress() +
                                                  full_expression))
  functions = label + lp + functions_arguments + rp
  functions.setParseAction(PushFunction)

  literals = number | string | label
  literals.setParseAction(PushSingleToken)

  # Any expression that can be modified by an unary operator.
  # We include strings (even though they can't be modified by any unary
  # operator) since atoms do not necessitate modification by unary operators.
  # These errors will be caught by the interpreter.
  atom = ((lp + full_expression + rp) |
          aggregate_functions |
          functions |
          literals)

  unary_operators = (pp.CaselessLiteral('+') |
                     pp.CaselessLiteral('-') |
                     pp.CaselessLiteral('~') |
                     pp.CaselessKeyword('not'))
  # Take all unary operators preceding atom (possibly many). The atom's
  # tokens are suppressed so PushUnaryOperators only sees the operators;
  # the atom's own parse actions have already pushed its value.
  current_expression = (pp.ZeroOrMore(unary_operators) +
                        atom.suppress())
  current_expression.setParseAction(PushUnaryOperators)

  # All operators in same set have same precedence. Precedence is top to bottom.
  binary_operators = [
      (pp.CaselessLiteral('*') | pp.CaselessLiteral('/') |
       pp.CaselessLiteral('%')),
      pp.CaselessLiteral('+') | pp.CaselessLiteral('-'),
      pp.CaselessLiteral('>>') | pp.CaselessLiteral('<<'),
      (pp.CaselessLiteral('<=') | pp.CaselessLiteral('>=') |
       pp.CaselessLiteral('<') | pp.CaselessLiteral('>')),
      (pp.CaselessLiteral('==') | pp.CaselessLiteral('=') |
       pp.CaselessLiteral('!=')),
      pp.CaselessKeyword('is') | pp.CaselessKeyword('contains'),
      pp.CaselessLiteral('&'),
      pp.CaselessLiteral('^'),
      pp.CaselessLiteral('|'),
      pp.CaselessKeyword('and'),
      pp.CaselessKeyword('or'),
  ]

  # Take the operator set of the most precedence that has not been parsed.
  # Find and collapse all operators of the set. Thus, order of operations
  # is not broken. Equivalent to recursive descent parsing.
  # Below code is equivalent to:
  # expression = expression + pp.ZeroOrMore(op_level1 + expression)
  # expression = expression + pp.ZeroOrMore(op_level2 + expression)
  # ...
  for operator_set in binary_operators:
    # Represents _i-1 ai part of expression that is added to current expression.
    operator_expression = operator_set + current_expression
    # Push only the operator, both atoms will have already been pushed.
    operator_expression.setParseAction(PushBinaryOperator)
    # pylint: disable=g-no-augmented-assignment
    current_expression = (current_expression +
                          pp.ZeroOrMore(operator_expression))

  # pylint: disable=pointless-statement
  full_expression << current_expression
  return full_expression
コード例 #30
0
class SSHPlugin(interface.SyslogPlugin):
    """A plugin for creating events from syslog message produced by SSH."""
    NAME = 'ssh'
    DESCRIPTION = 'Parser for SSH syslog entries.'
    # Only syslog lines attributed to this reporter are handed to the plugin.
    REPORTER = 'sshd'

    # sshd reports either password or public-key authentication.
    _AUTHENTICATION_METHOD = (pyparsing.Keyword('password')
                              | pyparsing.Keyword('publickey'))

    # Reusable pyparsing elements; each sets the results name used as the
    # token key in ParseMessage.
    _PYPARSING_COMPONENTS = {
        'address':
        text_parser.PyparsingConstants.IP_ADDRESS.setResultsName('address'),
        'authentication_method':
        _AUTHENTICATION_METHOD.setResultsName('authentication_method'),
        'fingerprint':
        pyparsing.Combine(
            pyparsing.Literal('RSA ') +
            pyparsing.Word(':' +
                           pyparsing.hexnums)).setResultsName('fingerprint'),
        'port':
        pyparsing.Word(pyparsing.nums, max=5).setResultsName('port'),
        'protocol':
        pyparsing.Literal('ssh2').setResultsName('protocol'),
        'username':
        pyparsing.Word(pyparsing.alphanums).setResultsName('username'),
    }

    # Matches a successful login message, e.g.:
    # "Accepted password for user from 1.2.3.4 port 22 ssh2"
    _LOGIN_GRAMMAR = (
        pyparsing.Literal('Accepted') +
        _PYPARSING_COMPONENTS['authentication_method'] +
        pyparsing.Literal('for') + _PYPARSING_COMPONENTS['username'] +
        pyparsing.Literal('from') + _PYPARSING_COMPONENTS['address'] +
        pyparsing.Literal('port') + _PYPARSING_COMPONENTS['port'] +
        _PYPARSING_COMPONENTS['protocol'] + pyparsing.Optional(
            pyparsing.Literal(':') + _PYPARSING_COMPONENTS['fingerprint']) +
        pyparsing.StringEnd())

    # Matches a failed authentication message, e.g.:
    # "Failed password for user from 1.2.3.4 port 22"
    _FAILED_CONNECTION_GRAMMAR = (
        pyparsing.Literal('Failed') +
        _PYPARSING_COMPONENTS['authentication_method'] +
        pyparsing.Literal('for') + _PYPARSING_COMPONENTS['username'] +
        pyparsing.Literal('from') + _PYPARSING_COMPONENTS['address'] +
        pyparsing.Literal('port') + _PYPARSING_COMPONENTS['port'] +
        pyparsing.StringEnd())

    # Matches an incoming connection message, e.g.:
    # "Connection from 1.2.3.4 port 22"
    _OPENED_CONNECTION_GRAMMAR = (pyparsing.Literal('Connection from') +
                                  _PYPARSING_COMPONENTS['address'] +
                                  pyparsing.Literal('port') +
                                  _PYPARSING_COMPONENTS['port'] +
                                  pyparsing.LineEnd())

    # (key, grammar) pairs tried against each syslog body; the matching key
    # is passed to ParseMessage.
    MESSAGE_GRAMMARS = [
        ('login', _LOGIN_GRAMMAR),
        ('failed_connection', _FAILED_CONNECTION_GRAMMAR),
        ('opened_connection', _OPENED_CONNECTION_GRAMMAR),
    ]

    def ParseMessage(self, parser_mediator, key, timestamp, tokens):
        """Produces an event from a syslog body that matched one of the grammars.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the matching grammar.
      timestamp (int): the timestamp, which contains the number of micro seconds
          since January 1, 1970, 00:00:00 UTC or 0 on error.
      tokens (dict[str, str]): tokens derived from a syslog message based on
          the defined grammar.

    Raises:
      AttributeError: If an unknown key is provided.
    """
        # TODO: change AttributeError into ValueError or equiv.
        if key not in ('failed_connection', 'login', 'opened_connection'):
            raise AttributeError('Unknown grammar key: {0:s}'.format(key))

        # Pick the event data class matching the grammar that fired.
        if key == 'login':
            event_data = SSHLoginEventData()

        elif key == 'failed_connection':
            event_data = SSHFailedConnectionEventData()

        elif key == 'opened_connection':
            event_data = SSHOpenedConnectionEventData()

        # Tokens not produced by the matched grammar default to None.
        event_data.address = tokens.get('address', None)
        event_data.authentication_method = tokens.get('authentication_method',
                                                      None)
        event_data.body = tokens.get('body', None)
        event_data.fingerprint = tokens.get('fingerprint', None)
        event_data.hostname = tokens.get('hostname', None)
        # TODO: pass line number to offset or remove.
        event_data.offset = 0
        event_data.pid = tokens.get('pid', None)
        event_data.protocol = tokens.get('protocol', None)
        event_data.port = tokens.get('port', None)
        event_data.reporter = tokens.get('reporter', None)
        event_data.severity = tokens.get('severity', None)
        event_data.username = tokens.get('username', None)

        event = time_events.TimestampEvent(
            timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
        parser_mediator.ProduceEventWithEventData(event, event_data)