Example #1
0
def parse(s):
    """Parse a single configuration line against the set of legal grammars.

    Args:
        s (str): one line of configuration text.

    Returns:
        list: the tokens parsed from the line.

    Raises:
        pp.ParseBaseException: if the line matches none of the grammars.
    """
    equals = pp.Suppress('=')
    colon = pp.Suppress(':')
    # Optional trailing '#' comment; everything from '#' on is discarded.
    comment = pp.Suppress(
        pp.Optional(pp.Literal('#') - pp.ZeroOrMore(pp.Word(pp.printables))))
    # set up multiple grammars

    # single str value
    strkeys = pp.oneOf(' '.join(strkeylist), caseless=True)
    string = pp.Word(pp.alphanums + punctuation)
    strgram = strkeys - equals - string - comment

    # single num value
    numkeys = pp.oneOf(' '.join(numkeys_int + numkeys_float), caseless=True)
    point = pp.Literal(".")
    e = pp.CaselessLiteral("E")
    # Signed int or float with optional exponent, e.g. -1, 2.5, 3e-4.
    num = pp.Combine(
        pp.Word("+-" + pp.nums, pp.nums) +
        pp.Optional(point + pp.Optional(pp.Word(pp.nums))) +
        pp.Optional(e + pp.Word("+-" + pp.nums, pp.nums)))
    numgram = numkeys - equals - num - comment

    # variable definition grammar
    strnumkeys = pp.oneOf(' '.join(var_def_keys + b_var_def_keys),
                          caseless=True)
    bng_parameter = pp.Word(pp.alphas, pp.alphanums + "_")
    varnums = bng_parameter - num - num - pp.Optional(pp.Word("ubBU"))
    strnumgram = strnumkeys - equals - varnums - comment

    # multiple string value grammar
    # NOTE(review): no trailing `comment` here, unlike the other grammars —
    # presumably because `string` could consume '#' tokens anyway; confirm.
    multstrkey = pp.oneOf(' '.join(multstrkeys), caseless=True)
    multstrgram = multstrkey - equals - pp.OneOrMore(string)

    # var and logvar alt grammar (only one number given)
    varkeys = pp.oneOf(' '.join(var_def_keys_1or2nums), caseless=True)
    vargram = varkeys - equals - bng_parameter - num - pp.Optional(
        num) - comment

    # multiple num value
    multnumkey = pp.oneOf(' '.join(multnumkeys), caseless=True)
    multnumgram = multnumkey - equals - pp.OneOrMore(num) - comment

    # model-data mapping grammar
    mdmkey = pp.CaselessLiteral("model")
    nonetoken = pp.Suppress(pp.CaselessLiteral("none"))
    # Raw strings required: '\.' in a non-raw literal is an invalid escape
    # sequence (SyntaxWarning/DeprecationWarning on modern Python).
    model_file = pp.Regex(r".*?\.(bngl|xml)")
    exp_file = pp.Regex(r".*?\.(exp|con|prop)")
    mdmgram = mdmkey - equals - model_file - colon - (
        pp.delimitedList(exp_file) ^ nonetoken) - comment

    # normalization mapping grammar
    normkey = pp.CaselessLiteral("normalization")
    anything = pp.Word(pp.alphanums + punctuation + ' ')
    normgram = normkey - equals - anything  # The set of legal grammars for normalization is too complicated,
    # Will handle with separate code.

    # Grammar for dictionary-like specification of simulation actions
    # We are intentionally over-permissive here, because the Action class will be able to give more helpful error
    # messages than a failed parse.
    dict_entry = pp.Word(
        pp.alphas) - colon - pp.Word(pp.alphanums + punctuation_safe)
    dict_key = pp.oneOf(' '.join(dictkeys), caseless=True)
    dictgram = dict_key - equals - pp.delimitedList(dict_entry) - comment

    # mutant model grammar
    mutkey = pp.CaselessLiteral('mutant')
    mut_op = pp.Group(
        pp.Word(pp.alphas + '_', pp.alphanums + '_') - pp.oneOf('+ - * / =') -
        num)
    mutgram = mutkey - equals - string - string - pp.Group(pp.OneOrMore(mut_op)) - \
        pp.Group(colon - (pp.delimitedList(exp_file) ^ nonetoken)) - comment

    # check each grammar and output somewhat legible error message
    line = (mdmgram | strgram | numgram | strnumgram | multnumgram
            | multstrgram | vargram | normgram | dictgram
            | mutgram).parseString(s, parseAll=True).asList()

    return line
Example #2
0
class StructDefine(object):
    """
    StructDefine is a decorator class used for defining structures
    by parsing a simple intermediate language input decorating
    a StructFormatter class.

    The grammar accepted is one or more lines of the form
    ``typename[*length] : [>|<]fieldname [; comment]``.
    """

    # Registry of every decorated class, keyed by class name.
    All = {}
    # struct-module format characters accepted as "raw" field types.
    rawtypes = (
        "x",
        "c",
        "b",
        "B",
        "h",
        "H",
        "i",
        "I",
        "l",
        "L",
        "f",
        "d",
        "s",
        "n",
        "N",
        "p",
        "P",
        "q",
        "Q",
    )
    # Default alignment, in bytes, for each raw type (overridable per
    # instance via the 'alignments' keyword argument).
    alignments = {
        "x": 1,
        "c": 1,
        "b": 1,
        "B": 1,
        "s": 1,
        "h": 2,
        "H": 2,
        "i": 4,
        "I": 4,
        "l": 4,
        "L": 4,
        "f": 4,
        "q": 8,
        "Q": 8,
        "d": 8,
        "P": 8,
    }
    # --- pyparsing grammar for the field-definition mini-language ---
    integer = pp.Regex(r"[0-9][0-9]*")
    integer.setParseAction(lambda r: int(r[0]))
    # "#i.j" bit-slice length specifier.
    bitslen = pp.Group(pp.Suppress("#") + integer + pp.Suppress(".") + integer)
    symbol = pp.Regex(r"[A-Za-z_][A-Za-z0-9_]*")
    comment = pp.Suppress(";") + pp.restOfLine
    # Field name, optionally prefixed with '>' or '<' byte-order marker.
    fieldname = pp.Suppress(":") + pp.Group(
        pp.Optional(pp.Literal(">") | pp.Literal("<"), default=None) + symbol)
    # "~" marks a variable-length field; an optional raw type letter follows.
    inf = pp.Regex(r"~[bBhHiI]?")
    length = integer | symbol | inf | bitslen
    typename = pp.Group(symbol +
                        pp.Optional(pp.Suppress("*") + length, default=0))
    structfmt = pp.OneOrMore(
        pp.Group(typename + fieldname + pp.Optional(comment, default="")))

    def __init__(self, fmt, **kargs):
        """Parse the field-definition string *fmt* into field descriptors.

        Recognized keyword arguments: 'packed' (bool), 'alignments'
        (per-instance alignment table), 'order' (default byte order for
        fields without an explicit marker). Any other keyword whose name
        matches a non-raw type name substitutes that type
        (via ``kargs.get(f_type, f_type)``).
        """
        self.fields = []
        self.source = fmt
        self.packed = kargs.get("packed", False)
        if "alignments" in kargs:
            self.alignments = kargs["alignments"]
        for l in self.structfmt.parseString(fmt, True).asList():
            f_type, f_name, f_comment = l
            f_order, f_name = f_name
            f_type, f_count = f_type
            if f_order is None and "order" in kargs:
                f_order = kargs["order"]
            if f_type in self.rawtypes:
                # Raw struct-module type: pick the field class from the
                # length spec ("~" => variable, "~b/B/h/..." => counted).
                f_cls = RawField
                if isinstance(f_count, str) and f_count.startswith("~"):
                    f_cls = VarField
                    if f_count[1:] in "bBhHiI":
                        f_cls = CntField
                f_align = self.alignments[f_type]
            else:
                # Non-raw type: treated as a nested/derived Field.
                f_cls = Field
                f_type = kargs.get(f_type, f_type)
                f_align = 0
            self.fields.append(
                f_cls(f_type, f_count, f_name, f_order, f_align, f_comment))

    def __call__(self, cls):
        """Decorator entry point: attach the parsed layout to *cls*."""
        self.All[cls.__name__] = cls
        cls.fields = self.fields
        cls.source = self.source
        cls.packed = self.packed
        cls.fkeys = defaultdict(default_formatter)
        return cls
Example #3
0
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import html
import pyparsing as pp

# Some useful primitives
# Some useful primitives
ident = pp.Word(pp.alphas + "_", pp.alphanums + "_")
intNum = pp.Word(pp.nums)
hexNum = pp.Literal("0x") + pp.Word(pp.hexnums)
octalNum = pp.Literal("0") + pp.Word("01234567")
# Integer literal in any C base, with optional size suffix.
integer = (hexNum | octalNum | intNum) + pp.Optional(
    pp.Literal("ULL") | pp.Literal("LL") | pp.Literal("L"))
floatNum = pp.Regex(r'\d+(\.\d*)?([eE]\d+)?') + pp.Optional(pp.Literal("f"))
char = pp.Literal("'") + pp.Word(pp.printables, exact=1) + pp.Literal("'")
arrayIndex = integer | ident

# Punctuation tokens, suppressed from the parse results.
lbracket = pp.Suppress("(")
rbracket = pp.Suppress(")")
lbrace = pp.Suppress("{")
rbrace = pp.Suppress("}")
comma = pp.Suppress(",")
equals = pp.Suppress("=")
dot = pp.Suppress(".")
semicolon = pp.Suppress(";")

# initializer := { [member = ] (variable | expression | { initializer } ) }
typeName = ident
varName = ident
Example #4
0
# Language keywords and operator literals.
RETURNS = pp.Keyword("returns")
PUBLIC = pp.Keyword("public")
VIRTUAL = pp.Keyword("virtual")
FUNC = pp.oneOf("func function")
PROC = pp.oneOf("proc procedure")
OF = pp.Keyword("of")
SELECT = pp.Keyword("select")
UPDATE = pp.Keyword("update")
FROM = pp.Keyword("from")
AS = pp.Keyword("as")
EQ = pp.Literal("=")
ONE = pp.Keyword("one")
STAR = pp.Literal("*")
UNIQUE = pp.Keyword("unique")
CLASS = pp.Keyword("class")
dblDashComment = pp.Regex(r"--(?:\\\n|[^\n])*").setName("-- comment")

# variable/label name (dotted identifiers allowed)
name = pp.Word(pp.alphanums + "_.")

# sql: table references, with an optional leading "alias =" discarded
astuple = AS + name
select_tablename = (
    pp.Optional(name + EQ).suppress() + name).setResultsName("select")
update_tablename = (
    pp.Optional(name + EQ).suppress() + name).setResultsName("update")
tablefield = pp.Word(pp.alphanums + "_.")
tablefields = pp.Optional(tablefield ^ pp.delimitedList(tablefield, ","))

# class: dotted type name
pptype = pp.Word(pp.alphanums + "._")
Example #5
0
        override = bool(toks.override),
        abstract = bool(toks.abstract),
        is_async = bool(toks.is_async),
        rtype = toks.rtype,
        anotations = toks.anotations,
        throws = list(toks.throws) if toks.throws else [])


def parse_namespace(toks):
    """Parse action: build a Namespace from the parsed name and members."""
    ns_name = toks.name
    ns_members = toks.members
    return Namespace(ns_name, ns_members)


# VAPI Parser Grammar
ident = pp.Word(pp.alphas + '_', pp.alphanums + '_').setName("ident")
dot_ident = pp.Combine(ident + pp.ZeroOrMore(pp.Literal(".") + ident))

# Numeric literals; the alternation tries the most specific form first so
# "1.2e3" is not read as the integer "1".
integer = pp.Regex(r'[+-]?\d+').setName("integer")
integer.setParseAction(tokenMap(int))
real = pp.Regex(r'[+-]?\d+\.\d*').setName("real")
real.setParseAction(tokenMap(float))
sci_real = pp.Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("scireal")
sci_real.setParseAction(tokenMap(float))
number = (sci_real | real | integer).streamline()

# Scalar literal values.
string = pp.QuotedString("\"", "\\")
null = pp.Literal("null").setParseAction(lambda toks: None)
true = pp.Literal("true").setParseAction(lambda toks: True)
false = pp.Literal("false").setParseAction(lambda toks: False)
value = string | number | null | true | false

# name=value pair used inside annotation parentheses.
param = pp.Group(ident + pp.Suppress("=") + value)
type_name = pp.Combine(dot_ident + pp.Optional(pp.Literal("?")))("type_name")
params = pp.Group(
    pp.Optional(param + pp.ZeroOrMore(pp.Suppress(',') + param))
).setParseAction(parse_params)("params")
params_in_parens = pp.Suppress('(') + pp.Optional(params) + pp.Suppress(')')
anotation = pp.Group(
    pp.Suppress('[') + ident("name") + pp.Optional(params_in_parens) +
    pp.Suppress(']')
).setParseAction(parse_anotation)
anotations = pp.ZeroOrMore(anotation).setParseAction(parse_anotations)("anotations")
access = pp.Optional(
    pp.Keyword("protected") | pp.Keyword("public") |
    pp.Keyword("private") | pp.Keyword("internal")
)("access")
Example #6
0
class BashHistoryParser(text_parser.PyparsingMultiLineTextParser):
    """Parses events from Bash history files."""

    NAME = 'bash'

    DESCRIPTION = 'Parser for Bash history files'

    _ENCODING = 'utf-8'

    # '#' followed by a 9-10 digit POSIX timestamp, cast to int and stored
    # under the 'timestamp' results name.
    _TIMESTAMP = pyparsing.Suppress('#') + pyparsing.Word(
        pyparsing.nums, min=9, max=10).setParseAction(
            text_parser.PyParseIntCast).setResultsName('timestamp')

    # Everything up to (but not including) the next '#<10-digit>' line or
    # the end of the data.
    _COMMAND = pyparsing.Regex(r'.*?(?=($|\n#\d{10}))',
                               re.DOTALL).setResultsName('command')

    _LINE_GRAMMAR = _TIMESTAMP + _COMMAND + pyparsing.lineEnd()

    # A non-comment line followed by a timestamp comment that is not a
    # generic Python-style comment; distinguishes bash history files from
    # other '#'-commented text files.
    _VERIFICATION_GRAMMAR = (pyparsing.Regex(r'^\s?[^#].*?$', re.MULTILINE) +
                             _TIMESTAMP +
                             pyparsing.NotAny(pyparsing.pythonStyleComment))

    LINE_STRUCTURES = [('log_entry', _LINE_GRAMMAR)]

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a record and produces a Bash history event.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): elements parsed from the file.

    Raises:
      ParseError: when the structure type is unknown.
    """
        if key != 'log_entry':
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

        event_data = BashHistoryEventData()
        event_data.command = structure.command

        # The '#'-comment timestamp records when the command was issued.
        date_time = dfdatetime_posix_time.PosixTime(
            timestamp=structure.timestamp)
        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_MODIFICATION)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    # pylint: disable=unused-argument
    def VerifyStructure(self, parser_mediator, lines):
        """Verifies that this is a bash history file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between
          parsers and other components, such as storage and dfvfs.
      lines (str): one or more lines from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
        # A single successful scan anywhere in the sample is sufficient.
        match_generator = self._VERIFICATION_GRAMMAR.scanString(lines,
                                                                maxMatches=1)
        return bool(list(match_generator))
Example #7
0
class ZshExtendedHistoryParser(text_parser.PyparsingMultiLineTextParser):
    """Parser for ZSH extended history files"""

    NAME = 'zsh_extended_history'
    DATA_FORMAT = 'ZSH extended history file'

    _ENCODING = 'utf-8'

    # Extended-history lines look like ": <timestamp>:<elapsed>;<command>".
    _VERIFICATION_REGEX = re.compile(r'^:\s\d+:\d+;')

    _PYPARSING_COMPONENTS = {
        'timestamp':
        text_parser.PyparsingConstants.INTEGER.setResultsName('timestamp'),
        'elapsed_seconds':
        text_parser.PyparsingConstants.INTEGER.setResultsName(
            'elapsed_seconds'),
        # Command text runs until the next ": <ts>:<elapsed>;" header or the
        # end of the data, so multi-line commands are captured whole.
        'command':
        pyparsing.Regex(r'.+?(?=($|\n:\s\d+:\d+;))',
                        re.DOTALL).setResultsName('command'),
    }

    _LINE_GRAMMAR = (pyparsing.Literal(':') +
                     _PYPARSING_COMPONENTS['timestamp'] +
                     pyparsing.Literal(':') +
                     _PYPARSING_COMPONENTS['elapsed_seconds'] +
                     pyparsing.Literal(';') +
                     _PYPARSING_COMPONENTS['command'] + pyparsing.LineEnd())

    LINE_STRUCTURES = [('command', _LINE_GRAMMAR)]

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a record and produces a ZSH history event.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure parsed from the log file.

    Raises:
      ParseError: when the structure type is unknown.
    """
        if key != 'command':
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

        event_data = ZshHistoryEventData()
        event_data.command = self._GetValueFromStructure(structure, 'command')
        event_data.elapsed_seconds = self._GetValueFromStructure(
            structure, 'elapsed_seconds')

        timestamp = self._GetValueFromStructure(structure, 'timestamp')
        date_time = dfdatetime_posix_time.PosixTime(timestamp=timestamp)
        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_MODIFICATION)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def VerifyStructure(self, parser_mediator, lines):
        """Verifies whether content corresponds to a ZSH extended_history file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      lines (str): one or more lines from the text file.

    Returns:
      bool: True if the line was successfully parsed.
    """
        # A cheap regex check on the first line is enough for verification.
        if self._VERIFICATION_REGEX.match(lines):
            return True

        return False
                          adjacent=False)
    value.setParseAction(lambda start, tokens: (start, tokens[0]))
    empty = pypar.Empty()
    empty.setParseAction(lambda start, tokens: (start, tokens))
    value = pypar.Group(value + empty)
    row = pypar.Group(
        pypar.Optional(separator).suppress() +
        (value + pypar.Literal(separator).suppress()) * (1, None) +
        pypar.Optional(value) +
        (pypar.StringEnd() | pypar.Literal("\n")).suppress() +
        pypar.Optional("\n").suppress())
    table_parser ^= (
        (pypar.LineStart() + pypar.Optional(pypar.White())).suppress() +
        # Allow line breaks for table headings
        row + pypar.Optional(
            pypar.Regex(r"[\-_=]{3,}") + pypar.Literal("\n") *
            (1, 2)).suppress() + row * (0, None)).setResultsName("delimiter:" +
                                                                 separator)
table_parser.parseWithTabs()

key_value_separators = [":", "-", ">"]
key_value_list_parser = pypar.NoMatch()
for separator in key_value_separators:
    value = pypar.Combine(word_token_regex(separator) * (1, 10),
                          joinString=' ',
                          adjacent=False)
    value.setParseAction(lambda start, tokens: (start, tokens[0]))
    empty = pypar.Empty()
    empty.setParseAction(lambda start, tokens: (start, tokens))
    value = pypar.Group(value + empty)
    row = pypar.Group(value + pypar.Literal(separator).suppress() + value +
Example #9
0
    def __create(self):
        """Build and return the top-level pyparsing grammar of the language.

        The grammar is assembled bottom-up: tokens, literals, primary and
        postfix expressions, SNMP OID expressions, if/try constructs, and
        finally a precedence chain of binary operators. Returns a parser
        element anchored at both string start and end.
        """

        START = pp.StringStart().suppress()
        END = pp.StringEnd().suppress()

        #----------------------------------------------------------------------#
        # LANGUAGE TOKENS
        #----------------------------------------------------------------------#

        TRUE = pp.Literal('True').setParseAction(lambda s, loc, toks: toks[0])
        FALSE = pp.Literal('False').setParseAction(
            lambda s, loc, toks: toks[0])

        AND = pp.Literal('and').setParseAction(lambda s, loc, toks: toks[0])
        OR = pp.Literal('or').setParseAction(lambda s, loc, toks: toks[0])
        NOT = pp.Literal('not').setParseAction(lambda s, loc, toks: toks[0])

        #
        # Expression's elements
        #
        LEFT_PAREN = pp.Literal('(')
        RIGHT_PAREN = pp.Literal(')')
        LEFT_SPAREN = pp.Literal('[')
        RIGHT_SPAREN = pp.Literal(']')
        COMMA = pp.Literal(',')
        SEMICOLON = pp.Literal(';')

        # OID's syntax elements
        COLUMN = pp.Literal(':')
        TYPE_NEW = pp.Literal('@')
        TYPE_OLD = pp.Literal('#')

        # Unescaped String prefix
        UNESCAPE_STR = pp.Literal('r')

        #
        # Operators
        #

        ASSIGN = pp.Literal('=')
        # OIDs concat operator
        DOT = pp.Literal('.')

        PLUS_PLUS = pp.Literal('++')
        MINUS_MINUS = pp.Literal('--')

        POWER = pp.Literal('**')

        PLUS = pp.Literal('+')
        MINUS = pp.Literal('-')
        MULTI = pp.Literal('*')
        DIV = pp.Literal('/')
        MOD = pp.Literal('%')

        EQ = pp.Literal('eq')
        EQUAL = pp.Literal('==')
        NEQUAL = pp.Literal('!=')

        REGEXPQUAL = pp.Literal('=~')

        GT = pp.Literal('>')
        LT = pp.Literal('<')
        GEQ = pp.Literal('>=')
        LEQ = pp.Literal('<=')

        LOGIC_NOT = pp.Literal('!')
        LOGIC_AND = pp.Literal('&&')
        LOGIC_OR = pp.Literal('||')

        BITAND = pp.Literal('&')
        BITOR = pp.Literal('|')
        BITXOR = pp.Literal('^')

        # One's complement operator
        BITONE = pp.Literal('~')

        IF = pp.Literal('if')
        THEN = pp.Literal('then')
        ELSE = pp.Literal('else')

        TRY = pp.Literal('try')
        CATCH = pp.Literal('catch')

        #---------------------------------------------------------------------------*/
        #  Language Types
        #---------------------------------------------------------------------------*/

        #
        # Literals
        #

        QUOTED = pp.QuotedString('"', escChar='\\') | pp.QuotedString(
            "'", escChar='\\')

        STRING = pp.originalTextFor(QUOTED)

        RSTRING = pp.originalTextFor(UNESCAPE_STR + QUOTED)

        #
        # Variable identifiers ($a, $a1, $_a,  $a_a123)
        #
        VAR_ID = pp.Word('$', pp.alphanums + '_', min=2)

        #
        # Function identifiers
        #
        FUNCTION_ID = pp.Word(pp.alphas, pp.alphanums + '_', min=1)

        #
        # Numbers
        #
        HEX = pp.originalTextFor(pp.Regex('[0][xX][0-9a-fA-F]+'))

        DEC = pp.originalTextFor(pp.Word('0') | pp.Regex('[1-9][0-9]*'))

        OCTAL = pp.originalTextFor(pp.Regex('[0][0-7]+'))

        # NOTE(review): non-raw regex strings below contain '\.' escape
        # sequences; they work but raise warnings on newer Python versions.
        FLOAT1 = pp.Regex('[0-9]+[\.][0-9]+([eE][+-]?[0-9]+)*')

        FLOAT2 = pp.Regex('[0-9]+[\.]([eE][+-]?[0-9]+)*')

        FLOAT = pp.originalTextFor(FLOAT1 | FLOAT2)

        #
        # Special identifiers  { <name> (@|#) }
        #
        DATA_ID = pp.originalTextFor(
            pp.Combine(
                pp.Word('{') + pp.Word(pp.alphas, pp.alphanums + '_-.') +
                pp.Word('@#') + pp.Word('}')))

        #----------------------------------------------------------------------#
        #----------------------------------------------------------------------#
        #
        # GRAMMAR SYNTAX
        #
        #----------------------------------------------------------------------#
        #----------------------------------------------------------------------#

        #----------------------------------------------------------------------#
        #  variabile
        #  constants    (1, 1.0, 'c', "foo", ecc...)
        #  ( ... )
        #----------------------------------------------------------------------#

        OID_SEQUENCE = pp.Regex('[0-9]+[\.][0-9]+([\.][0-9]+)+')

        # NOTE(review): the toks[1] / toks[1:] indexing below assumes this
        # pyparsing version's originalTextFor yields a leading extra token;
        # confirm before upgrading pyparsing.
        constant = (
            TRUE.setParseAction(lambda s, loc, toks: self.f.createBool(True))
            |
            FALSE.setParseAction(lambda s, loc, toks: self.f.createBool(False))
            | HEX.setParseAction(
                lambda s, loc, toks: self.f.createInteger(int(toks[1], 16)))
            | (~(OID_SEQUENCE) + FLOAT).setParseAction(
                lambda s, loc, toks: self.f.createFloat(float(toks[0])))
            | OCTAL.setParseAction(
                lambda s, loc, toks: self.f.createInteger(int(toks[1], 8)))
            | DEC.setParseAction(
                lambda s, loc, toks: self.f.createInteger(int(toks[1], 10)))
            | STRING.setParseAction(
                lambda s, loc, toks: self.f.createString(toks, True))
            | RSTRING.setParseAction(
                lambda s, loc, toks: self.f.createString(toks[1:], True)))

        cond_expr = pp.Forward()

        #----------------------------------------------------------------------#
        # Primary Expr
        #----------------------------------------------------------------------#

        primary_expr = (
            (LEFT_PAREN.suppress() + cond_expr + RIGHT_PAREN.suppress()
             ).setParseAction(lambda s, loc, toks: toks[0])
            | VAR_ID.setParseAction(
                lambda s, loc, toks: self.f.createIdentifier(toks[0]))
            | DATA_ID.setParseAction(
                lambda s, loc, toks: self.f.createDataIdentifier(toks[1]))
            | constant)

        #----------------------------------------------------------------------#
        # POSTFIX EXPRESSION
        #----------------------------------------------------------------------#
        # foo()
        # for(a,b,...)
        # $id()
        # $id
        # $id(a,b,...)
        #----------------------------------------------------------------------#

        #
        # Named argument
        #
        named_argument_value = pp.Forward()

        name_argument = (
            FUNCTION_ID + ASSIGN.suppress() + named_argument_value
        ).setParseAction(
            lambda s, loc, toks: self.f.createNamedArgument(toks[0], toks[1]))

        #
        # Simple argument
        #
        simple_argument_value = pp.Forward()

        #
        # 1, 2, 3, foo=10, bar=10234
        #
        argument = name_argument | simple_argument_value

        argument_expr_list = (argument +
                              pp.ZeroOrMore(COMMA.suppress() + argument))

        #----------------------------------------------------------------------#
        #  ( ), (a,b,c,...)
        #----------------------------------------------------------------------#

        def _call_expr_callback(s, loc, toks):
            # Tag a call site; the argument list may be absent.
            args = toks.get('args')
            if args is None: args = []
            else: args = list(args)
            return ('CALL', args)

        call_expr = (
            LEFT_PAREN.suppress() + pp.Optional(argument_expr_list('args')) +
            RIGHT_PAREN.suppress()).setParseAction(_call_expr_callback)

        #----------------------------------------------------------------------#
        # [], [;], [i], [i;], [;j]   [i;j]
        #----------------------------------------------------------------------#

        def _range_expr_callback(s, loc, toks):
            # Tag a range/index site with its optional start and end.
            args = []
            start = toks.get('start')
            args.append(start)
            if 'end' in toks:
                end = toks.get('end')
                args.append(end)
            return ('RANGE', args)

        range_value = pp.Forward()

        range_expr = (
            LEFT_SPAREN.suppress() + pp.Optional(range_value)('start') +
            pp.Optional(SEMICOLON.suppress() + pp.Optional(range_value)('end'))
            + RIGHT_SPAREN.suppress()).setParseAction(_range_expr_callback)

        #----------------------------------------------------------------------#

        call_or_range = range_expr | call_expr

        def _func_callback(s, loc, toks):
            # Fold a primary expression and its trailing ('CALL'/'RANGE')
            # tags, left to right, into nested call/range nodes.

            if len(toks) == 1:
                return toks[0]

            current_t = toks[0]

            for t in toks[1:]:
                f_type, args = t

                if f_type == 'CALL':
                    current_t = self.f.createCallOp(current_t, args)
                elif f_type == 'RANGE':
                    current_t = self.f.createRangeOp(current_t, args)
                else:
                    raise Exception("ERROR")

            return current_t

        postfix_expr = (
            (FUNCTION_ID +
             pp.OneOrMore(call_or_range)).setParseAction(_func_callback)
            | (primary_expr +
               pp.ZeroOrMore(call_or_range)).setParseAction(_func_callback))

        #----------------------------------------------------------------------#
        #  UNARY EXPRESSION
        #----------------------------------------------------------------------#
        #  <expr>
        #  <expr>()
        #  <expr>[]
        #  + <expr>
        #  - <expr>
        #  ~ <expr>
        #  ! <expr>
        #---------------------------------------------------------------------------*/

        unary_expr = pp.Forward()

        calc_expr = (
            postfix_expr
            | (PLUS_PLUS.suppress() + unary_expr).setParseAction(
                lambda s, loc, toks: self.f.createAddAddOp(toks[0]))
            | (MINUS_MINUS.suppress() + unary_expr).setParseAction(
                lambda s, loc, toks: self.f.createSubSubOp(toks[0]))
            | (PLUS.suppress() +
               unary_expr).setParseAction(lambda s, loc, toks: toks[0])
            | (MINUS.suppress() + unary_expr).setParseAction(
                lambda s, loc, toks: self.f.createMinusOp(toks[0]))
            |
            ((LOGIC_NOT | NOT).suppress() + unary_expr
             ).setParseAction(lambda s, loc, toks: self.f.createNotOp(toks[0]))
            | (BITONE.suppress() + unary_expr).setParseAction(
                lambda s, loc, toks: self.f.createBitOneOp(toks[0])))

        #---------------------------------------------------------------------------*/
        # OID Expressions
        #---------------------------------------------------------------------------*/
        # These expressions rappresent SNMP OID values:
        #
        #    <oid expression>  [':' <community-expr>] '@' [ <host-expr> [':' <port-expr>] ]
        #
        # where <oid expression> is:
        #
        #    n.n.n '.' <exp-1> '.' <exp-2> '.' <exp-n>
        #
        #---------------------------------------------------------------------------*/

        #
        #  The DOT ('.') operator is a bit tricky: expressions are converted
        #  into strings and concatenated.
        #
        #  This means that if i concatenate OID  1.2.3.4  with the float
        #  literal 5.6  the result is  1.2.3.4.5.6
        #

        def _oid_compositon_callback(s, loc, toks):
            toks = list(toks)

            expr = toks.pop(0)
            while toks:
                expr = self.f.createConcatOID(expr, toks.pop(0))
            return expr

        def _oid_callback(s, loc, toks):
            return self.f.createOID(toks[1])

        oid_compositon = (
            pp.originalTextFor(OID_SEQUENCE).setParseAction(_oid_callback) +
            pp.ZeroOrMore(DOT.suppress() + (
                pp.originalTextFor(OID_SEQUENCE).setParseAction(_oid_callback)
                | postfix_expr))).setParseAction(_oid_compositon_callback)

        def _snmp_single_expr_callback(s, loc, toks):
            # Optional parts (community, node, port) default to None.
            oid = toks['oid']
            community = toks['community'] if 'community' in toks else None
            t = toks['type']
            node = toks['node'] if 'node' in toks else None
            port = toks['port'] if 'port' in toks else None
            return self.f.createSnmpValue(oid, community, t, node, port)

        snmp_single_expr = (
            oid_compositon('oid') +
            pp.Optional(COLUMN.suppress() + postfix_expr)('community') +
            pp.originalTextFor(TYPE_OLD | TYPE_NEW)('type') + pp.Optional(
                postfix_expr('node') +
                pp.Optional(COLUMN.suppress() + postfix_expr)('port'))
        ).setParseAction(_snmp_single_expr_callback)

        #----------------------------------------------------------------------#
        # 1.3.6.1.2.1.1@ [ ]
        #----------------------------------------------------------------------#

        def _func_callback_x(s, loc, toks):
            toks = list(toks)
            if len(toks) == 1: return toks[0]
            expr = toks[0]
            range_args = toks[1][1]
            return self.f.createRangeOp(expr, range_args)

        snmp_value_expr = (
            snmp_single_expr +
            pp.Optional(range_expr)).setParseAction(_func_callback_x)

        #----------------------------------------------------------------------#
        # IF <expr> THEN <expr ELSE <expr>
        #----------------------------------------------------------------------#

        def _if_callback(s, loc, toks):
            e1 = toks.get('e1')
            e2 = toks.get('e2')
            e3 = toks.get('e3')
            return self.f.createIf(e1, e2, e3)

        if_expr = (IF.suppress() + cond_expr("e1") + THEN.suppress() +
                   cond_expr("e2") + ELSE.suppress() +
                   cond_expr("e3")).setParseAction(_if_callback)

        #----------------------------------------------------------------------#
        # try <expr> catch [ <id> ] ( <expr> ) [ catch <id> ( <expr> ) ....]
        #----------------------------------------------------------------------#

        def _catch_expr_callback(s, loc, toks):
            ex_name = toks.get('exception')
            expr = toks.get('expr')

            return (ex_name, expr)

        def _try_expr_callback(s, loc, toks):
            body = toks['body']
            catch_list = list(toks['catch_list'])

            return self.f.createTry(body, catch_list)

        #
        # catch [ <expr> ] ( <expr> )
        #
        catch_expr_body = pp.Forward()

        catch_expr = (
            pp.Optional(FUNCTION_ID)('exception') + LEFT_PAREN.suppress() +
            pp.Optional(cond_expr)('expr') +
            RIGHT_PAREN.suppress()).setParseAction(_catch_expr_callback)

        #
        # try <expr> [ catch <expr> ( <expr> ) .... ]
        #
        catch_list = CATCH.suppress() + pp.OneOrMore(catch_expr)

        try_expr = (
            TRY.suppress() + cond_expr('body') +
            catch_list('catch_list')).setParseAction(_try_expr_callback)

        #----------------------------------------------------------------------#
        # UNARY EXPRESSION
        #----------------------------------------------------------------------#

        unary_expr <<= (if_expr | try_expr | snmp_value_expr | calc_expr)

        #----------------------------------------------------------------------#
        # OPERATORS
        #----------------------------------------------------------------------#

        # Maps each operator's literal text (the Literal.match attribute) to
        # the factory method that builds its AST node.
        OP_MAP = {
            str(POWER.match): self.f.createPowerOp,
            str(MULTI.match): self.f.createMultiOp,
            str(DIV.match): self.f.createDivOp,
            str(MOD.match): self.f.createModOp,
            str(PLUS.match): self.f.createAddOp,
            str(MINUS.match): self.f.createSubOp,
            str(LT.match): self.f.createLtOp,
            str(GT.match): self.f.createGtOp,
            str(LEQ.match): self.f.createLEqOp,
            str(GEQ.match): self.f.createGEqOp,
            str(EQUAL.match): self.f.createEqOp,
            str(EQ.match): self.f.createEqOp,
            str(NEQUAL.match): self.f.createNotEqOp,
            str(REGEXPQUAL.match): self.f.createRegExpEqOp,
            str(BITAND.match): self.f.createBitAndOp,
            str(BITXOR.match): self.f.createBitXOrOp,
            str(BITOR.match): self.f.createBitOrOp,
            str(AND.match): self.f.createAndOp,
            str(LOGIC_AND.match): self.f.createAndOp,
            str(OR.match): self.f.createOrOp,
            str(LOGIC_OR.match): self.f.createOrOp,
        }

        def _op_callback(s, loc, toks):
            # Left-associative fold: [a, op, b, op, c] -> op(op(a,b),c).
            l = list(toks)
            if len(l) == 1: return l

            expr = l.pop(0)
            while l:
                op, expr2 = l.pop(0), l.pop(0)
                op_callback = OP_MAP[op]
                expr = op_callback(expr, expr2)
            return expr

        # Precedence chain, tightest binding first; each step wraps the
        # previous `expr`.
        expr = unary_expr

        #// a ** b
        expr = (expr +
                pp.ZeroOrMore(POWER + expr)).setParseAction(_op_callback)

        #// a * b
        #// a / c
        #// a % c
        expr = (expr + pp.ZeroOrMore((MULTI | DIV | MOD) +
                                     expr)).setParseAction(_op_callback)

        #// a + b
        #// a - b
        expr = (
            expr +
            pp.ZeroOrMore((PLUS | MINUS) + expr)).setParseAction(_op_callback)

        #// a < b
        #// a > b
        #// a <= b
        #// a >= b
        expr = (expr + pp.ZeroOrMore((LT | GT | LEQ | GEQ) +
                                     expr)).setParseAction(_op_callback)

        #// a == b
        #// a != b
        #// a ~= b
        expr = (expr + pp.ZeroOrMore((EQUAL | EQ | NEQUAL | REGEXPQUAL) +
                                     expr)).setParseAction(_op_callback)

        #// a & b
        expr = (expr +
                pp.ZeroOrMore(BITAND + expr)).setParseAction(_op_callback)

        #// a ^ b
        expr = (expr +
                pp.ZeroOrMore(BITXOR + expr)).setParseAction(_op_callback)

        #// a | b
        expr = (expr +
                pp.ZeroOrMore(BITOR + expr)).setParseAction(_op_callback)

        #// a && b
        expr = (expr + pp.ZeroOrMore((LOGIC_AND | AND) +
                                     expr)).setParseAction(_op_callback)

        #//  a || b
        expr = (
            expr +
            pp.ZeroOrMore((LOGIC_OR | OR) + expr)).setParseAction(_op_callback)

        #----------------------------------------------------------------------#
        # Recursive rules
        #----------------------------------------------------------------------#

        cond_expr <<= expr

        simple_argument_value <<= cond_expr
        named_argument_value <<= cond_expr
        range_value <<= cond_expr

        #----------------------------------------------------------------------#
        # Initiali RULE
        #----------------------------------------------------------------------#

        lang_expr = (START + cond_expr + END)

        return lang_expr
Example #10
0
import collections

import pyparsing as pp

from cifparser.path import path_parser
from cifparser.errors import ParserError

# Tagged record types emitted by the per-line parse actions below.
# NOTE: the original snippet used collections.namedtuple without importing
# collections, which raised NameError at import time.
Comment = collections.namedtuple('Comment', ['value'])
ObjectDef = collections.namedtuple('ObjectDef', ['path'])
ListItemDef = collections.namedtuple('ListItemDef', ['path'])
FieldDef = collections.namedtuple('FieldDef', ['field_name', 'field_value'])
ValueContinuation = collections.namedtuple('ValueContinuation', ['value_continuation'])
ListContinuation = collections.namedtuple('ListContinuation', ['list_continuation'])

# One grammar per line form of the configuration format.
comment_parser = pp.Literal('#') + pp.restOfLine
objectdef_parser = path_parser + pp.Literal(':')
listitemdef_parser = pp.Literal('-') + path_parser + pp.Literal(':')
fieldkey_parser = pp.Regex(r'[^=]+')
fieldkey_parser.setParseAction(lambda tokens: tokens[0].strip())
fielddef_parser = fieldkey_parser + pp.Literal('=') + pp.restOfLine
valuecontinuation_parser = pp.Literal('|') + pp.restOfLine
listcontinuation_parser = pp.Literal(',') + pp.restOfLine

def comment_parse_action(toks):
    """Wrap the text following the '#' marker in a Comment record."""
    return Comment(toks[1])


comment_parser.setParseAction(comment_parse_action)

def objectdef_parse_action(toks):
    """Wrap the parsed path (the first token) in an ObjectDef record."""
    return ObjectDef(toks[0])


objectdef_parser.setParseAction(objectdef_parse_action)

def listitemdef_parse_action(toks):
    """Wrap the path following the '-' marker in a ListItemDef record."""
    return ListItemDef(toks[1])
Example #11
0
#!/usr/local/bin/python
# -*- coding: utf-8 -*-

import pyparsing as pp
import pyparsing_ext as ppx

# Word built from a character predicate; presumably consumes the longest run
# of characters in {'a','b','c','d'} (the trailing 'e' is left unparsed) --
# TODO confirm against pyparsing_ext.Wordx docs.
w = ppx.Wordx(lambda x: x in {'a', 'b', 'c', 'd'})
print(w.parseString('abbcccdddde'))

# 2-D matrix of w-words: ch1 separates cells in a row, ch2 (one or more
# newlines, whitespace-significant) separates rows.
M = ppx.delimitedMatrix(w, ch1=' ', ch2=pp.Regex('\n+').leaveWhitespace())
p = M.parseString('a b\nc d')
print(p.asList())

# Items labelled "[1]", "[2]", ... parsed with the default
# enumeratedItems grammar.
s = '''[1]hehe
[2]hehe'''
print(ppx.enumeratedItems().parseString(s))
Example #12
0
    def parse(self, request):
        """Parse the request's 'filter' parameter into condition objects.

        Builds a pyparsing grammar for filter expressions of the form
        ``identifier__subfield OP value`` combined with NOT/AND/OR, parses
        the filter string and hands the resulting tree to
        ``self._parse_to_conditions``.

        Args:
            request: request whose ``_rest_context['filter']`` holds the
                raw filter string.

        Returns:
            Parsed conditions, or None when no filter string is present.

        Raises:
            FilterParserError: if the filter string cannot be parsed.
        """
        filter_string = request._rest_context.get('filter')
        if not filter_string:
            return None

        # Character offsets of each parsed condition, recorded by the
        # condition parse action below.
        condition_positions = []

        operator = pp.Regex('|'.join(self.ALLOWED_OPERATORS))
        # Signed int/float with optional exponent.  The groups are
        # non-capturing '(?:'; the original had the '(:?' typo, which also
        # admitted stray colons such as '1:.5'.
        number = pp.Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")

        AND = pp.Literal(LOGICAL_OPERATORS.AND)
        OR = pp.Literal(LOGICAL_OPERATORS.OR)
        NOT = pp.Literal(LOGICAL_OPERATORS.NOT)

        # Identifiers may be chained with '__' (Django-style traversal).
        identifier = pp.Regex(r"[a-zA-Z]+[a-zA-Z0-9]*(_[a-zA-Z0-9]+)*")
        identifiers = pp.Group(
            pp.delimitedList(identifier, delim="__", combine=False))

        comparison_term = pp.Forward()
        # Value lists may be delimited with [], () or {}.
        list_term = (
            pp.Group(
                pp.Suppress('[') +
                pp.delimitedList(comparison_term, delim=",", combine=False) +
                pp.Suppress(']'))
            | pp.Group(
                pp.Suppress('(') +
                pp.delimitedList(comparison_term, delim=",", combine=False) +
                pp.Suppress(')'))
            | pp.Group(
                pp.Suppress('{') +
                pp.delimitedList(comparison_term, delim=",", combine=False) +
                pp.Suppress('}')))
        string = (pp.QuotedString("'", escChar='\\', unquoteResults=True)
                  | pp.QuotedString('"', escChar='\\', unquoteResults=True))
        # Return [None] rather than None: a pyparsing parse action that
        # returns None leaves the original token (the string 'null') in
        # place, so the literal was never converted.
        null = pp.Literal('null').setParseAction(lambda s, l, t: [None])
        boolean = pp.Regex('|'.join(
            ('true', 'false'))).setParseAction(lambda s, l, t: t[0] == 'true')

        comparison_term << (string | number | list_term | null | boolean)

        condition = pp.Group(identifiers + operator +
                             comparison_term).setResultsName('condition')
        condition.setParseAction(
            lambda s, loc, toks: condition_positions.append(loc))

        # Precedence: unary NOT binds tightest, then AND, then OR.
        expr = pp.operatorPrecedence(condition, [
            (
                NOT,
                1,
                pp.opAssoc.RIGHT,
            ),
            (
                AND,
                2,
                pp.opAssoc.LEFT,
            ),
            (
                OR,
                2,
                pp.opAssoc.LEFT,
            ),
        ])

        try:
            return self._parse_to_conditions(
                expr.parseString(filter_string, parseAll=True).asList()[0],
                list(condition_positions), condition, filter_string)
        except pp.ParseException as ex:
            # Chain the original ParseException for debuggability.
            raise FilterParserError(
                mark_safe(ugettext('Invalid filter value "{}"').format(
                    filter_string))) from ex
Example #13
0
class SCCMParser(text_parser.PyparsingMultiLineTextParser):
  """Parser for Windows System Center Configuration Manager (SCCM) logs."""

  NAME = 'sccm'
  DESCRIPTION = 'Parser for SCCM logs files.'

  # 'utf-8-sig' strips the BOM these logs start with (see VerifyStructure).
  _ENCODING = 'utf-8-sig'

  # Increasing the buffer size as SCCM messages are commonly well larger
  # than the default value.
  BUFFER_SIZE = 16384

  LINE_STRUCTURES = []

  # Factor to convert the parsed UTC offset (in minutes) to microseconds.
  _MICRO_SECONDS_PER_MINUTE = 60 * 1000000

  _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
  _ONE_OR_TWO_DIGITS = text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS

  # PyParsing Components used to construct grammars for parsing lines.
  _PARSING_COMPONENTS = {
      'msg_left_delimiter': pyparsing.Literal('<![LOG['),
      'msg_right_delimiter': pyparsing.Literal(']LOG]!><time="'),
      'year': _FOUR_DIGITS.setResultsName('year'),
      'month': _ONE_OR_TWO_DIGITS.setResultsName('month'),
      'day': _ONE_OR_TWO_DIGITS.setResultsName('day'),
      # 3 to 7 digits: precision varies per log (see ParseRecord).
      'microsecond': pyparsing.Regex(r'\d{3,7}'). setResultsName('microsecond'),
      # Signed 3-digit offset from UTC in minutes, e.g. '-480'.
      'utc_offset_minutes': pyparsing.Regex(r'[-+]\d{3}').setResultsName(
          'utc_offset_minutes'),
      'date_prefix': pyparsing.Literal('" date="'). setResultsName(
          'date_prefix'),
      'component_prefix': pyparsing.Literal('" component="').setResultsName(
          'component_prefix'),
      'component': pyparsing.Word(pyparsing.alphanums).setResultsName(
          'component'),
      # Message text: everything (newlines included) up to, but not
      # consuming, the closing ']LOG]!><time="' delimiter.
      'text': pyparsing.Regex(
          r'.*?(?=(]LOG]!><time="))', re.DOTALL).setResultsName('text'),
      # Everything up to the next '<![LOG[' opening delimiter.
      'line_remainder': pyparsing.Regex(
          r'.*?(?=(\<!\[LOG\[))', re.DOTALL).setResultsName('line_remainder'),
      'lastline_remainder': pyparsing.restOfLine.setResultsName(
          'lastline_remainder'),
      'hour': _ONE_OR_TWO_DIGITS.setResultsName('hour'),
      'minute': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          'minute'),
      'second': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          'second')}

  # Base grammar for individual log event lines.
  LINE_GRAMMAR_BASE = (
      _PARSING_COMPONENTS['msg_left_delimiter'] +
      _PARSING_COMPONENTS['text'] +
      _PARSING_COMPONENTS['msg_right_delimiter'] +
      _PARSING_COMPONENTS['hour'] +
      pyparsing.Suppress(':') + _PARSING_COMPONENTS['minute'] +
      pyparsing.Suppress(':') + _PARSING_COMPONENTS['second'] +
      pyparsing.Suppress('.') + _PARSING_COMPONENTS['microsecond'] +
      _PARSING_COMPONENTS['date_prefix'] + _PARSING_COMPONENTS['month'] +
      pyparsing.Suppress('-') + _PARSING_COMPONENTS['day'] +
      pyparsing.Suppress('-') + _PARSING_COMPONENTS['year'] +
      _PARSING_COMPONENTS['component_prefix'] +
      _PARSING_COMPONENTS['component'])

  # Grammar for individual log event lines with a minutes offset from UTC.
  LINE_GRAMMAR_OFFSET = (
      _PARSING_COMPONENTS['msg_left_delimiter'] +
      _PARSING_COMPONENTS['text'] +
      _PARSING_COMPONENTS['msg_right_delimiter'] +
      _PARSING_COMPONENTS['hour'] +
      pyparsing.Suppress(':') + _PARSING_COMPONENTS['minute'] +
      pyparsing.Suppress(':') + _PARSING_COMPONENTS['second'] +
      pyparsing.Suppress('.') + _PARSING_COMPONENTS['microsecond'] +
      _PARSING_COMPONENTS['utc_offset_minutes'] +
      _PARSING_COMPONENTS['date_prefix'] + _PARSING_COMPONENTS['month'] +
      pyparsing.Suppress('-') + _PARSING_COMPONENTS['day'] +
      pyparsing.Suppress('-') + _PARSING_COMPONENTS['year'] +
      _PARSING_COMPONENTS['component_prefix'] +
      _PARSING_COMPONENTS['component'])

  # The '_at_end' variants match the final entry in the file, which has no
  # following '<![LOG[' delimiter to anchor 'line_remainder' on.
  LINE_STRUCTURES = [
      ('log_entry',
       LINE_GRAMMAR_BASE + _PARSING_COMPONENTS['line_remainder']),
      ('log_entry_at_end',
       LINE_GRAMMAR_BASE +_PARSING_COMPONENTS['lastline_remainder'] +
       pyparsing.lineEnd),
      ('log_entry_offset',
       LINE_GRAMMAR_OFFSET + _PARSING_COMPONENTS['line_remainder']),
      ('log_entry_offset_at_end',
       LINE_GRAMMAR_OFFSET + _PARSING_COMPONENTS['lastline_remainder'] +
       pyparsing.lineEnd)]

  def ParseRecord(self, parser_mediator, key, structure):
    """Parse the record and return an SCCM log event object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
      TimestampError: when a non-int value for microseconds is encountered.
    """
    if key not in (
        'log_entry', 'log_entry_at_end', 'log_entry_offset',
        'log_entry_offset_at_end'):
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    # Sometimes, SCCM logs will exhibit a seven-digit sub-second precision
    # (100 nanosecond intervals). Using six-digit precision because
    # timestamps are in microseconds.
    if len(structure.microsecond) > 6:
      structure.microsecond = structure.microsecond[0:6]

    try:
      microseconds = int(structure.microsecond, 10)
    except ValueError as exception:
      parser_mediator.ProduceExtractionError(
          'unable to determine microseconds with error: {0!s}'.format(
              exception))
      return

    # 3-digit precision is milliseconds,
    # so multiply by 1000 to convert to microseconds
    if len(structure.microsecond) == 3:
      microseconds *= 1000

    try:
      timestamp = timelib.Timestamp.FromTimeParts(
          structure.year, structure.month, structure.day,
          structure.hour, structure.minute, structure.second, microseconds)
    except errors.TimestampError as exception:
      # Fall back to a sentinel timestamp rather than dropping the event.
      timestamp = timelib.Timestamp.NONE_TIMESTAMP
      parser_mediator.ProduceExtractionError(
          'unable to determine timestamp with error: {0!s}'.format(
              exception))

    # If an offset is given for the event, apply the offset to convert to UTC.
    if timestamp and 'offset' in key:
      try:
        # Skip the leading sign character; it is applied separately below.
        delta_microseconds = int(structure.utc_offset_minutes[1:], 10)
      except (IndexError, ValueError) as exception:
        raise errors.TimestampError(
            'Unable to parse minute offset from UTC with error: {0!s}.'.format(
                exception))

      delta_microseconds *= self._MICRO_SECONDS_PER_MINUTE
      if structure.utc_offset_minutes[0] == '-':
        delta_microseconds = -delta_microseconds
      timestamp += delta_microseconds

    event_data = SCCMLogEventData()
    event_data.component = structure.component
    # TODO: pass line number to offset or remove.
    event_data.offset = 0
    event_data.text = structure.text

    event = time_events.TimestampEvent(
        timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def VerifyStructure(self, parser_mediator, lines):
    """Verifies whether content corresponds to an SCCM log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      lines (str): one or more lines from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    # Identify the token to which we attempt a match.
    match = self._PARSING_COMPONENTS['msg_left_delimiter'].match

    # Because logs files can lead with a partial event,
    # we can't assume that the first character (post-BOM)
    # in the file is the beginning of our match - so we
    # look for match anywhere in lines.
    return match in lines
Example #14
0
class SyslogParser(text_parser.PyparsingMultiLineTextParser):
    """Parses syslog formatted log files"""
    NAME = 'syslog'

    DESCRIPTION = 'Syslog Parser'

    _ENCODING = 'utf-8'

    _plugin_classes = {}

    # The reporter and facility fields can contain any printable character, but
    # to allow for processing of syslog formats that delimit the reporter and
    # facility with printable characters, we remove certain common delimiters
    # from the set of printable characters.
    _REPORTER_CHARACTERS = ''.join(
        [c for c in pyparsing.printables if c not in [':', '[', '<']])
    _FACILITY_CHARACTERS = ''.join(
        [c for c in pyparsing.printables if c not in [':', '>']])

    _SYSLOG_SEVERITY = [
        'EMERG', 'ALERT', 'CRIT', 'ERR', 'WARNING', 'NOTICE', 'INFO', 'DEBUG'
    ]

    _OFFSET_PREFIX = ['-', '+']

    # Message body: everything up to the start of the next syslog or
    # Chrome OS (ISO 8601) timestamp, or end of input.
    _BODY_CONTENT = (
        r'.*?(?=($|\n\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2})|' \
        r'($|\n\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}' \
        r'[\+|-]\d{2}:\d{2}\s))')

    _VERIFICATION_REGEX = re.compile(r'^\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}\s' +
                                     _BODY_CONTENT)

    # The Chrome OS syslog messages are of a format beginning with an
    # ISO 8601 combined date and time expression with timezone designator:
    #   2016-10-25T12:37:23.297265-07:00
    #
    # This will then be followed by the SYSLOG Severity which will be one of:
    #   EMERG,ALERT,CRIT,ERR,WARNING,NOTICE,INFO,DEBUG
    #
    # 2016-10-25T12:37:23.297265-07:00 INFO
    _CHROMEOS_VERIFICATION_REGEX = re.compile(
        r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.'
        r'\d{6}[\+|-]\d{2}:\d{2}\s'
        r'(EMERG|ALERT|CRIT|ERR|WARNING|NOTICE|INFO|DEBUG)' + _BODY_CONTENT)

    _PYPARSING_COMPONENTS = {
        'year':
        text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName('year'),
        'two_digit_month':
        (text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
            'two_digit_month')),
        'month':
        text_parser.PyparsingConstants.MONTH.setResultsName('month'),
        'day':
        text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName('day'),
        'hour':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('hour'),
        'minute':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('minute'),
        'second':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('second'),
        'fractional_seconds':
        pyparsing.Word(pyparsing.nums).setResultsName('fractional_seconds'),
        'hostname':
        pyparsing.Word(pyparsing.printables).setResultsName('hostname'),
        'reporter':
        pyparsing.Word(_REPORTER_CHARACTERS).setResultsName('reporter'),
        'pid':
        text_parser.PyparsingConstants.PID.setResultsName('pid'),
        'facility':
        pyparsing.Word(_FACILITY_CHARACTERS).setResultsName('facility'),
        'severity':
        pyparsing.oneOf(_SYSLOG_SEVERITY).setResultsName('severity'),
        'body':
        pyparsing.Regex(_BODY_CONTENT, re.DOTALL).setResultsName('body'),
        'comment_body':
        pyparsing.SkipTo(' ---').setResultsName('body'),
        'iso_8601_offset':
        (pyparsing.oneOf(_OFFSET_PREFIX) +
         text_parser.PyparsingConstants.TWO_DIGITS + pyparsing.Optional(
             pyparsing.Literal(':') +
             text_parser.PyparsingConstants.TWO_DIGITS))
    }

    _PYPARSING_COMPONENTS['date'] = (
        _PYPARSING_COMPONENTS['month'] + _PYPARSING_COMPONENTS['day'] +
        _PYPARSING_COMPONENTS['hour'] + pyparsing.Suppress(':') +
        _PYPARSING_COMPONENTS['minute'] + pyparsing.Suppress(':') +
        _PYPARSING_COMPONENTS['second'] + pyparsing.Optional(
            pyparsing.Suppress('.') +
            _PYPARSING_COMPONENTS['fractional_seconds']))

    _PYPARSING_COMPONENTS['iso_8601_date'] = pyparsing.Combine(
        _PYPARSING_COMPONENTS['year'] + pyparsing.Literal('-') +
        _PYPARSING_COMPONENTS['two_digit_month'] + pyparsing.Literal('-') +
        _PYPARSING_COMPONENTS['day'] + pyparsing.Literal('T') +
        _PYPARSING_COMPONENTS['hour'] + pyparsing.Literal(':') +
        _PYPARSING_COMPONENTS['minute'] + pyparsing.Literal(':') +
        _PYPARSING_COMPONENTS['second'] + pyparsing.Literal('.') +
        _PYPARSING_COMPONENTS['fractional_seconds'] +
        _PYPARSING_COMPONENTS['iso_8601_offset'],
        joinString='',
        adjacent=True).setResultsName('iso_8601_date')

    _CHROMEOS_SYSLOG_LINE = (
        _PYPARSING_COMPONENTS['iso_8601_date'] +
        _PYPARSING_COMPONENTS['severity'] + _PYPARSING_COMPONENTS['reporter'] +
        pyparsing.Optional(pyparsing.Suppress(':')) + pyparsing.Optional(
            pyparsing.Suppress('[') + _PYPARSING_COMPONENTS['pid'] +
            pyparsing.Suppress(']')) +
        pyparsing.Optional(pyparsing.Suppress(':')) +
        _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

    _SYSLOG_LINE = (
        _PYPARSING_COMPONENTS['date'] + _PYPARSING_COMPONENTS['hostname'] +
        _PYPARSING_COMPONENTS['reporter'] + pyparsing.Optional(
            pyparsing.Suppress('[') + _PYPARSING_COMPONENTS['pid'] +
            pyparsing.Suppress(']')) + pyparsing.Optional(
                pyparsing.Suppress('<') + _PYPARSING_COMPONENTS['facility'] +
                pyparsing.Suppress('>')) +
        pyparsing.Optional(pyparsing.Suppress(':')) +
        _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

    _SYSLOG_COMMENT = (_PYPARSING_COMPONENTS['date'] +
                       pyparsing.Suppress(':') + pyparsing.Suppress('---') +
                       _PYPARSING_COMPONENTS['comment_body'] +
                       pyparsing.Suppress('---') + pyparsing.LineEnd())

    _KERNEL_SYSLOG_LINE = (
        _PYPARSING_COMPONENTS['date'] +
        pyparsing.Literal('kernel').setResultsName('reporter') +
        pyparsing.Suppress(':') + _PYPARSING_COMPONENTS['body'] +
        pyparsing.lineEnd())

    LINE_STRUCTURES = [('syslog_line', _SYSLOG_LINE),
                       ('syslog_line', _KERNEL_SYSLOG_LINE),
                       ('syslog_comment', _SYSLOG_COMMENT),
                       ('chromeos_syslog_line', _CHROMEOS_SYSLOG_LINE)]

    _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

    def __init__(self):
        """Initializes a parser."""
        super(SyslogParser, self).__init__()
        self._last_month = 0
        self._maximum_year = 0
        self._plugin_by_reporter = {}
        self._year_use = 0

    def _UpdateYear(self, mediator, month):
        """Updates the year to use for events, based on last observed month.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      month (int): month observed by the parser, where January is 1.
    """
        if not self._year_use:
            self._year_use = mediator.GetEstimatedYear()
        if not self._maximum_year:
            self._maximum_year = mediator.GetLatestYear()

        if not self._last_month:
            self._last_month = month
            return

        # Some syslog daemons allow out-of-order sequences, so allow some leeway
        # to not cause Apr->May->Apr to cause the year to increment.
        # See http://bugzilla.adiscon.com/show_bug.cgi?id=527
        if self._last_month > (month + 1):
            if self._year_use != self._maximum_year:
                self._year_use += 1
        self._last_month = month

    def EnablePlugins(self, plugin_includes):
        """Enables parser plugins.

    Args:
      plugin_includes (list[str]): names of the plugins to enable, where None
          or an empty list represents all plugins. Note that the default plugin
          is handled separately.
    """
        super(SyslogParser, self).EnablePlugins(plugin_includes)

        self._plugin_by_reporter = {}
        for plugin in self._plugins:
            self._plugin_by_reporter[plugin.REPORTER] = plugin

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a matching entry.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): elements parsed from the file.

    Raises:
      ParseError: when the structure type is unknown.
    """
        if key not in self._SUPPORTED_KEYS:
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

        if key == 'chromeos_syslog_line':
            timestamp = timelib.Timestamp.FromTimeString(
                structure.iso_8601_date[0])
        else:
            month = timelib.MONTH_DICT.get(structure.month.lower(), None)
            if not month:
                # Report the raw parsed month string: the lookup result is
                # None here, and formatting None with '{0:s}' raised
                # TypeError in the original code. Also use
                # ProduceExtractionError, consistent with the mediator API
                # used elsewhere.
                parser_mediator.ProduceExtractionError(
                    'Invalid month value: {0:s}'.format(structure.month))
                return

            self._UpdateYear(parser_mediator, month)
            timestamp = timelib.Timestamp.FromTimeParts(
                year=self._year_use,
                month=month,
                day=structure.day,
                hour=structure.hour,
                minutes=structure.minute,
                seconds=structure.second,
                timezone=parser_mediator.timezone)

        plugin = None
        if key == 'syslog_comment':
            event_data = SyslogCommentEventData()
            event_data.body = structure.body
            # TODO: pass line number to offset or remove.
            event_data.offset = 0

        else:
            event_data = SyslogLineEventData()
            event_data.body = structure.body
            event_data.hostname = structure.hostname or None
            # TODO: pass line number to offset or remove.
            event_data.offset = 0
            event_data.pid = structure.pid
            event_data.reporter = structure.reporter
            event_data.severity = structure.severity

            plugin = self._plugin_by_reporter.get(structure.reporter, None)
            if plugin:
                attributes = {
                    'hostname': structure.hostname,
                    'severity': structure.severity,
                    'reporter': structure.reporter,
                    'pid': structure.pid,
                    'body': structure.body
                }

                try:
                    # TODO: pass event_data instead of attributes.
                    plugin.Process(parser_mediator, timestamp, attributes)

                except errors.WrongPlugin:
                    plugin = None

        # Only produce a generic syslog event when no plugin handled it.
        if not plugin:
            event = time_events.TimestampEvent(
                timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
            parser_mediator.ProduceEventWithEventData(event, event_data)

    def VerifyStructure(self, unused_parser_mediator, lines):
        """Verifies that this is a syslog-formatted file.

    Args:
      unused_parser_mediator (ParserMediator): mediates interactions between
          parsers and other components, such as storage and dfvfs.
      lines (str): one or more lines from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
        return (re.match(self._VERIFICATION_REGEX, lines) or re.match(
            self._CHROMEOS_VERIFICATION_REGEX, lines)) is not None
Example #15
0
def _build_tgrep_parser(set_parse_actions=True):
    '''
    Builds a pyparsing-based parser object for tokenizing and
    interpreting tgrep search strings.

    :param set_parse_actions: when True, attach the module's parse-action
        callbacks so parsing produces predicate functions; when False, the
        grammar only tokenizes.
    :return: the top-level ``tgrep_exprs`` parser element.
    '''
    # Relation operator: optional negation '!' followed by a tgrep link
    # symbol ($ % , . < >) with optional numeric/quote modifiers.
    tgrep_op = (pyparsing.Optional('!') +
                pyparsing.Regex('[$%,.<>][%,.<>0-9-\':]*'))
    tgrep_qstring = pyparsing.QuotedString(quoteChar='"',
                                           escChar='\\',
                                           unquoteResults=False)
    tgrep_node_regex = pyparsing.QuotedString(quoteChar='/',
                                              escChar='\\',
                                              unquoteResults=False)
    # Case-insensitive node matchers are prefixed with 'i@'.
    tgrep_qstring_icase = pyparsing.Regex(
        'i@\\"(?:[^"\\n\\r\\\\]|(?:\\\\.))*\\"')
    tgrep_node_regex_icase = pyparsing.Regex(
        'i@\\/(?:[^/\\n\\r\\\\]|(?:\\\\.))*\\/')
    tgrep_node_literal = pyparsing.Regex('[^][ \r\t\n;:.,&|<>()$!@%\'^=]+')
    tgrep_expr = pyparsing.Forward()
    tgrep_relations = pyparsing.Forward()
    tgrep_parens = pyparsing.Literal('(') + tgrep_expr + ')'
    # NLTK tree-position addresses of the form N(i,j,...).
    tgrep_nltk_tree_pos = (pyparsing.Literal('N(') + pyparsing.Optional(
        pyparsing.Word(pyparsing.nums) + ',' + pyparsing.Optional(
            pyparsing.delimitedList(pyparsing.Word(pyparsing.nums), delim=',')
            + pyparsing.Optional(','))) + ')')
    tgrep_node_label = pyparsing.Regex('[A-Za-z0-9]+')
    # '=label' references a previously bound node label.
    tgrep_node_label_use = pyparsing.Combine('=' + tgrep_node_label)
    # see _tgrep_segmented_pattern_action
    tgrep_node_label_use_pred = tgrep_node_label_use.copy()
    macro_name = pyparsing.Regex('[^];:.,&|<>()[$!@%\'^=\r\t\n ]+')
    macro_name.setWhitespaceChars('')
    # '@name' expands a previously defined macro.
    macro_use = pyparsing.Combine('@' + macro_name)
    tgrep_node_expr = (tgrep_node_label_use_pred | macro_use
                       | tgrep_nltk_tree_pos | tgrep_qstring_icase
                       | tgrep_node_regex_icase | tgrep_qstring
                       | tgrep_node_regex | '*' | tgrep_node_literal)
    # A node expression optionally bound to a label via '=label'; the '='
    # and the label must be adjacent (no whitespace).
    tgrep_node_expr2 = (
        (tgrep_node_expr + pyparsing.Literal('=').setWhitespaceChars('') +
         tgrep_node_label.copy().setWhitespaceChars('')) | tgrep_node_expr)
    tgrep_node = (tgrep_parens | (pyparsing.Optional("'") + tgrep_node_expr2 +
                                  pyparsing.ZeroOrMore("|" + tgrep_node_expr)))
    tgrep_brackets = pyparsing.Optional('!') + '[' + tgrep_relations + ']'
    tgrep_relation = tgrep_brackets | (tgrep_op + tgrep_node)
    tgrep_rel_conjunction = pyparsing.Forward()
    # Conjunctions may omit the '&' between relations.
    tgrep_rel_conjunction << (
        tgrep_relation +
        pyparsing.ZeroOrMore(pyparsing.Optional('&') + tgrep_rel_conjunction))
    # Disjunctions of relation conjunctions are separated by '|'.
    tgrep_relations << tgrep_rel_conjunction + pyparsing.ZeroOrMore(
        "|" + tgrep_relations)
    tgrep_expr << tgrep_node + pyparsing.Optional(tgrep_relations)
    tgrep_expr_labeled = tgrep_node_label_use + pyparsing.Optional(
        tgrep_relations)
    # Segmented patterns: expr ':' labeled-expr ':' ...
    tgrep_expr2 = tgrep_expr + pyparsing.ZeroOrMore(':' + tgrep_expr_labeled)
    # Macro definition: '@' whitespace name expression.
    macro_defn = (pyparsing.Literal('@') + pyparsing.White().suppress() +
                  macro_name + tgrep_expr2)
    tgrep_exprs = (
        pyparsing.Optional(macro_defn +
                           pyparsing.ZeroOrMore(';' + macro_defn) + ';') +
        tgrep_expr2 + pyparsing.ZeroOrMore(';' + (macro_defn | tgrep_expr2)) +
        pyparsing.ZeroOrMore(';').suppress())
    if set_parse_actions:
        tgrep_node_label_use.setParseAction(_tgrep_node_label_use_action)
        tgrep_node_label_use_pred.setParseAction(
            _tgrep_node_label_pred_use_action)
        macro_use.setParseAction(_tgrep_macro_use_action)
        tgrep_node.setParseAction(_tgrep_node_action)
        tgrep_node_expr2.setParseAction(_tgrep_bind_node_label_action)
        tgrep_parens.setParseAction(_tgrep_parens_action)
        tgrep_nltk_tree_pos.setParseAction(_tgrep_nltk_tree_pos_action)
        tgrep_relation.setParseAction(_tgrep_relation_action)
        tgrep_rel_conjunction.setParseAction(_tgrep_conjunction_action)
        tgrep_relations.setParseAction(_tgrep_rel_disjunction_action)
        macro_defn.setParseAction(_macro_defn_action)
        # the whole expression is also the conjunction of two
        # predicates: the first node predicate, and the remaining
        # relation predicates
        tgrep_expr.setParseAction(_tgrep_conjunction_action)
        tgrep_expr_labeled.setParseAction(_tgrep_segmented_pattern_action)
        tgrep_expr2.setParseAction(
            functools.partial(_tgrep_conjunction_action, join_char=':'))
        tgrep_exprs.setParseAction(_tgrep_exprs_action)
    # '#' starts a comment that runs to end of line; ignored everywhere.
    return tgrep_exprs.ignore('#' + pyparsing.restOfLine)
Example #16
0
# Copy the shared any-depth paragraph-marker grammar and attach this
# module's parse action (_any_depth_parse) to its matches.
any_depth_p = unified.any_depth_p.copy().setParseAction(_any_depth_parse)


def initial_markers(text):
    """Pull out a list of the first paragraph markers, i.e. markers before
    any text. Returns an empty list when the text does not start with
    markers."""
    try:
        parsed = any_depth_p.parseString(text)
    except pyparsing.ParseException:
        return []
    return list(parsed)


# Marker grammar for markers inside text: only matches when preceded by one
# of the suppressed guard tokens (punctuation, dashes, '>' or 'means ').
_collapsed_grammar = QuickSearchable(
    # A guard to reduce false positives
    pyparsing.Suppress(pyparsing.Regex(u',|\.|-|—|>|means ')) + any_depth_p)


def collapsed_markers(text):
    """Not all paragraph markers are at the beginning of the text. This
    grabs inner markers like (1) and (i) here:
    (c) cContent —(1) 1Content (i) iContent

    Returns:
        list: first-level paragraph markers found inside the text.
    """
    potential = [triplet for triplet in _collapsed_grammar.scanString(text)]
    #   remove any that overlap with citations
    potential = [trip for trip in remove_citation_overlaps(text, potential)]
    #   flatten the results
    potential = [pm for pms, _, _ in potential for pm in pms]
    #   remove any matches that aren't (a), (1), (i), etc. -- All other
    #   markers can't be collapsed
    first_markers = [level[0] for level in p_levels]
    potential = [pm for pm in potential if pm in first_markers]
    # The original computed this list and discarded it (implicitly
    # returning None); return the filtered markers to the caller.
    return potential
Example #17
0
# References:
# http://stackoverflow.com/questions/11133339/parsing-a-complex-logical-expression-in-pyparsing-in-a-binary-tree-fashion
# http://stackoverflow.com/questions/33532451/pyparsing-python-binary-boolean-expression-to-xml-nesting-issue-2-7-10
# http://pyparsing.wikispaces.com/file/view/simpleArith.py/30268305/simpleArith.py
# http://qiita.com/knoguchi/items/ee949989d0a9f04bee6f
# http://qiita.com/knoguchi/items/6f9b7383b7252a9ebdad
"""

import logging
import pyparsing as pp
import networkx as nx

from patternmatching.query.Condition import *

# Parentheses are consumed but not kept in the parse results.
LPAR, RPAR = map(pp.Suppress, "()")
# Unsigned number: digits, optional fraction, optional exponent.
numvalue = pp.Regex(r"\d+(\.\d*)?([eE][+-]?\d+)?")
term = pp.Forward()
factor = pp.Forward()

addsub = pp.oneOf('+ -')
muldiv = pp.oneOf('* /')
compare = pp.Regex(">=|<=|!=|>|<|==").setName("compare")
NOT_ = pp.Keyword("NOT").setName("NOT")
AND_ = pp.Keyword("AND").setName("AND")
OR_ = pp.Keyword("OR").setName("OR")

symbol = pp.Word(pp.alphas).setName("symbol")
# Property access of the form 'name.property'.
propsymbol = pp.Group(symbol + "." + symbol).setName("propsymbol")
# Standard arithmetic grammar: formula -> term ((+|-) term)*,
# term -> factor ((*|/) factor)*, factor -> number | property | (formula).
formula = pp.Optional(addsub) + term + pp.ZeroOrMore(addsub + term)
term << (factor + pp.ZeroOrMore(muldiv + factor))
factor << (numvalue | propsymbol | LPAR + formula + RPAR)
def word_token_regex(disallowed_delimiter):
    """Build a regex element matching a run of characters that are neither
    whitespace, newlines, nor the given delimiter character."""
    char_class = r"[^\s\n" + re.escape(disallowed_delimiter) + r"]+"
    return pypar.Regex(char_class)
Example #19
0
class SELinuxParser(text_parser.PyparsingSingleLineTextParser):
  """Parser for SELinux audit.log files."""

  NAME = 'selinux'
  DESCRIPTION = 'Parser for SELinux audit.log files.'

  _ENCODING = 'utf-8'

  # A single key=value pair, where the value is either a double-quoted
  # string or a bare run of printable characters, e.g. pid=123 or comm="cron".
  _SELINUX_KEY_VALUE_GROUP = pyparsing.Group(
      pyparsing.Word(pyparsing.alphanums).setResultsName('key') +
      pyparsing.Suppress('=') + (
          pyparsing.QuotedString('"') ^
          pyparsing.Word(pyparsing.printables)).setResultsName('value'))

  # Zero or more key=value pairs, exposed as a dictionary-like result.
  _SELINUX_KEY_VALUE_DICT = pyparsing.Dict(
      pyparsing.ZeroOrMore(_SELINUX_KEY_VALUE_GROUP))

  # Captures the remainder of the line as the (unkeyed) message body.
  _SELINUX_BODY_GROUP = pyparsing.Group(
      pyparsing.Empty().setResultsName('key') +
      pyparsing.restOfLine.setResultsName('value'))

  # The audit timestamp token: msg=audit(seconds.milliseconds:serial):
  _SELINUX_MSG_GROUP = pyparsing.Group(
      pyparsing.Literal('msg').setResultsName('key') +
      pyparsing.Suppress('=audit(') +
      pyparsing.Word(pyparsing.nums).setResultsName('seconds') +
      pyparsing.Suppress('.') +
      pyparsing.Word(pyparsing.nums).setResultsName('milliseconds') +
      pyparsing.Suppress(':') +
      pyparsing.Word(pyparsing.nums).setResultsName('serial') +
      pyparsing.Suppress('):'))

  # The audit record type, e.g. type=AVC or type=UNKNOWN[1334].
  _SELINUX_TYPE_GROUP = pyparsing.Group(
      pyparsing.Literal('type').setResultsName('key') +
      pyparsing.Suppress('=') + (
          pyparsing.Word(pyparsing.srange('[A-Z_]')) ^
          pyparsing.Regex(r'UNKNOWN\[[0-9]+\]')).setResultsName('value'))

  # NOTE(review): pyparsing.Word takes a character *set*, not a literal, so
  # Word('AVC') matches any run of the characters A, V, C — presumably
  # Literal/Keyword was intended. This attribute is unused in the visible
  # code, so it is left as-is; confirm against subclasses before changing.
  _SELINUX_TYPE_AVC_GROUP = pyparsing.Group(
      pyparsing.Literal('type').setResultsName('key') +
      pyparsing.Suppress('=') + (
          pyparsing.Word('AVC') ^
          pyparsing.Word('USER_AVC')).setResultsName('value'))

  # A log line is formatted as: type=TYPE msg=audit([0-9]+\.[0-9]+:[0-9]+): .*
  _SELINUX_LOG_LINE = pyparsing.Dict(
      _SELINUX_TYPE_GROUP +
      _SELINUX_MSG_GROUP +
      _SELINUX_BODY_GROUP)

  LINE_STRUCTURES = [('line', _SELINUX_LOG_LINE)]

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a structure of tokens derived from a line of a text file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key != 'line':
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    msg_value = self._GetValueFromStructure(structure, 'msg')
    if not msg_value:
      parser_mediator.ProduceExtractionWarning(
          'missing msg value: {0!s}'.format(structure))
      return

    try:
      # msg_value[0] holds the seconds component of the audit timestamp.
      seconds = int(msg_value[0], 10)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'unsupported number of seconds in msg value: {0!s}'.format(
              structure))
      return

    try:
      # msg_value[1] holds the millisecond component of the audit timestamp.
      milliseconds = int(msg_value[1], 10)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'unsupported number of milliseconds in msg value: {0!s}'.format(
              structure))
      return

    # Timestamp in microseconds: (seconds -> ms) + ms, then ms -> us.
    timestamp = ((seconds * 1000) + milliseconds) * 1000
    # The third parsed group is the body group; its first element is the text.
    body_text = structure[2][0]

    try:
      # Try to parse the body text as key value pairs. Note that not
      # all log lines will be properly formatted key value pairs.
      body_structure = self._SELINUX_KEY_VALUE_DICT.parseString(body_text)
    except pyparsing.ParseException:
      # Fall back to an empty result so the pid lookup below yields None.
      body_structure = pyparsing.ParseResults()

    event_data = SELinuxLogEventData()
    event_data.audit_type = self._GetValueFromStructure(structure, 'type')
    event_data.body = body_text
    event_data.pid = self._GetValueFromStructure(body_structure, 'pid')
    # TODO: pass line number to offset or remove.
    event_data.offset = 0

    event = time_events.TimestampEvent(
        timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def VerifyStructure(self, parser_mediator, line):
    """Verifies if a line from a text file is in the expected format.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): line from a text file.

    Returns:
      bool: True if the line is in the expected format, False if not.
    """
    try:
      structure = self._SELINUX_LOG_LINE.parseString(line)
    except pyparsing.ParseException as exception:
      logger.debug(
          'Unable to parse SELinux audit.log file with error: {0!s}'.format(
              exception))
      return False

    # A valid line must have produced both the type and msg groups.
    return 'type' in structure and 'msg' in structure
Example #20
0
    class Grammar:
        """Pyparsing grammar for an expression/tuple configuration language.

        Relies on helpers defined in the enclosing scope (pattern, sym, kw,
        parseWithLocation, bracketedList, swallow_errors, convertAndMake,
        listMembers, ...), which are not visible in this excerpt.
        """

        # Words that cannot be used as plain variable identifiers.
        keywords = [
            'and', 'or', 'not', 'if', 'then', 'else', 'include', 'inherit',
            'null', 'true', 'false', 'for', 'in'
        ]

        # This is a hack: this condition helps uselessly recursing into the grammar for
        # juxtapositions.
        early_abort_scan = ~p.oneOf([';', ',', ']', '}', 'for'])

        # Forward declaration; the full expression grammar is assembled at the
        # bottom of the class body with `expression << term`.
        expression = pattern('expression', p.Forward())

        # Plain '#' comment; '#.' is reserved for doc comments (next rule).
        comment = p.Regex('#') + ~p.FollowedBy(sym('.')) + p.restOfLine
        doc_comment = pattern('doc_comment', (sym('#.') - p.restOfLine))

        quotedIdentifier = pattern('quotedIdentifier',
                                   p.QuotedString('`', multiline=False))

        # - Must start with an alphascore
        # - May contain alphanumericscores and special characters such as : and -
        # - Must not end in a special character
        identifier = pattern(
            'identifier',
            parseWithLocation(
                quotedIdentifier
                | p.Regex(r'[a-zA-Z_]([a-zA-Z0-9_:-]*[a-zA-Z0-9_])?'),
                Identifier))

        # Variable identifier (can't be any of the keywords, which may have lower matching priority)
        variable = pattern(
            'variable', ~p.MatchFirst(p.oneOf(keywords)) +
            pattern('identifier', parseWithLocation(identifier.copy(), Var)))

        # Constants
        integer = pattern(
            'integer',
            parseWithLocation(p.Word(p.nums), convertAndMake(int, Literal)))
        floating = pattern(
            'floating',
            parseWithLocation(p.Regex(r'\d*\.\d+'),
                              convertAndMake(float, Literal)))
        dq_string = pattern(
            'dq_string',
            parseWithLocation(
                p.QuotedString('"',
                               escChar='\\',
                               unquoteResults=False,
                               multiline=True),
                convertAndMake(unquote, Literal)))
        sq_string = pattern(
            'sq_string',
            parseWithLocation(
                p.QuotedString("'",
                               escChar='\\',
                               unquoteResults=False,
                               multiline=True),
                convertAndMake(unquote, Literal)))
        boolean = pattern(
            'boolean',
            parseWithLocation(
                p.Keyword('true') | p.Keyword('false'),
                convertAndMake(mkBool, Literal)))
        null = pattern('null', parseWithLocation(p.Keyword('null'), Null))

        # List
        list_ = pattern(
            'list',
            parseWithLocation(bracketedList('[', ']', ',', expression), List))

        # Tuple
        inherit = pattern(
            'inherit', (kw('inherit') -
                        p.ZeroOrMore(variable)).setParseAction(inheritNodes))
        # Member schema: optional 'private'/'required' flags plus an optional
        # schema expression; each flag defaults to False when absent.
        schema_spec = pattern(
            'schema_spec',
            parseWithLocation(
                p.Optional(p.Keyword('private').setParseAction(lambda: True),
                           default=False) -
                p.Optional(p.Keyword('required').setParseAction(lambda: True),
                           default=False) -
                p.Optional(expression, default=any_schema_expr),
                MemberSchemaNode))
        optional_schema = pattern(
            'optional_schema',
            p.Optional(p.Suppress(':') - schema_spec, default=no_schema))

        expression_value = pattern('expression_value',
                                   sym('=') - swallow_errors(expression))
        # A member with no '= value' part; recognized by lookahead only.
        void_value = pattern(
            'void_value',
            parseWithLocation(p.FollowedBy(sym(';') | sym('}')),
                              lambda loc: Void(loc, 'nonameyet')))
        member_value = pattern('member_value',
                               swallow_errors(expression_value | void_value))
        named_member = pattern(
            'named_member',
            parseWithLocation(
                identifier - optional_schema - member_value -
                swallow_remainder(), TupleMemberNode))
        documented_member = pattern(
            'documented_member',
            parseWithLocation(
                parseWithLocation(p.ZeroOrMore(doc_comment), DocComment) +
                named_member, attach_doc_comment))
        tuple_member = early_abort_scan + pattern(
            'tuple_member',
            swallow_errors(inherit | documented_member) - swallow_remainder())

        ErrorAwareTupleNode = functools.partial(TupleNode, allow_errors)
        tuple_members = pattern(
            'tuple_members',
            parseWithLocation(listMembers(';', tuple_member),
                              ErrorAwareTupleNode))
        # NOTE(review): 'tuple' is not declared in this excerpt; presumably it
        # is a p.Forward() defined in the enclosing scope — confirm upstream.
        tuple << pattern(
            'tuple',
            parseWithLocation(
                bracketedList('{',
                              '}',
                              ';',
                              tuple_member,
                              allow_missing_close=allow_errors),
                ErrorAwareTupleNode))

        # Argument list will live by itself as a atom. Actually, it's a tuple, but we
        # don't call it that because we use that term for something else already :)
        arg_list = pattern(
            'arg_list',
            bracketedList('(', ')', ',', expression).setParseAction(ArgList))

        parenthesized_expr = pattern('parenthesized_expr',
                                     (sym('(') - expression -
                                      ')').setParseAction(head))

        unary_op = pattern(
            'unary_op', (p.oneOf(' '.join(functions.unary_operators.keys())) -
                         expression).setParseAction(mkUnOp))

        if_then_else = pattern(
            'if_then_else',
            parseWithLocation(
                kw('if') + expression + kw('then') + expression + kw('else') +
                expression, Condition))

        list_comprehension = pattern(
            'list_comprehension',
            parseWithLocation(
                sym('[') + expression + kw('for') + variable + kw('in') +
                expression + p.Optional(kw('if') + expression) + sym(']'),
                ListComprehension))

        # We don't allow space-application here
        # Now our grammar is becoming very dirty and hackish
        deref = pattern('deref', p.Forward())
        include = pattern('include',
                          parseWithLocation(kw('include') - deref, Include))

        atom = pattern('atom', (tuple
                                | sq_string
                                | dq_string
                                | variable
                                | floating
                                | integer
                                | boolean
                                | list_
                                | null
                                | unary_op
                                | parenthesized_expr
                                | if_then_else
                                | include
                                | list_comprehension))

        # We have two different forms of function application, so they can have 2
        # different precedences. This one: fn(args), which binds stronger than
        # dereferencing (fn(args).attr == (fn(args)).attr)
        applic1 = pattern(
            'applic1',
            parseWithLocation(atom - p.ZeroOrMore(arg_list), mkApplications))

        # Dereferencing of an expression (obj.bar)
        deref << parseWithLocation(
            applic1 -
            p.ZeroOrMore(p.Suppress('.') - swallow_errors(identifier)),
            mkDerefs)

        # All binary operators at various precedence levels go here:
        # This piece of code does the moral equivalent of:
        #
        #     T = F*F | F/F | F
        #     E = T+T | T-T | T
        #
        # etc.
        term = deref
        for op_level in functions.binary_operators_before_juxtaposition:
            operator_syms = list(op_level.keys())
            term = (term - p.ZeroOrMore(p.oneOf(operator_syms) -
                                        term)).setParseAction(mkBinOps)

        # Juxtaposition function application (fn arg), must be 1-arg every time
        applic2 = pattern(
            'applic2',
            parseWithLocation(term - p.ZeroOrMore(early_abort_scan + term),
                              mkApplications))

        term = applic2
        for op_level in functions.binary_operators_after_juxtaposition:
            operator_syms = list(op_level.keys())
            term = (term - p.ZeroOrMore(p.oneOf(operator_syms) -
                                        term)).setParseAction(mkBinOps)

        # Close the forward declaration: an expression is the top-level term.
        expression << term

        # Two entry points: start at an arbitrary expression, or expect the top-level
        # scope to be a tuple.
        start = pattern('start', expression.copy().ignore(comment))
        start_tuple = tuple_members.ignore(comment)
Example #21
0
class SCCMParser(text_parser.PyparsingMultiLineTextParser):
    """Parser for Windows System Center Configuration Manager (SCCM) logs."""

    NAME = 'sccm'
    DESCRIPTION = 'Parser for SCCM logs files.'

    _ENCODING = 'utf-8-sig'

    # Increasing the buffer size as SCCM messages are commonly well larger
    # than the default value.
    BUFFER_SIZE = 16384

    LINE_STRUCTURES = []

    _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
    _ONE_OR_TWO_DIGITS = text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS

    # PyParsing Components used to construct grammars for parsing lines.
    # An SCCM entry looks like:
    #   <![LOG[message]LOG]!><time="HH:MM:SS.fff[offset]" date="MM-DD-YYYY"
    #   component="..." ...>
    _PARSING_COMPONENTS = {
        'msg_left_delimiter':
        pyparsing.Literal('<![LOG['),
        'msg_right_delimiter':
        pyparsing.Literal(']LOG]!><time="'),
        'year':
        _FOUR_DIGITS.setResultsName('year'),
        'month':
        _ONE_OR_TWO_DIGITS.setResultsName('month'),
        'day':
        _ONE_OR_TWO_DIGITS.setResultsName('day'),
        'fraction_of_second':
        pyparsing.Regex(r'\d{3,7}').setResultsName('fraction_of_second'),
        'utc_offset_minutes':
        pyparsing.Regex(r'[-+]\d{2,3}').setResultsName('utc_offset_minutes'),
        'date_prefix':
        pyparsing.Literal('" date="').setResultsName('date_prefix'),
        'component_prefix':
        pyparsing.Literal('" component="').setResultsName('component_prefix'),
        'component':
        pyparsing.Word(pyparsing.alphanums).setResultsName('component'),
        # Non-greedy match up to the closing delimiter of the message text.
        'text':
        pyparsing.Regex(r'.*?(?=(]LOG]!><time="))',
                        re.DOTALL).setResultsName('text'),
        # Non-greedy match up to the opening delimiter of the next entry.
        'line_remainder':
        pyparsing.Regex(r'.*?(?=(\<!\[LOG\[))',
                        re.DOTALL).setResultsName('line_remainder'),
        'lastline_remainder':
        pyparsing.restOfLine.setResultsName('lastline_remainder'),
        'hour':
        _ONE_OR_TWO_DIGITS.setResultsName('hour'),
        'minute':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('minute'),
        'second':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('second')
    }

    # Base grammar for individual log event lines.
    LINE_GRAMMAR_BASE = (
        _PARSING_COMPONENTS['msg_left_delimiter'] +
        _PARSING_COMPONENTS['text'] +
        _PARSING_COMPONENTS['msg_right_delimiter'] +
        _PARSING_COMPONENTS['hour'] + pyparsing.Suppress(':') +
        _PARSING_COMPONENTS['minute'] + pyparsing.Suppress(':') +
        _PARSING_COMPONENTS['second'] + pyparsing.Suppress('.') +
        _PARSING_COMPONENTS['fraction_of_second'] +
        _PARSING_COMPONENTS['date_prefix'] + _PARSING_COMPONENTS['month'] +
        pyparsing.Suppress('-') + _PARSING_COMPONENTS['day'] +
        pyparsing.Suppress('-') + _PARSING_COMPONENTS['year'] +
        _PARSING_COMPONENTS['component_prefix'] +
        _PARSING_COMPONENTS['component'])

    # Grammar for individual log event lines with a minutes offset from UTC.
    LINE_GRAMMAR_OFFSET = (
        _PARSING_COMPONENTS['msg_left_delimiter'] +
        _PARSING_COMPONENTS['text'] +
        _PARSING_COMPONENTS['msg_right_delimiter'] +
        _PARSING_COMPONENTS['hour'] + pyparsing.Suppress(':') +
        _PARSING_COMPONENTS['minute'] + pyparsing.Suppress(':') +
        _PARSING_COMPONENTS['second'] + pyparsing.Suppress('.') +
        _PARSING_COMPONENTS['fraction_of_second'] +
        _PARSING_COMPONENTS['utc_offset_minutes'] +
        _PARSING_COMPONENTS['date_prefix'] + _PARSING_COMPONENTS['month'] +
        pyparsing.Suppress('-') + _PARSING_COMPONENTS['day'] +
        pyparsing.Suppress('-') + _PARSING_COMPONENTS['year'] +
        _PARSING_COMPONENTS['component_prefix'] +
        _PARSING_COMPONENTS['component'])

    LINE_STRUCTURES = [
        ('log_entry',
         LINE_GRAMMAR_BASE + _PARSING_COMPONENTS['line_remainder']),
        ('log_entry_at_end', LINE_GRAMMAR_BASE +
         _PARSING_COMPONENTS['lastline_remainder'] + pyparsing.lineEnd),
        ('log_entry_offset',
         LINE_GRAMMAR_OFFSET + _PARSING_COMPONENTS['line_remainder']),
        ('log_entry_offset_at_end', LINE_GRAMMAR_OFFSET +
         _PARSING_COMPONENTS['lastline_remainder'] + pyparsing.lineEnd)
    ]

    def _GetISO8601String(self, structure):
        """Retrieves an ISO8601 date time string from the structure.

        The date and time values in the SCCM log are formatted as:
        time="19:33:19.766-330" date="11-28-2014"

        Args:
          structure (pyparsing.ParseResults): structure of tokens derived from
              a line of a text file.

        Returns:
          str: ISO 8601 date time string.

        Raises:
          ValueError: if the structure cannot be converted into a date time
              string.
        """
        # Only 3 (milliseconds), 6 or 7 digit fractions are supported; the
        # grammar's \d{3,7} can also match 4 or 5 digits, which are rejected.
        fraction_of_second_length = len(structure.fraction_of_second)
        if fraction_of_second_length not in (3, 6, 7):
            raise ValueError(
                'unsupported time fraction of second length: {0:d}'.format(
                    fraction_of_second_length))

        try:
            fraction_of_second = int(structure.fraction_of_second, 10)
        except (TypeError, ValueError) as exception:
            raise ValueError(
                'unable to determine fraction of second with error: {0!s}'.
                format(exception))

        # TODO: improve precision support, but for now ignore the 100ns precision.
        if fraction_of_second_length == 7:
            fraction_of_second, _ = divmod(fraction_of_second, 10)

        date_time_string = '{0:04d}-{1:02d}-{2:02d}T{3:02d}:{4:02d}:{5:02d}'.format(
            structure.year, structure.month, structure.day, structure.hour,
            structure.minute, structure.second)

        if fraction_of_second_length > 0:
            date_time_string = '{0:s}.{1:d}'.format(date_time_string,
                                                    fraction_of_second)

        utc_offset_minutes = structure.get('utc_offset_minutes', None)
        if utc_offset_minutes is not None:
            try:
                # Skip the leading sign character when converting the offset.
                time_zone_offset = int(utc_offset_minutes[1:], 10)
            except (IndexError, ValueError) as exception:
                raise ValueError(
                    'Unable to parse time zone offset with error: {0!s}.'.
                    format(exception))

            # Convert the offset in minutes to a +HH:MM / -HH:MM suffix.
            time_zone_hours, time_zone_minutes = divmod(time_zone_offset, 60)
            date_time_string = '{0:s}{1:s}{2:02d}:{3:02d}'.format(
                date_time_string, utc_offset_minutes[0], time_zone_hours,
                time_zone_minutes)

        return date_time_string

    def ParseRecord(self, parser_mediator, key, structure):
        """Parse the record and return an SCCM log event object.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): structure of tokens derived from
              a line of a text file.

        Raises:
          ParseError: when the structure type is unknown.
        """
        if key not in ('log_entry', 'log_entry_at_end', 'log_entry_offset',
                       'log_entry_offset_at_end'):
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

        try:
            date_time_string = self._GetISO8601String(structure)
        except ValueError as exception:
            parser_mediator.ProduceExtractionError(
                'unable to determine date time string with error: {0!s}'.
                format(exception))
            # Fix: without this return, date_time_string (and, for a 4- or
            # 5-digit fraction, date_time) would be referenced below while
            # unbound, raising NameError instead of skipping the record.
            return

        # _GetISO8601String() guarantees the length is 3, 6 or 7 here.
        fraction_of_second_length = len(structure.fraction_of_second)
        if fraction_of_second_length == 3:
            date_time = dfdatetime_time_elements.TimeElementsInMilliseconds()
        elif fraction_of_second_length in (6, 7):
            date_time = dfdatetime_time_elements.TimeElementsInMicroseconds()

        try:
            date_time.CopyFromStringISO8601(date_time_string)
        except ValueError as exception:
            parser_mediator.ProduceExtractionError(
                'unable to parse date time value: {0:s} with error: {1!s}'.
                format(date_time_string, exception))
            return

        event_data = SCCMLogEventData()
        event_data.component = structure.component
        # TODO: pass line number to offset or remove.
        event_data.offset = 0
        event_data.text = structure.text

        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_WRITTEN)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def VerifyStructure(self, parser_mediator, lines):
        """Verifies whether content corresponds to an SCCM log file.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          lines (str): one or more lines from the text file.

        Returns:
          bool: True if this is the correct parser, False otherwise.
        """
        # Identify the token to which we attempt a match. The .match attribute
        # of a pyparsing.Literal is the raw string it matches ('<![LOG[').
        match = self._PARSING_COMPONENTS['msg_left_delimiter'].match

        # Because logs files can lead with a partial event,
        # we can't assume that the first character (post-BOM)
        # in the file is the beginning of our match - so we
        # look for match anywhere in lines.
        return match in lines
Example #22
0
class SyslogParser(text_parser.PyparsingMultiLineTextParser):
    """Parses syslog formatted log files"""

    NAME = 'syslog'
    DATA_FORMAT = 'System log (syslog) file'

    _ENCODING = 'utf-8'

    _plugin_classes = {}

    # The reporter and facility fields can contain any printable character, but
    # to allow for processing of syslog formats that delimit the reporter and
    # facility with printable characters, we remove certain common delimiters
    # from the set of printable characters.
    _REPORTER_CHARACTERS = ''.join(
        [c for c in pyparsing.printables if c not in [':', '[', '<']])
    _FACILITY_CHARACTERS = ''.join(
        [c for c in pyparsing.printables if c not in [':', '>']])

    _SYSLOG_SEVERITY = [
        'EMERG', 'ALERT', 'CRIT', 'ERR', 'WARNING', 'NOTICE', 'INFO', 'DEBUG'
    ]

    # TODO: change pattern to allow only spaces as a field separator.
    _BODY_PATTERN = (
        r'.*?(?=($|\n\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2})|' \
        r'($|\n\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}' \
        r'[\+|-]\d{2}:\d{2}\s))')

    # The rsyslog file format (RSYSLOG_FileFormat) consists of:
    # %TIMESTAMP% %HOSTNAME% %syslogtag%%msg%
    #
    # Where %TIMESTAMP% is in RFC-3339 date time format e.g.
    # 2020-05-31T00:00:45.698463+00:00
    _RSYSLOG_VERIFICATION_PATTERN = (r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.'
                                     r'\d{6}[\+|-]\d{2}:\d{2} ' +
                                     _BODY_PATTERN)

    # The rsyslog traditional file format (RSYSLOG_TraditionalFileFormat)
    # consists of:
    # %TIMESTAMP% %HOSTNAME% %syslogtag%%msg%
    #
    # Where %TIMESTAMP% is in yearless ctime date time format e.g.
    # Jan 22 07:54:32
    # TODO: change pattern to allow only spaces as a field separator.
    _RSYSLOG_TRADITIONAL_VERIFICATION_PATTERN = (
        r'^\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}\s' + _BODY_PATTERN)

    # The Chrome OS syslog messages are of a format beginning with an
    # ISO 8601 combined date and time expression with timezone designator:
    #   2016-10-25T12:37:23.297265-07:00
    #
    # This will then be followed by the SYSLOG Severity which will be one of:
    #   EMERG,ALERT,CRIT,ERR,WARNING,NOTICE,INFO,DEBUG
    #
    # 2016-10-25T12:37:23.297265-07:00 INFO
    _CHROMEOS_VERIFICATION_PATTERN = (
        r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.'
        r'\d{6}[\+|-]\d{2}:\d{2}\s'
        r'(EMERG|ALERT|CRIT|ERR|WARNING|NOTICE|INFO|DEBUG)' + _BODY_PATTERN)

    # Bundle all verification patterns into a single regular expression.
    _VERIFICATION_REGEX = re.compile('({0:s})'.format('|'.join([
        _CHROMEOS_VERIFICATION_PATTERN, _RSYSLOG_VERIFICATION_PATTERN,
        _RSYSLOG_TRADITIONAL_VERIFICATION_PATTERN
    ])))

    _PYPARSING_COMPONENTS = {
        'year':
        text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName('year'),
        'two_digit_month':
        (text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
            'two_digit_month')),
        'month':
        text_parser.PyparsingConstants.MONTH.setResultsName('month'),
        'day':
        text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName('day'),
        'hour':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('hour'),
        'minute':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('minute'),
        'second':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('second'),
        'fractional_seconds':
        pyparsing.Word(pyparsing.nums).setResultsName('fractional_seconds'),
        'hostname':
        pyparsing.Word(pyparsing.printables).setResultsName('hostname'),
        'reporter':
        pyparsing.Word(_REPORTER_CHARACTERS).setResultsName('reporter'),
        'pid':
        text_parser.PyparsingConstants.PID.setResultsName('pid'),
        'facility':
        pyparsing.Word(_FACILITY_CHARACTERS).setResultsName('facility'),
        'severity':
        pyparsing.oneOf(_SYSLOG_SEVERITY).setResultsName('severity'),
        'body':
        pyparsing.Regex(_BODY_PATTERN, re.DOTALL).setResultsName('body'),
        'comment_body':
        pyparsing.SkipTo(' ---').setResultsName('body')
    }

    _PYPARSING_COMPONENTS['date'] = (
        _PYPARSING_COMPONENTS['month'] + _PYPARSING_COMPONENTS['day'] +
        _PYPARSING_COMPONENTS['hour'] + pyparsing.Suppress(':') +
        _PYPARSING_COMPONENTS['minute'] + pyparsing.Suppress(':') +
        _PYPARSING_COMPONENTS['second'] + pyparsing.Optional(
            pyparsing.Suppress('.') +
            _PYPARSING_COMPONENTS['fractional_seconds']))

    _PYPARSING_COMPONENTS['rfc3339_datetime'] = pyparsing.Combine(
        pyparsing.Word(pyparsing.nums, exact=4) + pyparsing.Literal('-') +
        pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal('-') +
        pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal('T') +
        pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal(':') +
        pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal(':') +
        pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal('.') +
        pyparsing.Word(pyparsing.nums, exact=6) + pyparsing.oneOf(['-', '+']) +
        pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Optional(
            pyparsing.Literal(':') + pyparsing.Word(pyparsing.nums, exact=2)),
        joinString='',
        adjacent=True)

    _CHROMEOS_SYSLOG_LINE = (
        _PYPARSING_COMPONENTS['rfc3339_datetime'].setResultsName('datetime') +
        _PYPARSING_COMPONENTS['severity'] + _PYPARSING_COMPONENTS['reporter'] +
        pyparsing.Optional(pyparsing.Suppress(':')) + pyparsing.Optional(
            pyparsing.Suppress('[') + _PYPARSING_COMPONENTS['pid'] +
            pyparsing.Suppress(']')) +
        pyparsing.Optional(pyparsing.Suppress(':')) +
        _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

    _RSYSLOG_LINE = (
        _PYPARSING_COMPONENTS['rfc3339_datetime'].setResultsName('datetime') +
        _PYPARSING_COMPONENTS['hostname'] + _PYPARSING_COMPONENTS['reporter'] +
        pyparsing.Optional(
            pyparsing.Suppress('[') + _PYPARSING_COMPONENTS['pid'] +
            pyparsing.Suppress(']')) + pyparsing.Optional(
                pyparsing.Suppress('<') + _PYPARSING_COMPONENTS['facility'] +
                pyparsing.Suppress('>')) +
        pyparsing.Optional(pyparsing.Suppress(':')) +
        _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

    _RSYSLOG_TRADITIONAL_LINE = (
        _PYPARSING_COMPONENTS['date'] + _PYPARSING_COMPONENTS['hostname'] +
        _PYPARSING_COMPONENTS['reporter'] + pyparsing.Optional(
            pyparsing.Suppress('[') + _PYPARSING_COMPONENTS['pid'] +
            pyparsing.Suppress(']')) + pyparsing.Optional(
                pyparsing.Suppress('<') + _PYPARSING_COMPONENTS['facility'] +
                pyparsing.Suppress('>')) +
        pyparsing.Optional(pyparsing.Suppress(':')) +
        _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

    _SYSLOG_COMMENT = (_PYPARSING_COMPONENTS['date'] +
                       pyparsing.Suppress(':') + pyparsing.Suppress('---') +
                       _PYPARSING_COMPONENTS['comment_body'] +
                       pyparsing.Suppress('---') + pyparsing.LineEnd())

    _KERNEL_SYSLOG_LINE = (
        _PYPARSING_COMPONENTS['date'] +
        pyparsing.Literal('kernel').setResultsName('reporter') +
        pyparsing.Suppress(':') + _PYPARSING_COMPONENTS['body'] +
        pyparsing.lineEnd())

    LINE_STRUCTURES = [('chromeos_syslog_line', _CHROMEOS_SYSLOG_LINE),
                       ('kernel_syslog_line', _KERNEL_SYSLOG_LINE),
                       ('rsyslog_line', _RSYSLOG_LINE),
                       ('rsyslog_traditional_line', _RSYSLOG_TRADITIONAL_LINE),
                       ('syslog_comment', _SYSLOG_COMMENT)]

    _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

    def __init__(self):
        """Initializes a parser."""
        super(SyslogParser, self).__init__()
        # Most recently observed month (1-12); 0 until the first record.
        self._last_month = 0
        # Upper bound for year estimation; 0 until set from the mediator.
        self._maximum_year = 0
        # Maps a plugin's REPORTER name to the plugin instance.
        self._plugin_by_reporter = {}
        # Year assigned to yearless timestamps; 0 until estimated.
        self._year_use = 0

    def _UpdateYear(self, mediator, month):
        """Updates the year to use for events, based on last observed month.

    Args:
      mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      month (int): month observed by the parser, where January is 1.
    """
        # Lazily initialize the estimated and maximum years from the mediator.
        if not self._year_use:
            self._year_use = mediator.GetEstimatedYear()
        if not self._maximum_year:
            self._maximum_year = mediator.GetLatestYear()

        # First observation: just record the month, no rollover possible yet.
        if not self._last_month:
            self._last_month = month
            return

        # Some syslog daemons allow out-of-order sequences, so allow some leeway
        # to not cause Apr->May->Apr to cause the year to increment.
        # See http://bugzilla.adiscon.com/show_bug.cgi?id=527
        if self._last_month > (month + 1):
            # Month decreased by more than one: treat as a year rollover,
            # but never advance past the maximum plausible year.
            if self._year_use != self._maximum_year:
                self._year_use += 1
        self._last_month = month

    def EnablePlugins(self, plugin_includes):
        """Enables parser plugins.

    Args:
      plugin_includes (list[str]): names of the plugins to enable, where None
          or an empty list represents all plugins. Note that the default plugin
          is handled separately.
    """
        super(SyslogParser, self).EnablePlugins(plugin_includes)

        # Rebuild the reporter name to plugin lookup table from the plugins
        # the base class just enabled.
        self._plugin_by_reporter = {
            plugin.REPORTER: plugin for plugin in self._plugins}

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a matching entry.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): elements parsed from the file.

    Raises:
      ParseError: when the structure type is unknown.
    """
        if key not in self._SUPPORTED_KEYS:
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

        # ChromeOS and rsyslog lines carry a full ISO 8601 timestamp in the
        # 'datetime' element, so no year inference is needed.
        if key in ('chromeos_syslog_line', 'rsyslog_line'):
            date_time = dfdatetime_time_elements.TimeElementsInMicroseconds()
            iso8601_string = self._GetValueFromStructure(structure, 'datetime')

            try:
                date_time.CopyFromStringISO8601(iso8601_string)
            except ValueError:
                parser_mediator.ProduceExtractionWarning(
                    'invalid date time value: {0:s}'.format(iso8601_string))
                return

        else:
            # TODO: add support for fractional seconds.

            # Traditional timestamps use a month name and omit the year, so
            # map the name to a number and infer the year via _UpdateYear.
            month = self._GetValueFromStructure(structure, 'month')
            try:
                month = timelib.MONTH_DICT.get(month.lower(), 0)
            except AttributeError:
                # month was not a string, for example None.
                parser_mediator.ProduceExtractionWarning(
                    'invalid month value: {0!s}'.format(month))
                return

            if month != 0:
                self._UpdateYear(parser_mediator, month)

            day = self._GetValueFromStructure(structure, 'day')
            hours = self._GetValueFromStructure(structure, 'hour')
            minutes = self._GetValueFromStructure(structure, 'minute')
            seconds = self._GetValueFromStructure(structure, 'second')

            time_elements_tuple = (self._year_use, month, day, hours, minutes,
                                   seconds)

            try:
                date_time = dfdatetime_time_elements.TimeElements(
                    time_elements_tuple=time_elements_tuple)
                # The timestamp has no time zone information, so it is
                # interpreted as local time.
                date_time.is_local_time = True
            except ValueError:
                parser_mediator.ProduceExtractionWarning(
                    'invalid date time value: {0!s}'.format(
                        time_elements_tuple))
                return

        plugin = None
        if key == 'syslog_comment':
            event_data = SyslogCommentEventData()
            event_data.body = self._GetValueFromStructure(structure, 'body')
            # TODO: pass line number to offset or remove.
            event_data.offset = 0

        else:
            event_data = SyslogLineEventData()
            event_data.body = self._GetValueFromStructure(structure, 'body')
            event_data.hostname = self._GetValueFromStructure(
                structure, 'hostname')
            # TODO: pass line number to offset or remove.
            event_data.offset = 0
            event_data.pid = self._GetValueFromStructure(structure, 'pid')
            event_data.reporter = self._GetValueFromStructure(
                structure, 'reporter')
            event_data.severity = self._GetValueFromStructure(
                structure, 'severity')

            # Give a reporter-specific plugin the first chance to process the
            # line; fall back to producing a generic event below when no
            # plugin is registered or the plugin rejects the line.
            plugin = self._plugin_by_reporter.get(event_data.reporter, None)
            if plugin:
                attributes = {
                    'body': event_data.body,
                    'hostname': event_data.hostname,
                    'pid': event_data.pid,
                    'reporter': event_data.reporter,
                    'severity': event_data.severity
                }

                try:
                    # TODO: pass event_data instead of attributes.
                    plugin.Process(parser_mediator, date_time, attributes)

                except errors.WrongPlugin:
                    plugin = None

        if not plugin:
            event = time_events.DateTimeValuesEvent(
                date_time,
                definitions.TIME_DESCRIPTION_WRITTEN,
                time_zone=parser_mediator.timezone)
            parser_mediator.ProduceEventWithEventData(event, event_data)

    def VerifyStructure(self, parser_mediator, lines):
        """Verifies that this is a syslog-formatted file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between
          parsers and other components, such as storage and dfvfs.
      lines (str): one or more lines from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
        # A regex match object is truthy; normalize to a plain boolean.
        match = self._VERIFICATION_REGEX.match(lines)
        return match is not None
Example #23
0
    return instruction.String (string, locn=_location (src, locn))

#@pp.traceParseAction
def _make_number (src, locn, toks):
    """Parse action: wrap the single matched token in an instruction.Number."""
    assert len (toks) == 1
    numeric_value = float (toks [0])
    return instruction.Number (numeric_value, locn=_location (src, locn))

#@pp.traceParseAction
def _make_procedure (src, locn, toks):
    """Parse action: wrap the single matched group in an instruction.Procedure."""
    assert len (toks) == 1
    body = toks [0]
    return instruction.Procedure (body, locn=_location (src, locn))

# Matches a number which may be negative, include a decimal point or exponential notation.
# e.g. 1, -1, 3.14, 314e-2
_number = pp.Regex (r'-?\d+(\.\d*)?([eE]-?\d+)?').setParseAction (_make_number).setName ('number')

# Identifiers: a letter followed by letters, digits or underscores.
_ident = pp.Word (pp.alphas, pp.alphanums + '_')
# "#" comments run to end of line and are dropped from the results.
_comment = pp.Regex (r'#.*').suppress ()

# Any identifier that is not a literal is treated as an operator.
_operator = _ident.copy ().setParseAction (_make_operator).setName ('operator')
_string = pp.quotedString.setParseAction (_make_string).setName ('string')
# Braces delimit procedures; only their contents are kept.
_open_brace = pp.Keyword ('{').suppress ()
_close_brace = pp.Keyword ('}').suppress ()

_boolean = (pp.Keyword ('true').setParseAction (_make_true).setName ('true') ^
            pp.Keyword ('false').setParseAction (_make_false).setName ('false'))

# Procedures nest, so the rule is declared Forward and defined afterwards:
# a procedure is a braced sequence of operations.
_procedure = pp.Forward ()
operation = pp.ZeroOrMore (_comment ^ _boolean ^ _number ^ _procedure ^ _string ^ _operator)
_procedure << pp.Group (_open_brace + operation + _close_brace).setName ('procedure').setParseAction (_make_procedure)
Example #24
0
    def __init__(self):
        """Builds the pyparsing grammar and stores it on ``self.parser``.

        The grammar accepts either a single boolean expression, or a series
        of ``tag "name" { expr }`` blocks followed by ``return { expr }``.
        """
        # supported operators
        operator = pp.Regex(
            r"<=|>=|<>|\!=|==|<|>|not|in|regex_partial|regex_exact|geo_box|geo_radius|geo_polygon|contains_any|contains_all|substr|contains_near|any|contains_substr|near|contains|wildcard"
        ).setName("operator").addParseAction(self.validateOperator)

        # literals
        # "(?:" opens a non-capturing group; the previous "(:?" was a typo
        # that also accepted a stray ":" inside numbers (e.g. "1:5").
        number = pp.Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?").setName(
            "number")
        numberList = pp.Group(
            pp.Literal('[') + number + pp.ZeroOrMore("," + number) +
            pp.Literal(']')).setName("numberList")
        string = pp.dblQuotedString
        literals = number | numberList | string

        # symbols
        identifier = pp.Regex(
            r"[a-z][a-z_]+(?:\.[a-z][a-z_]+)*").addParseAction(
                self.validateIdentifier).setName("identifier")

        # we'll get there...
        subExpr = pp.Forward()

        # predicates
        stream = pp.Group(pp.Literal("stream") + string).setName("stream")
        exists = pp.Group(identifier + pp.Literal("exists")).setName("exists")

        # boolean predicates; the identifier may appear on either side of
        # the operator.
        comparison = pp.Group(identifier + operator + literals
                              | literals + operator +
                              identifier).setName("comparison")

        condition = comparison | stream | exists | subExpr
        # Parenthesized sub-expressions recurse through nestedExpr.
        subExpr << pp.nestedExpr(content=condition)

        # standard boolean operator precedence
        expr = pp.operatorPrecedence(condition, [
            (
                pp.CaselessLiteral("not"),
                1,
                pp.opAssoc.RIGHT,
            ),
            (
                pp.CaselessLiteral("AND"),
                2,
                pp.opAssoc.LEFT,
            ),
            (
                pp.CaselessLiteral("OR"),
                2,
                pp.opAssoc.LEFT,
            ),
        ])

        # tag "thing" { expr }
        tag = pp.Group(
            pp.Literal("tag") + pp.quotedString +
            pp.nestedExpr("{", "}", expr)).setName("tag")

        # return { expr }
        a_return = pp.Group(
            pp.Literal("return") +
            pp.nestedExpr("{", "}", expr)).setName("return")

        # a single expression or tag [, tag, ...] return { expression }
        parser = expr | (pp.OneOrMore(tag) + a_return)

        # handle multilines
        # NOTE(review): setDefaultWhitespaceChars is a class-level setting in
        # pyparsing and affects all parsers, not just this one — confirm that
        # the global effect is intended.
        parser.setDefaultWhitespaceChars(" \t\n\r")

        # handle // comments
        parser.ignore("//" + pp.restOfLine)
        self.parser = parser
        return p.parseFile(file_name).asList()
    except pp.ParseException:
        msg = "Error Trying to parse: {} in file: {}".format(p, file_name)
        print(msg)
        raise


def skip_supress(z):
    """Suppress stream until `z`"""
    skipped = pp.SkipTo(z)
    return pp.Suppress(skipped)


# parse utils
# Unsigned integer: one or more digits.
natural = pp.Word(pp.nums)

# Floating point number; the decimal point is mandatory, the sign, integer
# part, fraction digits and exponent are all optional.
float_number = pp.Regex(r'(\-)?(\d+)?(\.)(\d*)?([eE][\-\+]\d+)?')

# Consume and drop the remainder of the current line.
skipLine = pp.Suppress(skip_supress('\n'))

# ";" starts a comment that runs to end of line.
comment = pp.Suppress(pp.Literal(';')) + skipLine

optional_comment = pp.ZeroOrMore(comment)

# Bare token: alphanumerics plus "*".
word = pp.Word(pp.alphanums + "*")

# A data line: one or more numbers/words with an optional trailing comment.
line = pp.Group(
    pp.OneOrMore(float_number | word) + pp.Optional(comment))

lines = pp.Group(pp.OneOrMore(line))

# A word enclosed in square brackets; the brackets are dropped.
brackets = pp.Suppress("[") + word + pp.Suppress("]")
Example #26
0
class sparc_syntax:
    """Pyparsing grammar rules and parse actions for SPARC assembly source.

    All attributes are built at class-definition time; the parse actions
    translate matched tokens into objects via the external ``env``,
    ``instruction`` and ``asmhelper`` helpers.
    """

    divide = False
    noprefix = False

    # "#" starts a comment that runs to end of line.
    comment = pp.Regex(r'\#.*')
    # Symbols become 32-bit external references (env.ext).
    symbol  = pp.Regex(r'[A-Za-z_.$][A-Za-z0-9_.$]*').setParseAction(lambda r: env.ext(r[0],size=32))
    # A mnemonic is a symbol at line start, optionally with a ",a" suffix;
    # normalized to a lowercase string.
    mnemo   = pp.LineStart() + symbol + pp.Optional(pp.Literal(',a'))
    mnemo.setParseAction(lambda r: r[0].ref.lower()+''.join(r[1:]))
    # Number literals in decimal, hex, octal, binary and character form, all
    # reduced to Python ints and then wrapped as 32-bit constants (env.cst).
    integer = pp.Regex(r'[1-9][0-9]*').setParseAction(lambda r: int(r[0],10))
    hexa    = pp.Regex(r'0[xX][0-9a-fA-F]+').setParseAction(lambda r: int(r[0],16))
    octa    = pp.Regex(r'0[0-7]*').setParseAction(lambda r: int(r[0],8))
    bina    = pp.Regex(r'0[bB][01]+').setParseAction(lambda r: int(r[0],2))
    char    = pp.Regex(r"('.)|('\\\\)").setParseAction(lambda r: ord(r[0]))
    number  = integer|hexa|octa|bina|char
    number.setParseAction(lambda r: env.cst(r[0],32))

    term    = symbol|number

    # Forward declaration: expressions are defined after the operand rules.
    exp     = pp.Forward()

    op_one  = pp.oneOf("- ~")
    op_sig  = pp.oneOf("+ -")
    op_mul  = pp.oneOf("* /")
    op_cmp  = pp.oneOf("== != <= >= < > <>")
    op_bit  = pp.oneOf("^ && || & |")

    # Operator precedence table, tightest binding first.
    operators = [(op_one,1,pp.opAssoc.RIGHT),
                 (op_sig,2,pp.opAssoc.LEFT),
                 (op_mul,2,pp.opAssoc.LEFT),
                 (op_cmp,2,pp.opAssoc.LEFT),
                 (op_bit,2,pp.opAssoc.LEFT),
                ]
    # Registers are "%name"; %hi/%lo are excluded here and parsed below.
    reg = pp.Suppress('%')+pp.NotAny(pp.oneOf('hi lo'))+symbol
    # %hi(expr) / %lo(expr) operators applied to a parenthesized expression.
    hilo = pp.oneOf('%hi %lo')+pp.Suppress('(')+exp+pp.Suppress(')')
    exp << pp.operatorPrecedence(term|reg|hilo,operators)

    # Memory operands are bracketed address expressions.
    adr = pp.Suppress('[')+exp+pp.Suppress(']')
    mem = adr #+pp.Optional(symbol|imm)
    mem.setParseAction(lambda r: env.mem(r[0]))

    opd = exp|mem|reg
    opds = pp.Group(pp.delimitedList(opd))

    # A full instruction line: mnemonic, optional comma-separated operands,
    # optional trailing comment.
    instr = mnemo + pp.Optional(opds) + pp.Optional(comment)

    def action_reg(toks):
        # Map a parsed register symbol to the corresponding env register
        # object; "asr*" names go through env.reg.
        rname = toks[0]
        if rname.ref.startswith('asr'): return env.reg(rname.ref)
        return env.__dict__[rname.ref]

    def action_hilo(toks):
        # Apply %hi/%lo; %lo results are zero-extended back to 32 bits.
        v = toks[1]
        return env.hi(v) if toks[0]=='%hi' else env.lo(v).zeroextend(32)

    def action_exp(toks):
        # Recursively fold the nested token lists produced by
        # operatorPrecedence into env operation expressions.
        tok = toks[0]
        if isinstance(tok,env.exp): return tok
        if len(tok)==2:
            # Unary operator.
            op=tok[0]
            r=tok[1]
            if isinstance(r,list): r=action_exp(r)
            return env.oper(op,r)
        elif len(tok)==3:
            # Binary operator.
            op=tok[1]
            l=tok[0]
            r=tok[2]
            if isinstance(l,list): l=action_exp(l)
            if isinstance(r,list): r=action_exp(r)
            return env.oper(op,l,r)
        else:
            return tok

    def action_instr(toks):
        # Build an instruction object from the mnemonic and operand tokens.
        i = instruction('')
        i.mnemonic = toks[0]
        if len(toks)>1: i.operands = toks[1][0:]
        return asmhelper(i)

    # actions:
    reg.setParseAction(action_reg)
    hilo.setParseAction(action_hilo)
    exp.setParseAction(action_exp)
    instr.setParseAction(action_instr)
Example #27
0
"""

import pyparsing as pp
pp.ParserElement.enablePackrat()

# Punctuation tokens; suppressed so they do not appear in the results.
LBRACE, RBRACE, LPAR, RPAR, SEMI = map(pp.Suppress, "{}();")
EQ = pp.Literal('=')

# Reserved words of the language; also bound individually (WHILE, IF, ...).
keywords = (WHILE, IF, PRINT, PUTC,
            ELSE) = map(pp.Keyword, "while if print putc else".split())
any_keyword = pp.MatchFirst(keywords)
# An identifier is any common identifier that is not a reserved word.
identifier = ~any_keyword + pp.pyparsing_common.identifier
integer = pp.pyparsing_common.integer
# Whitespace escapes are kept verbatim inside quoted strings.
string = pp.QuotedString(
    '"', convertWhitespaceEscapes=False).setName("quoted string")
# Character literal: a single (optionally backslash-escaped) character.
char = pp.Regex(r"'\\?.'")

expr = pp.infixNotation(identifier | integer | char, [
    (
        pp.oneOf("+ - !"),
        1,
        pp.opAssoc.RIGHT,
    ),
    (
        pp.oneOf("* / %"),
        2,
        pp.opAssoc.LEFT,
    ),
    (
        pp.oneOf("+ -"),
        2,
Example #28
0
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.
import re
from six.moves.urllib import parse as urllib_parse

import pyparsing as pp

uninary_operators = ("not", )
binary_operator = (u">=", u"<=", u"!=", u">", u"<", u"=", u"==", u"eq", u"ne",
                   u"lt", u"gt", u"ge", u"le")
multiple_operators = (u"and", u"or")

# Alternation of the binary comparison operators; two-character forms are
# listed before their one-character prefixes so they match first.
operator = pp.Regex(u"|".join(binary_operator))
null = pp.Regex("None|none|null").setParseAction(pp.replaceWith(None))
boolean = "False|True|false|true"
boolean = pp.Regex(boolean).setParseAction(lambda t: t[0].lower() == "true")
hex_string = lambda n: pp.Word(pp.hexnums, exact=n)
# Canonical 8-4-4-4-12 UUID form.
uuid = pp.Combine(
    hex_string(8) + ("-" + hex_string(4)) * 3 + "-" + hex_string(12))
# "(?:" opens a non-capturing group; the previous "(:?" was a typo that also
# accepted a stray ":" inside numbers (e.g. "1:5").
number = r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?"
number = pp.Regex(number).setParseAction(lambda t: float(t[0]))
identifier = pp.Word(pp.alphas, pp.alphanums + "_")
quoted_string = pp.QuotedString('"') | pp.QuotedString("'")
# Forward declaration so lists can contain nested comparison terms.
comparison_term = pp.Forward()
in_list = pp.Group(
    pp.Suppress('[') + pp.Optional(pp.delimitedList(comparison_term)) +
    pp.Suppress(']'))("list")
comparison_term << (null | boolean | uuid | identifier | number
#

import pyparsing as pp
from pyparsing import pyparsing_common as ppc
pp.ParserElement.enablePackrat()

# Punctuation literals; parentheses are suppressed from the results.
COLON, LBRACK, RBRACK, LBRACE, RBRACE, TILDE, CARAT = map(
    pp.Literal, ":[]{}~^")
LPAR, RPAR = map(pp.Suppress, "()")
# Case-insensitive query keywords.
and_, or_, not_, to_ = map(pp.CaselessKeyword, "AND OR NOT TO".split())
keyword = and_ | or_ | not_ | to_

expression = pp.Forward()

# A bare term: word characters or backslash-escaped special characters.
valid_word = pp.Regex(
    r'([a-zA-Z0-9*_+.-]|\\\\|\\([+\-!(){}\[\]^"~*?:]|\|\||&&))+').setName(
        "word")
# Un-escape: chr(127) temporarily stands in for an escaped backslash so that
# single escaping backslashes can be stripped without touching it.
valid_word.setParseAction(lambda t: t[0].replace('\\\\', chr(127)).replace(
    '\\', '').replace(chr(127), '\\'))

string = pp.QuotedString('"')

# "+term" requires and "-term" prohibits a term.
required_modifier = pp.Literal("+")("required")
prohibit_modifier = pp.Literal("-")("prohibit")
integer = ppc.integer()
# "~n" after a phrase: proximity within n words.
proximity_modifier = pp.Group(TILDE + integer("proximity"))
number = ppc.fnumber()
# "~" optionally followed by a similarity value, defaulting to 0.5.
fuzzy_modifier = TILDE + pp.Optional(number, default=0.5)("fuzzy")

term = pp.Forward()
field_name = valid_word().setName("fieldname")