Ejemplos de Word en Python, ejemplos de pyparsing.Word en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: example2.py Proyecto: asdbaihu/pyparsing_ext

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import math
import pyparsing as pp

from pyparsing_ext import *
from pyparsing_ext.pylang import *

# grammar
# NOTE: opTable is the same object as the imported arithOpTable (no copy is
# made), so the append below extends that shared table in place.
opTable = arithOpTable
# Extra operator token: starts with '$' or '~' and continues with digits or
# the listed punctuation characters.
OPUNC = pp.Word('$~', '0123456789+-*/^&%<>=@!~:')
opTable.append({'token': OPUNC})

smallGrammar = ProgrammingGrammarParser(
    keywords=commonKeywords,
    constants=[
        {'token': NUMBER, 'action': NumberAction},
        {'token': STRING, 'action': StringAction},
        {'token': pp.oneOf('True False'), 'action': BooleAction},
    ],
    variables=[{'token': IDEN, 'action': VariableAction}],
    operators=opTable,
    functions=[
        {'token': IDEN('function'), 'action': FunctionAction},
        # A "bifix" is written as a (left, right) pair of punctuation tokens.
        {'token': (PUNC('left'), PUNC('right')), 'action': BifixAction},
    ],
)

# semantics
# Maps a (left, right) delimiter pair to the callable that evaluates it.
bifixDict = {('|', '|'): abs, ('[', '_]'): math.floor, ('[', '^]'): math.ceil}
# NOTE: smallDict is the same object as the imported arithDict, so both
# update() calls below modify that shared dictionary in place.
smallDict = arithDict
smallDict.update(bifixDict)
# Python builtins exposed by name ('tuple' was previously listed twice).
pydict = {
    'len': len,
    'abs': abs,
    'min': min,
    'max': max,
    'str': str,
    'sum': sum,
    'tuple': tuple,
    'any': any,
    'all': all,
    'list': list,
    'dict': dict,
    'int': int,
}
smallDict.update(pydict)

# language
smallpyLanguage = ProgrammingLanguage(
    name="SmallPython",
    grammar=smallGrammar,
    calculator=Calculator(dict_=smallDict),
)
smallpyLanguage.info = {
    'version': '0.0',
    'paths': [],
    'suffix': '.spy',
}

Ejemplo n.º 2

0

Mostrar archivo

Archivo: test_simple_unit.py Proyecto: jgrey4296/pyparsing

class TestRepetition(PyparsingExpressionTestCase):
    """Specs for repetition expressions built from ``pp.Word``.

    Each ``PpTestSpec`` pairs an expression and an input text with the
    expected token list (and, for one spec, the expected results-name dict).
    Covers ``expr[...]``, ``pp.OneOrMore`` and ``pp.delimited_list``.
    """

    tests = [
        PpTestSpec(
            desc="Match several words",
            expr=(pp.Word("x") | pp.Word("y"))[...],
            text="xxyxxyyxxyxyxxxy",
            expected_list=[
                "xx", "y", "xx", "yy", "xx", "y", "x", "y", "xxx", "y"
            ],
        ),
        PpTestSpec(
            desc="Match several words, skipping whitespace",
            expr=(pp.Word("x") | pp.Word("y"))[...],
            text="x x  y xxy yxx y xyx  xxy",
            expected_list=[
                "x",
                "x",
                "y",
                "xx",
                "y",
                "y",
                "xx",
                "y",
                "x",
                "y",
                "x",
                "xx",
                "y",
            ],
        ),
        # Same input and expectation as the spec above, written with the
        # explicit OneOrMore class instead of the [...] notation.
        PpTestSpec(
            desc="Match several words, skipping whitespace (old style)",
            expr=pp.OneOrMore(pp.Word("x") | pp.Word("y")),
            text="x x  y xxy yxx y xyx  xxy",
            expected_list=[
                "x",
                "x",
                "y",
                "xx",
                "y",
                "y",
                "xx",
                "y",
                "x",
                "y",
                "x",
                "xx",
                "y",
            ],
        ),
        PpTestSpec(
            desc=
            "Match words and numbers - show use of results names to collect types of tokens",
            # The trailing "*" on a results name accumulates every match
            # under that name, as expected_dict below shows.
            expr=(pp.Word(pp.alphas)("alpha*")
                  | pp.pyparsing_common.integer("int*"))[...],
            text="sdlfj23084ksdfs08234kjsdlfkjd0934",
            expected_list=["sdlfj", 23084, "ksdfs", 8234, "kjsdlfkjd", 934],
            expected_dict={
                "alpha": ["sdlfj", "ksdfs", "kjsdlfkjd"],
                "int": [23084, 8234, 934],
            },
        ),
        PpTestSpec(
            desc="Using delimited_list (comma is the default delimiter)",
            expr=pp.delimited_list(pp.Word(pp.alphas)),
            text="xxyx,xy,y,xxyx,yxx, xy",
            expected_list=["xxyx", "xy", "y", "xxyx", "yxx", "xy"],
        ),
        PpTestSpec(
            desc=
            "Using delimited_list (comma is the default delimiter) with trailing delimiter",
            expr=pp.delimited_list(pp.Word(pp.alphas),
                                   allow_trailing_delim=True),
            text="xxyx,xy,y,xxyx,yxx, xy,",
            expected_list=["xxyx", "xy", "y", "xxyx", "yxx", "xy"],
        ),
        PpTestSpec(
            desc="Using delimited_list, with ':' delimiter",
            expr=pp.delimited_list(pp.Word(pp.hexnums, exact=2),
                                   delim=":",
                                   combine=True),
            text="0A:4B:73:21:FE:76",
            expected_list=["0A:4B:73:21:FE:76"],
        ),
        # Distinct description so a failure here can be told apart from the
        # spec above; with combine=True the trailing ':' stays in the token.
        PpTestSpec(
            desc="Using delimited_list, with ':' delimiter and trailing delimiter",
            expr=pp.delimited_list(
                pp.Word(pp.hexnums, exact=2),
                delim=":",
                combine=True,
                allow_trailing_delim=True,
            ),
            text="0A:4B:73:21:FE:76:",
            expected_list=["0A:4B:73:21:FE:76:"],
        ),
    ]

Ejemplo n.º 3

0

Mostrar archivo

# Single-token punctuation literals used by the grammar.
C_BRACKET = pp.Literal(']')
O_BRACE = pp.Literal('{')
C_BRACE = pp.Literal('}')
O_PAREN = pp.Literal('(')
C_PAREN = pp.Literal(')')
DOLLAR = pp.Literal('$')
AT = pp.Literal('@')
SLASH = pp.Literal('/')
DBL_VLINE = pp.Literal('||')

# Subtree application and testing.
# identChars lists the characters treated as part of the keyword, so '::'
# does not match when it is directly followed by '?' or '!', and '::!' does
# not match when directly followed by '?'.
S_APP = pp.Keyword('::', identChars='?!')
S_APP_EX = pp.Keyword('::!', identChars='?')
S_TEST = pp.Keyword('::?')

# Exactly one arithmetic operator character.
ARITH = pp.Word('-+*/^%', exact=1)

# NAME: one or more letters.  IG_NAME: an underscore followed by letters.
NAME = pp.Word(pp.alphas)
IG_NAME = pp.Word('_', pp.alphas)
NUM = pp.Word(pp.nums + '-_d/')  # negation, formatting, decimal, and fraction
STRING = pp.dblQuotedString

# Forward placeholders; their definitions are attached later with '<<'.
NON_PATH_VAR = pp.Forward()
PATH_VAR = pp.Forward()

# A variable starts with either '$' or '@'.
VAR_HEADER = pp.Group(DOLLAR | AT)


PATH_VAR << VAR_HEADER.setResultsName('VAR_SCOPE') + \
    pp.Group(pp.Keyword('..', ' .')).setResultsName('PATH_ACCESS') + \
    pp.Word(pp.alphas + pp.nums).setResultsName('VARNAME') - \

Ejemplo n.º 4

0

Mostrar archivo

    pp.Suppress('.') + _identifier('table_name') + _ignore('ADD CONSTRAINT') +
    _identifier('constraint_name') + _ignore('FOREIGN KEY') +
    pp.Suppress('(') + _identifier('column_name') + pp.Suppress(')') +
    _ignore('REFERENCES') + _identifier('foreign_schema') + pp.Suppress('.') +
    _identifier('foreign_table_name') + pp.Suppress('(') +
    _identifier('foreign_column_name') + pp.Suppress(')'))

# Grammar for a ``CREATE TABLE <schema>.<table> ( <column/constraint list> )``
# statement.  The comma-separated body is exposed under the results name
# 'columns'; each column definition is a group carrying 'name', 'data_type'
# and 'not_null'.
# NOTE(review): _ignore and _identifier are helpers defined elsewhere in this
# file; presumably they skip the given keywords and capture a named
# identifier -- confirm against their definitions.
create_table_expr: pp.ParseExpression = (
    _ignore('CREATE', 'GLOBAL', 'LOCAL', 'TEMPORARY', 'TEMP', 'UNLOGGED',
            'TABLE', 'IF NOT EXISTS') + _identifier('schema') +
    pp.Suppress('.') + _identifier('table_name') + pp.Suppress('(') +
    pp.delimitedList(
        # Table-level CONSTRAINT entries are consumed up to the next comma
        # and dropped from the results.
        (pp.Suppress('CONSTRAINT' + pp.Regex(r'[^,]+')))
        | pp.Group(
            _identifier('name') + _identifier('data_type') +
            pp.Suppress(pp.Optional(pp.Word(r'\[\]'))) +
            _ignore('WITHOUT TIME ZONE', 'WITH TIME ZONE', 'PRECISION',
                    'VARYING') + pp.Optional(
                        pp.Suppress('(') + pp.Regex(r'\d+\s*,*\s*\d*') +
                        _ignore('CHAR', 'BYTEST') + pp.Suppress(')')) +
            # "DEFAULT false" / "DEFAULT true".  The alternation is
            # parenthesised explicitly: '+' binds tighter than '|', so the
            # previous spelling parsed as (DEFAULT + 'false') | 'true'.
            pp.Suppress(
                pp.Optional(
                    pp.CaselessKeyword('DEFAULT') +
                    (pp.Literal('false') | pp.Literal('true'))))
            # Any remaining COMMENT/DEFAULT clause or free-form modifiers,
            # as long as the text does not start with a '--' comment.
            + pp.Suppress(
                pp.Optional(
                    pp.Regex(
                        r"(?!--)(\b(COMMENT|DEFAULT)\b\s+[^,]+|([A-Za-z0-9_'\": -]|[^\x01-\x7E])*)",
                        re.IGNORECASE,
                    ), ), ) + pp.Suppress(pp.Optional(pp.Word(r'\[\]'))) +
            # bool() of the matched tokens becomes the 'not_null' value.
            pp.Optional(pp.CaselessKeyword('NOT NULL'))
            ('not_null').setParseAction(bool), ), )('columns') +
    pp.Suppress(')'))

Ejemplo n.º 5

0

Mostrar archivo

Archivo: tgrep.py Proyecto: Geolem/nltk

def _build_tgrep_parser(set_parse_actions=True):
    """
    Builds a pyparsing-based parser object for tokenizing and
    interpreting tgrep search strings.

    :param set_parse_actions: when true, the ``_tgrep_*`` / ``_macro_*``
        parse actions are attached to the grammar elements; when false the
        grammar is returned without them.
    :return: the top-level ``tgrep_exprs`` expression, configured to ignore
        ``#`` comments that run to the end of the line.
    """
    # Relation operator: an optional "!" followed by the operator text.
    tgrep_op = pyparsing.Optional("!") + pyparsing.Regex(
        "[$%,.<>][%,.<>0-9-':]*")
    # Quoted node strings and /regex/ nodes; unquoteResults=False keeps the
    # surrounding delimiters in the resulting token.
    tgrep_qstring = pyparsing.QuotedString(quoteChar='"',
                                           escChar="\\",
                                           unquoteResults=False)
    tgrep_node_regex = pyparsing.QuotedString(quoteChar="/",
                                              escChar="\\",
                                              unquoteResults=False)
    # The same two forms with an ``i@`` prefix.
    tgrep_qstring_icase = pyparsing.Regex(
        'i@\\"(?:[^"\\n\\r\\\\]|(?:\\\\.))*\\"')
    tgrep_node_regex_icase = pyparsing.Regex(
        "i@\\/(?:[^/\\n\\r\\\\]|(?:\\\\.))*\\/")
    tgrep_node_literal = pyparsing.Regex("[^][ \r\t\n;:.,&|<>()$!@%'^=]+")
    # Forward placeholders for the mutually recursive rules, filled in with
    # ``<<`` further down.
    tgrep_expr = pyparsing.Forward()
    tgrep_relations = pyparsing.Forward()
    tgrep_parens = pyparsing.Literal("(") + tgrep_expr + ")"
    # "N(" + optional comma-separated numbers + ")".
    tgrep_nltk_tree_pos = (pyparsing.Literal("N(") + pyparsing.Optional(
        pyparsing.Word(pyparsing.nums) + "," + pyparsing.Optional(
            pyparsing.delimitedList(pyparsing.Word(pyparsing.nums), delim=",")
            + pyparsing.Optional(","))) + ")")
    tgrep_node_label = pyparsing.Regex("[A-Za-z0-9]+")
    tgrep_node_label_use = pyparsing.Combine("=" + tgrep_node_label)
    # see _tgrep_segmented_pattern_action
    # (a copy, so it can carry its own parse action below)
    tgrep_node_label_use_pred = tgrep_node_label_use.copy()
    macro_name = pyparsing.Regex("[^];:.,&|<>()[$!@%'^=\r\t\n ]+")
    # No whitespace is skipped before a macro name.
    macro_name.setWhitespaceChars("")
    macro_use = pyparsing.Combine("@" + macro_name)
    # Alternatives are tried in the order listed.
    tgrep_node_expr = (tgrep_node_label_use_pred
                       | macro_use
                       | tgrep_nltk_tree_pos
                       | tgrep_qstring_icase
                       | tgrep_node_regex_icase
                       | tgrep_qstring
                       | tgrep_node_regex
                       | "*"
                       | tgrep_node_literal)
    # A node expression optionally followed by "=label", with no whitespace
    # allowed before the "=" or before the label.
    tgrep_node_expr2 = (
        tgrep_node_expr + pyparsing.Literal("=").setWhitespaceChars("") +
        tgrep_node_label.copy().setWhitespaceChars("")) | tgrep_node_expr
    tgrep_node = tgrep_parens | (pyparsing.Optional("'") + tgrep_node_expr2 +
                                 pyparsing.ZeroOrMore("|" + tgrep_node_expr))
    tgrep_brackets = pyparsing.Optional("!") + "[" + tgrep_relations + "]"
    tgrep_relation = tgrep_brackets | (tgrep_op + tgrep_node)
    # Conjunction: relations joined by an optional "&".
    tgrep_rel_conjunction = pyparsing.Forward()
    tgrep_rel_conjunction << (
        tgrep_relation +
        pyparsing.ZeroOrMore(pyparsing.Optional("&") + tgrep_rel_conjunction))
    # Disjunction: conjunctions joined by "|".
    tgrep_relations << tgrep_rel_conjunction + pyparsing.ZeroOrMore(
        "|" + tgrep_relations)
    tgrep_expr << tgrep_node + pyparsing.Optional(tgrep_relations)
    tgrep_expr_labeled = tgrep_node_label_use + pyparsing.Optional(
        tgrep_relations)
    # An expression followed by zero or more ":"-separated labeled segments.
    tgrep_expr2 = tgrep_expr + pyparsing.ZeroOrMore(":" + tgrep_expr_labeled)
    # Macro definition: "@", required whitespace (suppressed), name, body.
    macro_defn = (pyparsing.Literal("@") + pyparsing.White().suppress() +
                  macro_name + tgrep_expr2)
    # Top level: optional leading macro definitions, then ";"-separated
    # expressions / macro definitions; trailing ";" are dropped.
    tgrep_exprs = (
        pyparsing.Optional(macro_defn +
                           pyparsing.ZeroOrMore(";" + macro_defn) + ";") +
        tgrep_expr2 + pyparsing.ZeroOrMore(";" + (macro_defn | tgrep_expr2)) +
        pyparsing.ZeroOrMore(";").suppress())
    if set_parse_actions:
        tgrep_node_label_use.setParseAction(_tgrep_node_label_use_action)
        tgrep_node_label_use_pred.setParseAction(
            _tgrep_node_label_pred_use_action)
        macro_use.setParseAction(_tgrep_macro_use_action)
        tgrep_node.setParseAction(_tgrep_node_action)
        tgrep_node_expr2.setParseAction(_tgrep_bind_node_label_action)
        tgrep_parens.setParseAction(_tgrep_parens_action)
        tgrep_nltk_tree_pos.setParseAction(_tgrep_nltk_tree_pos_action)
        tgrep_relation.setParseAction(_tgrep_relation_action)
        tgrep_rel_conjunction.setParseAction(_tgrep_conjunction_action)
        tgrep_relations.setParseAction(_tgrep_rel_disjunction_action)
        macro_defn.setParseAction(_macro_defn_action)
        # the whole expression is also the conjunction of two
        # predicates: the first node predicate, and the remaining
        # relation predicates
        tgrep_expr.setParseAction(_tgrep_conjunction_action)
        tgrep_expr_labeled.setParseAction(_tgrep_segmented_pattern_action)
        tgrep_expr2.setParseAction(
            functools.partial(_tgrep_conjunction_action, join_char=":"))
        tgrep_exprs.setParseAction(_tgrep_exprs_action)
    # "#" starts a comment that extends to the end of the line.
    return tgrep_exprs.ignore("#" + pyparsing.restOfLine)

Ejemplo n.º 6

0

Mostrar archivo

Archivo: period_parsing.py Proyecto: jgalar/lttng-analyses

class MalformedExpression(Exception):
    """Error type for a malformed expression.

    Adds nothing to ``Exception``; the raise sites are outside this block.
    """


class DuplicatePeriodCapture(Exception):
    """Error carrying the name of a period capture that was duplicated.

    Args:
        name: the capture name, interpolated into the message by ``__str__``.
    """

    def __init__(self, name):
        # Hand the name to Exception so ``args`` is populated.  With empty
        # ``args``, copy/pickle rebuild the object by calling the class with
        # no arguments, which this signature rejects with a TypeError.
        super(DuplicatePeriodCapture, self).__init__(name)
        self._name = name

    def __str__(self):
        return 'Duplicate period capture name: "{}"'.format(self._name)


# common grammar elements
# Each element is tagged with a results name via the call shorthand
# ``element('name')``.
_e = pp.CaselessLiteral('e')
# Sign-or-digit followed by digits, an optional fractional part and an
# optional exponent, all merged into a single token.
_number = pp.Combine(
    pp.Word('+-' + pp.nums, pp.nums)
    + pp.Optional('.' + pp.Optional(pp.Word(pp.nums)))
    + pp.Optional(_e + pp.Word('+-' + pp.nums, pp.nums))
)('number')
# Double-quoted string with backslash as the escape character.
_quoted_string = pp.QuotedString('"', '\\')('quoted-string')
# Letter or underscore first, then letters, digits or underscores.
_identifier = pp.Word(pp.alphas + '_', pp.alphanums + '_')('id')
# One literal per dynamic scope, taken from the period.DynScope values.
_tph_scope_prefix = pp.Literal(period.DynScope.TPH.value)('tph-scope-prefix')
_spc_scope_prefix = pp.Literal(period.DynScope.SPC.value)('spc-scope-prefix')
_seh_scope_prefix = pp.Literal(period.DynScope.SEH.value)('seh-scope-prefix')
_sec_scope_prefix = pp.Literal(period.DynScope.SEC.value)('sec-scope-prefix')
_ec_scope_prefix = pp.Literal(period.DynScope.EC.value)('ec-scope-prefix')

Ejemplo n.º 7

0

Mostrar archivo

    def __parse_tc_filter_port(text):
        """Return the run of digits that follows the first ``port=`` in text.

        The grammar skips everything up to and including ``port=`` and then
        requires digits; the last parsed token (the digit string) is returned.
        pyparsing raises its parse exception when the text does not match.
        """
        digits = pp.Word(pp.nums)
        grammar = pp.SkipTo("port=", include=True) + digits
        tokens = grammar.parseString(text)

        return tokens[-1]

Ejemplo n.º 8

0

Mostrar archivo

#

import pyparsing as pp

# Atomic weight of each supported element, keyed by chemical symbol.
atomicWeight = dict(
    O=15.9994,
    H=1.00794,
    Na=22.9897,
    Cl=35.4527,
    C=12.0107,
)

digits = "0123456789"

# Version 1
# An element symbol: one uppercase letter optionally followed by one
# lowercase letter (at most two characters).
element = pp.Word(pp.alphas.upper(), pp.alphas.lower(), max=2)
# for stricter matching, use this Regex instead
# element = Regex("A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|"
#                 "E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|"
#                 "M[dgnot]|N[abdeiop]?|Os?|P[abdmortu]?|R[abefghnu]|"
#                 "S[bcegimnr]?|T[abcehilm]|U(u[bhopqst])?|V|W|Xe|Yb?|Z[nr]")
# Symbol plus an optional count; a missing count defaults to "1".
elementRef = pp.Group(element + pp.Optional(pp.Word(digits), default="1"))
# A formula is any number of element references.
formula = pp.ZeroOrMore(elementRef)


def sum_atomic_weights(element_list):
    """Return the summed weight of (symbol, quantity) pairs.

    Each symbol is looked up in ``atomicWeight`` and multiplied by
    ``int(quantity)``; an empty sequence yields 0.
    """
    total = 0
    for symbol, count in element_list:
        total += atomicWeight[symbol] * int(count)
    return total


formula.runTests(
    """\

Ejemplo n.º 9

0

Mostrar archivo

| min   |   7  |  43  |   7  |  15  |  82  |  98  |   1  |  37  |
| max   |  11  |  52  |  10  |  17  |  85  | 112  |   4  |  39  |
| ave   |   9  |  47  |   8  |  16  |  84  | 106  |   3  |  38  |
| sdev  |   1  |   3  |   1  |   1  |   1  |   3  |   1  |   1  |
+-------+------+------+------+------+------+------+------+------+
"""

# define grammar for datatable
# The three header lines are matched literally and dropped from the results.
heading = pp.Suppress(
    pp.Literal(
        "+-------+------+------+------+------+------+------+------+------+")
    + "|       |  A1  |  B1  |  C1  |  D1  |  A2  |  B2  |  C2  |  D2  |"
    + "+=======+======+======+======+======+======+======+======+======+"
)
vert = pp.Suppress(pp.Literal("|"))
number = pp.Word(pp.nums)
# One table row: | label | n | n | ... |  (the bars are suppressed).
rowData = pp.Group(
    vert
    + pp.Word(pp.alphas)
    + vert
    + pp.delimitedList(number, "|")
    + vert
)
trailing = pp.Suppress(
    pp.Literal(
        "+-------+------+------+------+------+------+------+------+------+")
)

# Dict() makes each row addressable by its first token (the row label).
datatable = heading + pp.Dict(pp.ZeroOrMore(rowData)) + trailing

# now parse data and print results
data = datatable.parseString(testData)
print(data)

# shortcut for import pprint; pprint.pprint(data.asList())
data.pprint()

Ejemplo n.º 10

0

Mostrar archivo

def eisen_grammar():
    """Build and return the pyparsing grammar ``file_def``.

    The returned expression is: an optional global max-depth setting, then a
    group of entry calls ('entry_calls'), then a group of rule definitions
    ('rule_defs').  C++-style comments and the ``set <word> ...`` lines
    listed in ``set_words`` are ignored.
    """
    # define parser grammar
    # this is rough and imposes no structure on float and integer expressions
    # but works if the input is properly formed
    fnum = pp.Word(".+-*/()"+pp.nums)
    
    # geometric transform identifiers
    tid = pp.oneOf('x y z s rx ry rz', caseless=True)('tid')
    # these are color transforms that we're ignoring
    cid = pp.oneOf('h hue sat b brightness a alpha m', caseless=True)

    tvalues = pp.OneOrMore(fnum)('tvalues')

    # only geometric transforms are grouped and named ('trans'); the color
    # forms below are matched without a results name
    gtrans = pp.Group(tid + tvalues).setResultsName('trans', 
                                                    listAllMatches=True)
    ctrans = cid + tvalues
    c2trans = pp.CaselessKeyword('color') + pp.Word(pp.alphanums + '#')
    c3trans = pp.CaselessKeyword('blend') + pp.Word(pp.alphanums + '#') + fnum
    trans = gtrans | ctrans | c2trans | c3trans

    
    

    # a rule name is an identifier that is not the keyword 'rule'
    rule_name = pp.NotAny(pp.CaselessKeyword('rule')) + \
        pp.Word(pp.alphas, pp.alphanums+'_')

    # optional "<count> *" prefix followed by "{ transforms }"
    loop_multiplier = fnum('count') + pp.Suppress('*')
    loop = pp.Group(pp.Optional(loop_multiplier) +
                    pp.Suppress('{') +
                    pp.ZeroOrMore(trans) +
                    pp.Suppress('}')).setResultsName('loop', 
                                                     listAllMatches=True)

    # rule modifier: max depth, optionally followed by "> successor_rule"
    md = pp.oneOf('md maxdepth', caseless=True)
    md_mod = md + fnum('md') + pp.Optional('>' + rule_name('successor_rule'))

    # rule modifier: weight
    weight = pp.oneOf('w weight', caseless=True)
    w_mod = weight + fnum('wm')
    
    # a shape word optionally followed by letters/colons, merged into one token
    shape_words = pp.oneOf(['box', 'grid', 'sphere', 'line'], caseless=True)
    shape = pp.Combine(shape_words + pp.Optional(pp.Word(pp.alphas + ':')))

    global_md = pp.CaselessKeyword('set') + md \
                                          + fnum('global_md')

    # NOTE(review): a shape call accepts at most one loop (Optional) while a
    # rule call accepts any number (ZeroOrMore) -- confirm this is intended.
    shape_call = (pp.Optional(loop) + 
                  shape('shape')).setResultsName('bcall', listAllMatches=True)
    rule_call = (pp.ZeroOrMore(loop) + 
            rule_name('rule_name')).setResultsName('rcall', listAllMatches=True)
    # shape calls are tried before rule calls
    call = shape_call | rule_call
    # '&' lets the two optional modifiers appear in either order
    rule = pp.Group(pp.Suppress(pp.CaselessKeyword('rule')) +
                    rule_name('name') +
                    (pp.Optional(md_mod) & pp.Optional(w_mod)) +
                    pp.Suppress('{') +
                    pp.OneOrMore(call) +
                    pp.Suppress('}'))

    entry = pp.Group(pp.OneOrMore(call)).setResultsName('entry_calls', 
                                                        listAllMatches=True)
    main = pp.Group(pp.OneOrMore(rule)).setResultsName('rule_defs', 
                                                        listAllMatches=True)
    file_def = pp.Optional(global_md) + entry + main 
    file_def.ignore(pp.cppStyleComment)
    # more stuff to ignore
    set_words = pp.oneOf('seed maxobjects maxsize minsize background ' +
                         'colorpool translation rotation pivot scale ' +
                         'raytracer syncrandom', caseless=True)
    set_ignore = pp.CaselessKeyword('set') + set_words + pp.restOfLine
    file_def.ignore(set_ignore)
    return file_def

Ejemplo n.º 11

0

Mostrar archivo

import pyparsing
from miasm.expression.expression import ExprInt, ExprId, ExprLoc, ExprSlice, \
    ExprMem, ExprCond, ExprCompose, ExprOp, ExprAssign, LocKey

# Decimal integer, converted to int.
integer = pyparsing.Word(pyparsing.nums)
integer.setParseAction(lambda tokens: int(tokens[0]))

# "0x" followed by hex digits, merged into one token and converted base 16.
hex_word = pyparsing.Literal('0x') + pyparsing.Word(pyparsing.hexnums)
hex_int = pyparsing.Combine(hex_word)
hex_int.setParseAction(lambda tokens: int(tokens[0], 16))

# The hex form is tried first so "0x10" is not read as decimal 0.
str_int_pos = hex_int | integer
# A leading '-' is dropped and the parsed value negated.
str_int_neg = pyparsing.Suppress('-') + (hex_int | integer)
str_int_neg.setParseAction(lambda tokens: -tokens[0])

str_int = str_int_pos | str_int_neg

# Constructor-name keywords: matched but omitted from the results.
(
    STR_EXPRINT,
    STR_EXPRID,
    STR_EXPRLOC,
    STR_EXPRSLICE,
    STR_EXPRMEM,
    STR_EXPRCOND,
    STR_EXPRCOMPOSE,
    STR_EXPROP,
    STR_EXPRASSIGN,
) = (
    pyparsing.Suppress(keyword)
    for keyword in (
        "ExprInt",
        "ExprId",
        "ExprLoc",
        "ExprSlice",
        "ExprMem",
        "ExprCond",
        "ExprCompose",
        "ExprOp",
        "ExprAssign",
    )
)

LOCKEY = pyparsing.Suppress("LocKey")

# Punctuation: also matched but omitted from the results.
STR_COMMA, LPARENTHESIS, RPARENTHESIS = (
    pyparsing.Suppress(symbol) for symbol in (",", "(", ")")
)

Ejemplo n.º 12

0

Mostrar archivo

Archivo: sccm.py Proyecto: chjs207/EFIC-Extracts-FileHistory-IntelligenCe-

class SCCMParser(text_parser.PyparsingMultiLineTextParser):
    """Parser for Windows System Center Configuration Manager (SCCM) logs."""

    NAME = 'sccm'
    DESCRIPTION = 'Parser for SCCM logs files.'

    _ENCODING = 'utf-8-sig'

    # Increasing the buffer size as SCCM messages are commonly well larger
    # than the default value.
    BUFFER_SIZE = 16384

    LINE_STRUCTURES = []

    _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
    _ONE_OR_TWO_DIGITS = text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS

    # PyParsing Components used to construct grammars for parsing lines.
    _PARSING_COMPONENTS = {
        'msg_left_delimiter':
        pyparsing.Literal('<![LOG['),
        'msg_right_delimiter':
        pyparsing.Literal(']LOG]!><time="'),
        'year':
        _FOUR_DIGITS.setResultsName('year'),
        'month':
        _ONE_OR_TWO_DIGITS.setResultsName('month'),
        'day':
        _ONE_OR_TWO_DIGITS.setResultsName('day'),
        'fraction_of_second':
        pyparsing.Regex(r'\d{3,7}').setResultsName('fraction_of_second'),
        'utc_offset_minutes':
        pyparsing.Regex(r'[-+]\d{2,3}').setResultsName('utc_offset_minutes'),
        'date_prefix':
        pyparsing.Literal('" date="').setResultsName('date_prefix'),
        'component_prefix':
        pyparsing.Literal('" component="').setResultsName('component_prefix'),
        'component':
        pyparsing.Word(pyparsing.alphanums).setResultsName('component'),
        # Message text: everything (newlines included) up to, but not
        # consuming, the right delimiter.
        'text':
        pyparsing.Regex(r'.*?(?=(]LOG]!><time="))',
                        re.DOTALL).setResultsName('text'),
        # Remainder of an entry: everything up to the next left delimiter.
        'line_remainder':
        pyparsing.Regex(r'.*?(?=(\<!\[LOG\[))',
                        re.DOTALL).setResultsName('line_remainder'),
        'lastline_remainder':
        pyparsing.restOfLine.setResultsName('lastline_remainder'),
        'hour':
        _ONE_OR_TWO_DIGITS.setResultsName('hour'),
        'minute':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('minute'),
        'second':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('second')
    }

    # Base grammar for individual log event lines.
    LINE_GRAMMAR_BASE = (
        _PARSING_COMPONENTS['msg_left_delimiter'] +
        _PARSING_COMPONENTS['text'] +
        _PARSING_COMPONENTS['msg_right_delimiter'] +
        _PARSING_COMPONENTS['hour'] + pyparsing.Suppress(':') +
        _PARSING_COMPONENTS['minute'] + pyparsing.Suppress(':') +
        _PARSING_COMPONENTS['second'] + pyparsing.Suppress('.') +
        _PARSING_COMPONENTS['fraction_of_second'] +
        _PARSING_COMPONENTS['date_prefix'] + _PARSING_COMPONENTS['month'] +
        pyparsing.Suppress('-') + _PARSING_COMPONENTS['day'] +
        pyparsing.Suppress('-') + _PARSING_COMPONENTS['year'] +
        _PARSING_COMPONENTS['component_prefix'] +
        _PARSING_COMPONENTS['component'])

    # Grammar for individual log event lines with a minutes offset from UTC.
    LINE_GRAMMAR_OFFSET = (
        _PARSING_COMPONENTS['msg_left_delimiter'] +
        _PARSING_COMPONENTS['text'] +
        _PARSING_COMPONENTS['msg_right_delimiter'] +
        _PARSING_COMPONENTS['hour'] + pyparsing.Suppress(':') +
        _PARSING_COMPONENTS['minute'] + pyparsing.Suppress(':') +
        _PARSING_COMPONENTS['second'] + pyparsing.Suppress('.') +
        _PARSING_COMPONENTS['fraction_of_second'] +
        _PARSING_COMPONENTS['utc_offset_minutes'] +
        _PARSING_COMPONENTS['date_prefix'] + _PARSING_COMPONENTS['month'] +
        pyparsing.Suppress('-') + _PARSING_COMPONENTS['day'] +
        pyparsing.Suppress('-') + _PARSING_COMPONENTS['year'] +
        _PARSING_COMPONENTS['component_prefix'] +
        _PARSING_COMPONENTS['component'])

    # Four structures: with/without a UTC offset, each either followed by
    # another entry or ending at the end of the line.
    LINE_STRUCTURES = [
        ('log_entry',
         LINE_GRAMMAR_BASE + _PARSING_COMPONENTS['line_remainder']),
        ('log_entry_at_end', LINE_GRAMMAR_BASE +
         _PARSING_COMPONENTS['lastline_remainder'] + pyparsing.lineEnd),
        ('log_entry_offset',
         LINE_GRAMMAR_OFFSET + _PARSING_COMPONENTS['line_remainder']),
        ('log_entry_offset_at_end', LINE_GRAMMAR_OFFSET +
         _PARSING_COMPONENTS['lastline_remainder'] + pyparsing.lineEnd)
    ]

    def _GetISO8601String(self, structure):
        """Retrieves an ISO8601 date time string from the structure.

        The date and time values in the SCCM log are formatted as:
        time="19:33:19.766-330" date="11-28-2014"

        Args:
          structure (pyparsing.ParseResults): structure of tokens derived from
              a line of a text file.

        Returns:
          str: ISO 8601 date time string.

        Raises:
          ValueError: if the structure cannot be converted into a date time
              string.
        """
        fraction_of_second = self._GetValueFromStructure(
            structure, 'fraction_of_second')
        fraction_of_second_length = len(fraction_of_second)
        if fraction_of_second_length not in (3, 6, 7):
            raise ValueError(
                'unsupported time fraction of second length: {0:d}'.format(
                    fraction_of_second_length))

        try:
            fraction_of_second = int(fraction_of_second, 10)
        except (TypeError, ValueError) as exception:
            raise ValueError(
                'unable to determine fraction of second with error: {0!s}'.
                format(exception))

        # TODO: improve precision support, but for now ignore the 100ns precision.
        if fraction_of_second_length == 7:
            fraction_of_second, _ = divmod(fraction_of_second, 10)

        year = self._GetValueFromStructure(structure, 'year')
        month = self._GetValueFromStructure(structure, 'month')
        day_of_month = self._GetValueFromStructure(structure, 'day')
        hours = self._GetValueFromStructure(structure, 'hour')
        minutes = self._GetValueFromStructure(structure, 'minute')
        seconds = self._GetValueFromStructure(structure, 'second')

        date_time_string = '{0:04d}-{1:02d}-{2:02d}T{3:02d}:{4:02d}:{5:02d}'.format(
            year, month, day_of_month, hours, minutes, seconds)

        # Zero-pad the fraction back to its original width (3 digits, or 6
        # once the 7th digit has been dropped).  Formatting the bare integer
        # loses leading zeros, e.g. "019" would be emitted as ".19".
        fraction_digits = min(fraction_of_second_length, 6)
        date_time_string = '{0:s}.{1:s}'.format(
            date_time_string, str(fraction_of_second).zfill(fraction_digits))

        utc_offset_minutes = self._GetValueFromStructure(
            structure, 'utc_offset_minutes')
        if utc_offset_minutes is not None:
            # The first character is the sign; the rest is a minute count.
            try:
                time_zone_offset = int(utc_offset_minutes[1:], 10)
            except (IndexError, ValueError) as exception:
                raise ValueError(
                    'Unable to parse time zone offset with error: {0!s}.'.
                    format(exception))

            time_zone_hours, time_zone_minutes = divmod(time_zone_offset, 60)
            date_time_string = '{0:s}{1:s}{2:02d}:{3:02d}'.format(
                date_time_string, utc_offset_minutes[0], time_zone_hours,
                time_zone_minutes)

        return date_time_string

    def ParseRecord(self, parser_mediator, key, structure):
        """Parse the record and return an SCCM log event object.

        An extraction warning is produced, and no event is emitted, when the
        date and time of the record cannot be determined or parsed.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): structure of tokens derived from
              a line of a text file.

        Raises:
          ParseError: when the structure type is unknown.
        """
        if key not in ('log_entry', 'log_entry_at_end', 'log_entry_offset',
                       'log_entry_offset_at_end'):
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

        try:
            date_time_string = self._GetISO8601String(structure)
        except ValueError as exception:
            parser_mediator.ProduceExtractionWarning(
                'unable to determine date time string with error: {0!s}'.
                format(exception))
            # Without a date time string there is nothing to build an event
            # from; continuing would reference an unbound local below.
            return

        # _GetISO8601String only succeeds for lengths 3, 6 and 7, so one of
        # the two branches below always binds date_time.
        fraction_of_second = self._GetValueFromStructure(
            structure, 'fraction_of_second')
        fraction_of_second_length = len(fraction_of_second)
        if fraction_of_second_length == 3:
            date_time = dfdatetime_time_elements.TimeElementsInMilliseconds()
        elif fraction_of_second_length in (6, 7):
            date_time = dfdatetime_time_elements.TimeElementsInMicroseconds()

        try:
            date_time.CopyFromStringISO8601(date_time_string)
        except ValueError as exception:
            parser_mediator.ProduceExtractionWarning(
                'unable to parse date time value: {0:s} with error: {1!s}'.
                format(date_time_string, exception))
            return

        event_data = SCCMLogEventData()
        event_data.component = self._GetValueFromStructure(
            structure, 'component')
        # TODO: pass line number to offset or remove.
        event_data.offset = 0
        event_data.text = self._GetValueFromStructure(structure, 'text')

        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_WRITTEN)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def VerifyStructure(self, parser_mediator, lines):
        """Verifies whether content corresponds to an SCCM log file.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          lines (str): one or more lines from the text file.

        Returns:
          bool: True if this is the correct parser, False otherwise.
        """
        # Identify the token to which we attempt a match: the literal's
        # ``match`` attribute is the delimiter text itself.
        match = self._PARSING_COMPONENTS['msg_left_delimiter'].match

        # Because logs files can lead with a partial event,
        # we can't assume that the first character (post-BOM)
        # in the file is the beginning of our match - so we
        # look for match anywhere in lines.
        return match in lines

Ejemplo n.º 13

0

Mostrar archivo

Archivo: poshdeob.py Proyecto: vmray/DC3-MWCP

def _gen_parser():
    r"""
    Generates PyParsing grammar for parsing common powershell operations.

    The returned expression is an ordered alternation of: string formatting
    (-f), replace, split, unary join, binary join, indexing and plain string
    concatenation, passed through the OptionalParen helper.

    Tests:
    >>> parser = _gen_parser()
    >>> parser.parseString("'{1} {0}'-f 'world','hello'")
    (['hello world'], {})
    >>> parser.parseString('''
    ...     'fGshellolNRfGs'-rEplaCE  ((([cHaR]108+[cHaR]78+[cHaR]82))),'!' .rePLace('fGs',[cHaR]96)''')
    (['`hello!`'], {})
    >>> parser.parseString("'ATBZCFD'-spLIT 'Z'-SPLIT'T'  -spLiT 'F'")
    (['A', 'B', 'C', 'D'], {})
    >>> parser.parseString("$ENv:PuBlIc[13]")
    (['i'], {})
    >>> parser.parseString("('h', 'e', 'l', 'lo')-JOIn ''")
    (['hello'], {})
    >>> parser.parseString("'he`llo'")
    (['hello'], {})
    >>> parser.parseString("'FOtestingFO'.RePLaCE('FO','`')")
    (['`testing`'], {})
    """
    # "[char]NNN": a decimal number converted to the character with that
    # code point.
    char = ("[" + pp.CaselessKeyword("char") + "]" +
            pp.Word(pp.nums)("num")).setParseAction(lambda t: chr(int(t.num)))
    # A string literal: either a lone backtick between quotes, or a single-
    # or double-quoted string that uses backtick as the escape character and
    # a doubled quote as the escaped quote.
    string = ((pp.Suppress("'") + "`" + pp.Suppress("'"))
              | (pp.Suppress('"') + "`" + pp.Suppress('"'))
              | pp.QuotedString("'",
                                escChar="`",
                                escQuote="''",
                                multiline=True,
                                convertWhitespaceEscapes=False)
              | pp.QuotedString('"',
                                escChar="`",
                                escQuote='""',
                                multiline=True,
                                convertWhitespaceEscapes=False))
    # "$name" where name is one of the _VARIABLE_LOOKUP keys (matched
    # caselessly); the token is replaced by the looked-up value.
    variable = ("$" + pp.oneOf(_VARIABLE_LOOKUP.keys(), caseless=True)("var")
                ).setParseAction(lambda t: _VARIABLE_LOOKUP[t.var.lower()])

    # A single string-like operand (string, char or variable); any "[string]"
    # cast in front of it is dropped from the results.
    _string = pp.Suppress(
        pp.Optional("[" + pp.CaselessKeyword("string") + "]")) + OptionalParen(
            pp.Suppress(pp.Optional("[" + pp.CaselessKeyword("string") +
                                    "]")) + string | char | variable)
    # One or more operands separated by "+", joined into a single string.
    concat_string = OptionalParen(
        pp.delimitedList(OptionalParen(_string),
                         delim="+").setParseAction(lambda t: "".join(t)))

    # TODO: Support ranges and other fancy indexing.
    # data[i, j, ...]: handled by the _indexing parse action.
    indexing = (concat_string("data") + "[" +
                pp.delimitedList(pp.Word(pp.nums))("indices") +
                "]").setParseAction(_indexing)

    # Combine used to enforce there is no space between "c" and "replace"
    _replace_command = pp.Combine(
        pp.Optional(pp.CaselessLiteral("c")) +
        pp.CaselessKeyword("replace"))("command")
    # One or more chained replacements in either operator form
    # (-replace old,new) or method form (.replace(old,new)), where the method
    # name may itself be quoted; handled by the _string_replace parse action.
    string_replace = (concat_string("data") + pp.OneOrMore(
        pp.Group((pp.Combine("-" + _replace_command) + concat_string("old") +
                  "," + concat_string("new"))
                 | ("." + ("'" + _replace_command + "'"
                           | '"' + _replace_command + '"' | _replace_command) +
                    ("(" + concat_string("old") + "," + concat_string("new") +
                     ")"))))("replace")).setParseAction(_string_replace)

    # format_string -f param, param, ...: handled by the _string_format parse
    # action; the -f group may repeat.
    string_format = (concat_string("format_string") + pp.OneOrMore(
        pp.Group(
            pp.CaselessKeyword("-f") + pp.delimitedList(concat_string)
            ("params")))("format")).setParseAction(_string_format)

    # One or more chained splits in operator form (-split delimiters) or
    # method form (.split(delimiters)); handled by the _split parse action.
    split = (concat_string("data") + pp.OneOrMore(
        pp.Group((pp.CaselessKeyword("-split") + concat_string("delimiters"))
                 | ("." + pp.CaselessKeyword("split") + "(" +
                    concat_string("delimiters") + ")")))("split")
             ).setParseAction(_split)

    # Binary join: a list of strings followed by -join and the separator.
    join = (OptionalParen(pp.delimitedList(concat_string)("string_list")) +
            pp.CaselessKeyword("-join") + concat_string("join_string")
            ).setParseAction(lambda t: t.join_string.join(t.string_list))

    # Unary join: -join(...) or [string]::join(...); the listed strings are
    # concatenated with an empty separator.
    join_unary = (
        (pp.CaselessKeyword("-join") | pp.CaselessKeyword("[string]::join")) +
        "(" + OptionalParen(pp.delimitedList(concat_string)("string_list")) +
        ")").setParseAction(lambda t: "".join(t.string_list))

    # "|" builds a first-match alternation, so the order below is significant:
    # the plain concat_string fallback comes last because every other
    # alternative also starts with a concat_string.
    # fmt: off
    poss_elements = OptionalParen(string_format
                                  | string_replace
                                  | split
                                  | join_unary
                                  | join
                                  | indexing
                                  | concat_string)
    # fmt: on

    return poss_elements

Ejemplo n.º 14

0

Mostrar archivo

Archivo: advanced_search.py Proyecto: sevdog/rumal


class OrNode(Node):
    """Node for an "or" operator between comparison groups."""
    pass  # Or operator between comparison group


class MultipleNode(Node):
    """Node subclass that adds no behavior of its own.

    NOTE(review): its intended role is not visible in this part of the file;
    presumably it groups several child nodes - confirm against its users.
    """
    pass


# GRAMMAR
# Boolean connectives, accepted as words or symbols and matched caselessly.
and_operator = pyparsing.oneOf(['and', '&'], caseless=True)
or_operator = pyparsing.oneOf(['or', '|'], caseless=True)

# An identifier is a run of letters, digits and ./:_-*^ characters; its parse
# action replaces every underscore in the matched text with a space.
ident = pyparsing.Word(pyparsing.alphanums + '.' + '/' + ':' + '_' + '-' +
                       '*' +
                       '^').setParseAction(lambda t: t[0].replace('_', ' '))

# OPERATORS
# Apart from '==', each operator token is replaced by a '$'-prefixed operator
# name through its parse action. Note that '=' and '~' both become '$regex'.
equal_exact = pyparsing.Keyword('==', caseless=True)  # exact match
equal_contains = pyparsing.Keyword('=', caseless=True).setParseAction(
    lambda t: '$regex')  # contains match
regex = pyparsing.Keyword('~', caseless=True).setParseAction(
    lambda t: '$regex')  # regex match
greater_than_equal = pyparsing.Keyword('>=', caseless=True).setParseAction(
    lambda t: '$gte')  # greater than or equal
greater_than = pyparsing.Keyword('>', caseless=True).setParseAction(
    lambda t: '$gt')  # greater than
lower_than_equal = pyparsing.Keyword('<=', caseless=True).setParseAction(
    lambda t: '$lte')  # lower than or equal
lower_than = pyparsing.Keyword('<', caseless=True).setParseAction(

Ejemplo n.º 15

0

Mostrar archivo

Archivo: mac_appfirewall.py Proyecto: Team-Firebugs/plaso

class MacAppFirewallParser(text_parser.PyparsingSingleLineTextParser):
  """Parse text based on appfirewall.log file."""

  NAME = u'mac_appfirewall_log'
  DESCRIPTION = u'Parser for appfirewall.log files.'

  ENCODING = u'utf-8'

  # Define how a log line should look like.
  # Example: 'Nov  2 04:07:35 DarkTemplar-2.local socketfilterfw[112] '
  #          '<Info>: Dropbox: Allow (in:0 out:2)'
  # INFO: process_name is going to have a white space at the beginning.

  DATE_TIME = pyparsing.Group(
      text_parser.PyparsingConstants.THREE_LETTERS.setResultsName(u'month') +
      text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName(u'day') +
      text_parser.PyparsingConstants.TIME_ELEMENTS)

  FIREWALL_LINE = (
      DATE_TIME.setResultsName(u'date_time') +
      pyparsing.Word(pyparsing.printables).setResultsName(u'computer_name') +
      pyparsing.Word(pyparsing.printables).setResultsName(u'agent') +
      pyparsing.Literal(u'<').suppress() +
      pyparsing.CharsNotIn(u'>').setResultsName(u'status') +
      pyparsing.Literal(u'>:').suppress() +
      pyparsing.CharsNotIn(u':').setResultsName(u'process_name') +
      pyparsing.Literal(u':') +
      pyparsing.SkipTo(pyparsing.lineEnd).setResultsName(u'action'))

  # Repeated line.
  # Example: Nov 29 22:18:29 --- last message repeated 1 time ---

  REPEATED_LINE = (
      DATE_TIME.setResultsName(u'date_time') +
      pyparsing.Literal(u'---').suppress() +
      pyparsing.CharsNotIn(u'---').setResultsName(u'process_name') +
      pyparsing.Literal(u'---').suppress())

  LINE_STRUCTURES = [
      (u'logline', FIREWALL_LINE),
      (u'repeated', REPEATED_LINE)]

  def __init__(self):
    """Initializes a parser object."""
    super(MacAppFirewallParser, self).__init__()
    self._last_month = 0
    self._previous_structure = None
    self._year_use = 0

  def _GetTimeElementsTuple(self, structure):
    """Retrieves a time elements tuple from the structure.

    The log lines carry no year, so the year comes from the parser state;
    it is advanced by one whenever the month is lower than the month of the
    previously accepted line.

    Args:
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Returns:
      tuple: contains:
        year (int): year.
        month (int): month, where 1 represents January.
        day_of_month (int): day of month, where 1 is the first day of the month.
        hours (int): hours.
        minutes (int): minutes.
        seconds (int): seconds.
    """
    month, day, hours, minutes, seconds = structure.date_time

    # Note that dfdatetime_time_elements.TimeElements will raise ValueError
    # for an invalid month.
    month = timelib.MONTH_DICT.get(month.lower(), 0)

    if month != 0 and month < self._last_month:
      # Gap detected between years.
      self._year_use += 1

    return (self._year_use, month, day, hours, minutes, seconds)

  def _ParseLogLine(self, parser_mediator, structure, key):
    """Parse a single log line and produce an event object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
      key (str): identifier of the structure of tokens.
    """
    time_elements_tuple = self._GetTimeElementsTuple(structure)

    try:
      date_time = dfdatetime_time_elements.TimeElements(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      parser_mediator.ProduceExtractionError(
          u'invalid date time value: {0!s}'.format(structure.date_time))
      return

    self._last_month = time_elements_tuple[1]

    # If the actual entry is a repeated entry, we take the basic information
    # from the previous entry, but using the timestamp from the actual entry.
    if key == u'logline':
      self._previous_structure = structure
    else:
      structure = self._previous_structure

    # Pyparsing reads in RAW, but the text is in UTF8.
    try:
      action = structure.action.decode(u'utf-8')
    except UnicodeDecodeError:
      logging.warning(
          u'Decode UTF8 failed, the message string may be cut short.')
      action = structure.action.decode(u'utf-8', u'ignore')

    event_data = MacAppFirewallLogEventData()
    event_data.action = action
    event_data.agent = structure.agent
    event_data.computer_name = structure.computer_name
    # Due to the use of CharsNotIn pyparsing structure contains whitespaces
    # that need to be removed.
    event_data.process_name = structure.process_name.strip()
    event_data.status = structure.status

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_ADDED)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): identifier of the structure of tokens.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in (u'logline', u'repeated'):
      raise errors.ParseError(
          u'Unable to parse record, unknown structure: {0:s}'.format(key))

    self._ParseLogLine(parser_mediator, structure, key)

  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is a Mac AppFirewall log file.

    The line must parse as a regular firewall line whose action is the log
    creation message and whose status is "Error", and it must carry a valid
    date and time. As a side effect the year tracking state is reset from
    the estimated year of the parser mediator.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (bytes): line from a text file.

    Returns:
      bool: True if the line is in the expected format, False if not.
    """
    self._last_month = 0
    self._year_use = parser_mediator.GetEstimatedYear()

    try:
      structure = self.FIREWALL_LINE.parseString(line)
    except pyparsing.ParseException as exception:
      # Use the !s conversion: an exception object does not accept the "s"
      # format specification and would raise TypeError on Python 3.
      logging.debug((
          u'Unable to parse file as a Mac AppFirewall log file with error: '
          u'{0!s}').format(exception))
      return False

    if structure.action != u'creating /var/log/appfirewall.log':
      logging.debug(
          u'Not a Mac AppFirewall log file, invalid action: {0!s}'.format(
              structure.action))
      return False

    if structure.status != u'Error':
      logging.debug(
          u'Not a Mac AppFirewall log file, invalid status: {0!s}'.format(
              structure.status))
      return False

    time_elements_tuple = self._GetTimeElementsTuple(structure)

    try:
      dfdatetime_time_elements.TimeElements(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      logging.debug((
          u'Not a Mac AppFirewall log file, invalid date and time: '
          u'{0!s}').format(structure.date_time))
      return False

    self._last_month = time_elements_tuple[1]

    return True

Ejemplo n.º 16

0

Mostrar archivo

 def symbol(init_chars: str) -> pp.ParserElement:
     """Build a parser for a non-reserved symbol.

     The symbol is an optional leading underscore followed by a word that
     starts with one of init_chars and continues with letters, digits or
     underscores; both parts are combined into a single token. Anything
     matching ReservedSymbols is rejected.
     """
     underscore_prefix = pp.Optional('_')
     name_word = pp.Word(init_chars, pp.alphanums + '_')
     combined = pp.Combine(underscore_prefix + name_word)
     return ~ReservedSymbols + combined

Ejemplo n.º 17

0

Mostrar archivo

import pyparsing
import re
import functools

# Code generation specific libraries
import common

#-----------------------------------
# Definitions
#-----------------------------------

# Fallback value used when a named token was not found and so never set
TokenNotSet = ''

# Identifiers: a letter followed by letters, digits or underscores.
# A type identifier may carry one dotted suffix and is returned as one token.
Identifier = pyparsing.Word(pyparsing.alphas, pyparsing.alphanums + '_')
TypeIdentifier = pyparsing.Combine(
    Identifier + pyparsing.Optional(pyparsing.Literal('.') + Identifier))

# Punctuation: everything except the pointer marker is matched but dropped
# from the parse results
Semicolon = pyparsing.Suppress(pyparsing.Literal(';'))
Pointer = pyparsing.Literal('*')
OpenBracket = pyparsing.Suppress(pyparsing.Literal('['))
CloseBracket = pyparsing.Suppress(pyparsing.Literal(']'))
DotDot = pyparsing.Suppress(pyparsing.Literal('..'))
Equal = pyparsing.Suppress(pyparsing.Literal('='))

# Direction keywords; the "in" keyword is tried first
KeywordDirIn = pyparsing.Keyword(common.DIR_IN)
KeywordDirOut = pyparsing.Keyword(common.DIR_OUT)
Direction = pyparsing.MatchFirst([KeywordDirIn, KeywordDirOut])

Ejemplo n.º 18

0

Mostrar archivo

Archivo: analysis.py Proyecto: WorkOfArtiz/serpend

import sys, re

# standard string format (used if no format is explicitly given)
STANDARD_STRING_FORMAT = "[$__REALTIME_TIMESTAMP] $MESSAGE"
"""
GRAMMAR
"""
# types of patterns, annotated
# Each name is bound to the string 'PAT_' + the suffix at the same position
# in the list below (e.g. PAT_NE == 'PAT_NR_NE').
PAT_NR, PAT_NE, PAT_LT, PAT_ST, PAT_LE, PAT_SE, PAT_STR, PAT_STAR, PAT_AVAIL, PAT_NEG, PAT_REG = [
    'PAT_%s' % s
    for s in 'NR NR_NE NR_LT NR_ST NR_LE NR_SE STR STAR AVAIL NEG REG'.split()
]

# Basic types
# Integer literals are converted to int by their parse actions:
# "0x..." is read as base 16, a leading "0" as base 8, anything else as
# base 10.
hexadecimal = (pp.Suppress('0x') +
               pp.Word(pp.hexnums)).setParseAction(lambda x: int(x[0], 16))
octogonal = ('0' + pp.Optional(
    pp.Word("01234567"))).setParseAction(lambda x: int("".join(x), 8))
decimal = pp.Word(pp.nums).setParseAction(lambda x: int(x[0], 10))
# A double- or single-quoted string with backslash escapes; the quotes are
# removed from the result.
string = pp.QuotedString('"', escChar='\\',
                         unquoteResults=True) | pp.QuotedString(
                             "'", escChar='\\', unquoteResults=True)

# Pattern types
# Each pattern yields a (pattern type, value) tuple. Alternatives are tried in
# the order hexadecimal, octal, decimal.
pat_number = (hexadecimal | octogonal
              | decimal).setParseAction(lambda x: (PAT_NR, x[0]))
pat_ne = (pp.Suppress('!=') + (hexadecimal | octogonal | decimal)
          ).setParseAction(lambda x: (PAT_NE, x[0]))
pat_lt = (pp.Suppress('>') + (hexadecimal | octogonal | decimal)
          ).setParseAction(lambda x: (PAT_LT, x[0]))
pat_st = (pp.Suppress('<') + (hexadecimal | octogonal | decimal)

Ejemplo n.º 19

0

Mostrar archivo

    def __parse_tc_filter_network(text):
        """Return the network value that follows "network=" in text.

        Everything up to and including the first "network=" is skipped; the
        value is the following run of letters, digits, dots and slashes.
        """
        skip_to_key = pp.SkipTo("network=", include=True)
        network_value = pp.Word(pp.alphanums + "." + "/")
        tokens = (skip_to_key + network_value).parseString(text)

        return tokens[-1]

Ejemplo n.º 20

0

Mostrar archivo

Archivo: influxdb.py Proyecto: sum12/gnocchi

# A boolean token is true when its text starts with "t" (any case).
boolean = pyparsing.Regex(boolean).setParseAction(
    lambda t: t[0].lower()[0] == "t")

quoted_string = pyparsing.QuotedString('"', escChar="\\")
# An unquoted string is a run of characters other than space, comma, equals
# and backslash, optionally followed by backslash escapes; each escape is
# reduced to its last character and all pieces are joined into one string.
unquoted_string = pyparsing.OneOrMore(
    pyparsing.CharsNotIn(" ,=\\") + pyparsing.Optional(
        pyparsing.OneOrMore((
            pyparsing.Literal("\\ ") | pyparsing.Literal("\\,")
            | pyparsing.Literal("\\=") | pyparsing.Literal("\\")
        ).setParseAction(lambda s, loc, tok: tok[0][-1])))).setParseAction(
            lambda s, loc, tok: "".join(list(tok)))
measurement = tag_key = tag_value = field_key = quoted_string | unquoted_string
# Optional fraction and exponent are non-capturing groups, spelled "(?:...)".
# The previous "(:?...)" spelling instead accepted an optional literal colon,
# so input such as "1:.5" matched and then failed in float().
number = r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?"
number = pyparsing.Regex(number).setParseAction(
    lambda s, loc, tok: float(tok[0]))
# An integer is a run of digits followed by the "i" marker, which is dropped.
integer = (pyparsing.Word(
    pyparsing.nums).setParseAction(lambda s, loc, tok: int(tok[0])) +
           pyparsing.Suppress("i"))
field_value = integer | number | quoted_string
# The timestamp digits are interpreted as a nanosecond count.
timestamp = pyparsing.Word(pyparsing.nums).setParseAction(
    lambda s, loc, tok: numpy.datetime64(int(tok[0]), 'ns'))

line_protocol = (
    measurement +
    # Tags
    pyparsing.Optional(pyparsing.Suppress(",") + pyparsing.delimitedList(
        pyparsing.OneOrMore(
            pyparsing.Group(tag_key + pyparsing.Suppress("=") + tag_value),
            ",")).setParseAction(lambda s, loc, tok: dict(list(tok))),
                       default={}) + pyparsing.Suppress(" ") +
    # Fields
    pyparsing.delimitedList(

Ejemplo n.º 21

0

Mostrar archivo

Archivo: enum.py Proyecto: ai-abhinav/gtool

    def __init__(self, obj, config=str()):
        """Parse the Enum plugin config into an input name and a mapping.

        The config string must look like:
            input = '@text1', mapping = 'low = 1, medium = 2, high = 3'

        After construction, self.input holds the attribute name (without the
        leading '@' or '!') and self.mapping holds a trie that maps each
        lower-cased mapping key to its value string.

        Args:
            obj: passed through unchanged to the parent initializer.
            config (str): the plugin configuration string.

        Raises:
            ValueError: if the config is not a non-empty string, or lacks the
                'input' or 'mapping' keyword.
            IndexError: if the config does not contain exactly one parsable
                input/mapping pair.
        """
        super(Enum, self).__init__(obj, config=config)

        # Check the type before the length: len() on a non-string config
        # would raise TypeError instead of the intended ValueError.
        if not isinstance(self.config, str) or len(self.config) < 1:
            raise ValueError('Enum plugin function requires a config string')

        inputkeyword = 'input'
        mappingkeyword = 'mapping'

        if inputkeyword not in self.config:
            raise ValueError(
                'A input keyword argument must be specified for the Enum plugin function'
            )

        if mappingkeyword not in self.config:
            raise ValueError(
                'A mapping keyword argument must be specified for the Enum plugin function'
            )

        # '@name' or '!name' in single quotes; only the bare name is kept.
        attrexpr = p.Combine(
            p.Literal("'").suppress() +
            (p.Literal('@') | p.Literal('!')).suppress() +
            p.Word(p.alphanums) + p.Literal("'").suppress())

        inputexpr = p.CaselessKeyword(inputkeyword).suppress() + p.Literal(
            '=').suppress() + attrexpr

        mappingexpr = p.CaselessKeyword(mappingkeyword).suppress() + p.Literal(
            '=').suppress() + p.sglQuotedString()

        expr = inputexpr + p.Literal(',').suppress() + mappingexpr

        self.input = None
        self.mapping = None

        # Each scanString result is a (tokens, start, end) tuple.
        _matches = list(expr.scanString(self.config))

        if len(_matches) > 1:
            raise IndexError(
                'There should only be one input and mapping keyword set in the Enum plugin function\'s config but %s was received'
                % _matches)

        if not _matches:
            # Same exception type as the former bare list lookup, but with a
            # message that names the actual problem.
            raise IndexError(
                'No input and mapping keyword set could be parsed from the Enum plugin function\'s config: %r'
                % (self.config,))

        tokens = _matches[0][0]

        # tokens[1] is the mapping string including its surrounding single
        # quotes, which are stripped before splitting into key/value pairs.
        mappingdict = {}
        for mapitem in tokens[1][1:-1].split(','):
            k, v = mapitem.split('=')
            mappingdict[k.strip()] = v.strip()

        self.input = tokens[0]
        self.mapping = pt.trie()
        for k, v in mappingdict.items():
            self.mapping[k.lower()] = v

Ejemplo n.º 22

0

Mostrar archivo

import pyparsing as pp
from bling import ast

# Placeholder for the full expression grammar; its definition is assigned
# elsewhere.
expression = pp.Forward()

# Keyword literals, each converted by the matching ast constructor.
null = pp.Keyword('null').setParseAction(ast.Null)
boolean = pp.Keyword('true') | pp.Keyword('false')
boolean.setParseAction(ast.Boolean)

# A number: optional minus sign, an integer part that is either "0" or starts
# with a non-zero digit, an optional fraction and an optional exponent, all
# combined into a single token before ast.Number is applied.
number = pp.Combine(
    pp.Optional('-') + ('0' | pp.Word('123456789', pp.nums)) +
    pp.Optional('.' + pp.Word(pp.nums)) +
    pp.Optional(pp.Word('eE', exact=1) + pp.Word(pp.nums + '+-', pp.nums)))
number.setParseAction(ast.Number)

# Hex colour components. A single hex digit is doubled before conversion
# (so "f" becomes 0xff); a pair of hex digits is converted directly.
nibble = pp.Word(
    pp.hexnums,
    exact=1).setParseAction(lambda tokens: int(tokens[0] + tokens[0], 16))
byte = pp.Word(pp.hexnums,
               exact=2).setParseAction(lambda tokens: int(tokens[0], 16))
# The six-digit form is tried before the three-digit form.
hex_rgb = pp.Suppress('#') + (byte * 3 | nibble * 3)
# setParseAction returns the expression it was called on, so color and
# hex_rgb refer to the same parser object.
color = hex_rgb.setParseAction(ast.Color)

literal = null | boolean | number | color


def identifier():
    """Return a new parser matching a run of letters and underscores."""
    allowed_chars = pp.alphas + "_"
    return pp.Word(allowed_chars)


# A reference is an identifier whose match is passed to ast.Reference.
reference = identifier().setParseAction(ast.Reference)

Ejemplo n.º 23

0

Mostrar archivo

def _identifier(name: str) -> pp.ParserElement:
    """Build a parser for an optionally double-quoted identifier.

    The identifier is a run of letters, digits and underscores, stored in the
    results under the given name. A double quote before and/or after it is
    accepted and dropped; the two quotes are optional independently of each
    other.
    """
    return (pp.Optional(pp.Suppress('"')) + pp.Word(pp.alphanums + '_')(name) +
            pp.Optional(pp.Suppress('"')))

Ejemplo n.º 24

0

Mostrar archivo

def identifier():
    """Return a new parser matching a run of letters and underscores."""
    allowed_chars = pp.alphas + "_"
    return pp.Word(allowed_chars)

Ejemplo n.º 25

0

Mostrar archivo

import pyparsing as pp
from pydbml.definitions.generic import name
from pydbml.definitions.common import _, _c, end, note, note_object
from pydbml.definitions.column import table_column
from pydbml.definitions.index import indexes
from pydbml.classes import Table

# Newline is left out of the default whitespace characters, so it is not
# skipped implicitly between tokens. This call changes the pyparsing default
# globally.
pp.ParserElement.setDefaultWhitespaceChars(' \t\r')

# "as <name>": the keyword must stand as a whole word and is dropped from the
# results. The "-" operator makes a failure after that point a hard error
# instead of allowing backtracking.
alias = pp.WordStart() + pp.Literal('as').suppress() - pp.WordEnd() - name

# "#" followed by 3 or 6 hex digits with no whitespace in between; "^" picks
# the longer of the two alternatives.
hex_char = pp.Word(pp.srange('[0-9a-fA-F]'), exact=1)
hex_color = ("#" - (hex_char * 3 ^ hex_char * 6)).leaveWhitespace()
# "headercolor: #rrggbb" (keyword matched caselessly); the colour is stored
# as a single string under 'header_color'.
header_color = (pp.CaselessLiteral('headercolor:').suppress() + _ -
                pp.Combine(hex_color)('header_color'))
# A bracketed, comma-separated list of note / header colour settings.
table_setting = _ + (note('note') | header_color) + _
table_settings = '[' + table_setting + (',' + table_setting)[...] + ']'


def parse_table_settings(s, l, t):
    '''
    Collect the table settings present in the parse results into a dict.

    Example input: [headercolor: #cccccc, note: 'note']

    Only the 'note' and 'header_color' entries are copied, and only when
    they are present in t.
    '''
    wanted_keys = ('note', 'header_color')
    return {key: t[key] for key in wanted_keys if key in t}

Ejemplo n.º 26

0

Mostrar archivo

Archivo: cmds.py Proyecto: drpott/trigger35

def _parse_ios_interfaces(data,
                          acls_as_list=True,
                          auto_cleanup=True,
                          skip_disabled=True):
    """
    Walks through a IOS interface config and returns a dict of parts.

    Intended for use by `~trigger.cmds.NetACLInfo.ios_parse()` but was written
    to be portable.

    :param data:
        The interface configuration text to parse.

    :param acls_as_list:
        Whether you want acl names as single-item lists instead of plain
        strings, e.g. ['ABC123'] vs. "ABC123". (Default: ``True``)

    :param auto_cleanup:
        Whether you want to pass results through cleanup_results().
        (Default: ``True``)

    :param skip_disabled:
        Whether to skip disabled interfaces. Only used when ``auto_cleanup``
        is set. (Default: ``True``)

    :returns:
        The parse results, passed through ``_cleanup_interface_results()``
        when ``auto_cleanup`` is set. If parsing fails the results are an
        empty dict.
    """
    import pyparsing as pp

    # Setup
    bang = pp.Literal("!").suppress()
    anychar = pp.Word(pp.printables)

    # Foundry devices emit a lot of extra output before the first interface
    # that must be ignored, for example:
    #telnet@olse1-dc5#show  configuration | include ^(interface | ip address | ip access-group | description|!)
    #!
    #Startup-config data location is flash memory
    #!
    #Startup configuration:
    #!
    #ver 07.5.05hT53
    #!
    #module 1 bi-0-port-m4-management-module
    #module 2 bi-8-port-gig-module
    #
    # Rather than describing each of those lines, everything up to the next
    # "interface" keyword is skipped.
    interface_keyword = pp.Keyword("interface")
    unwanted = pp.SkipTo(interface_keyword, include=False).suppress()

    octet = pp.Word(pp.nums, max=3)
    ipaddr = pp.Combine(octet + "." + octet + "." + octet + "." + octet)
    address = ipaddr
    netmask = ipaddr
    cidr = pp.Literal("/").suppress() + pp.Word(pp.nums, max=2)

    # Description
    desc_keyword = pp.Keyword("description")
    description = pp.Dict(pp.Group(desc_keyword + pp.Group(pp.restOfLine)))

    # Addresses
    #cisco example:
    # ip address 172.29.188.27 255.255.255.224 secondary
    #
    #foundry example:
    # ip address 10.62.161.187/26

    ipaddr_keyword = pp.Keyword("ip address").suppress()
    secondary = pp.Literal("secondary").suppress()

    #foundry matches on cidr and cisco matches on netmask
    #netmask converted to cidr in cleanup
    ip_tuple = pp.Group(address + (cidr ^ netmask)).setResultsName(
        'addr', listAllMatches=True)
    negotiated = pp.Literal('negotiated')  # Seen on Cisco 886
    ip_address = ipaddr_keyword + (negotiated
                                   ^ ip_tuple) + pp.Optional(secondary)

    addrs = pp.ZeroOrMore(ip_address)

    # ACLs
    acl_keyword = pp.Keyword("ip access-group").suppress()

    # acl_name to be [''] or '' depending on acls_as_list
    acl_name = pp.Group(anychar) if acls_as_list else anychar
    direction = pp.oneOf('in out').suppress()
    # The lookahead decides the key ('acl_in' / 'acl_out') from the direction
    # that follows the ACL name, without consuming either of them.
    acl_in = acl_keyword + pp.FollowedBy(acl_name + pp.Literal('in'))
    acl_in.setParseAction(pp.replaceWith('acl_in'))
    acl_out = acl_keyword + pp.FollowedBy(acl_name + pp.Literal('out'))
    acl_out.setParseAction(pp.replaceWith('acl_out'))

    acl = pp.Dict(pp.Group((acl_in ^ acl_out) + acl_name)) + direction
    acls = pp.ZeroOrMore(acl)

    # Interfaces
    iface_keyword = pp.Keyword("interface").suppress()
    foundry_awesome = pp.Literal(" ").suppress() + anychar
    #foundry example:
    #!
    #interface ethernet 6/6
    # ip access-group 126 in
    # ip address 172.18.48.187 255.255.255.255

    #cisco example:
    #!
    #interface Port-channel1
    # description gear1-mtc : AE1 : iwslbfa1-mtc-sw0 :  : 1x1000 : 172.20.166.0/24 :  :  :
    # ip address 172.20.166.251 255.255.255.0

    interface = pp.Combine(anychar + pp.Optional(foundry_awesome))

    #foundry's body is acl then ip and cisco's is ip then acl
    iface_body = pp.Optional(description) + pp.Optional(acls) + pp.Optional(
        addrs) + pp.Optional(acls)

    iface_info = pp.Optional(unwanted) + iface_keyword + pp.Dict(
        pp.Group(interface + iface_body)) + pp.Optional(pp.SkipTo(bang))

    interfaces = pp.Dict(pp.ZeroOrMore(iface_info))

    # This is where the parsing is actually happening. Parsing is deliberately
    # best-effort, so any ordinary error yields empty results; unlike a bare
    # "except:", this no longer swallows KeyboardInterrupt or SystemExit.
    try:
        results = interfaces.parseString(data)
    except Exception:  # e.g. ParseException, ParseFatalException, RecursiveGrammarException
        results = {}

    if auto_cleanup:
        return _cleanup_interface_results(results, skip_disabled=skip_disabled)
    return results

Ejemplo n.º 27

0

Mostrar archivo

Archivo: test_simple_unit.py Proyecto: jgrey4296/pyparsing

class TestCommonHelperExpressions(PyparsingExpressionTestCase):
    """Test specs for pyparsing's common helper expressions.

    Each PpTestSpec pairs an expression and an input text with the expected
    token list and, where given, the expected named results.
    """
    tests = [
        PpTestSpec(
            desc="A comma-delimited list of words",
            expr=pp.delimited_list(pp.Word(pp.alphas)),
            text="this, that, blah,foo,   bar",
            expected_list=["this", "that", "blah", "foo", "bar"],
        ),
        PpTestSpec(
            desc="A counted array of words",
            expr=pp.Group(pp.counted_array(pp.Word("ab")))[...],
            text="2 aaa bbb 0 3 abab bbaa abbab",
            expected_list=[["aaa", "bbb"], [], ["abab", "bbaa", "abbab"]],
        ),
        PpTestSpec(
            desc="skipping comments with ignore",
            expr=(pp.pyparsing_common.identifier("lhs") + "=" +
                  pp.pyparsing_common.fnumber("rhs")).ignore(
                      pp.cpp_style_comment),
            text="abc_100 = /* value to be tested */ 3.1416",
            expected_list=["abc_100", "=", 3.1416],
            expected_dict={
                "lhs": "abc_100",
                "rhs": 3.1416
            },
        ),
        PpTestSpec(
            desc=
            "some pre-defined expressions in pyparsing_common, and building a dotted identifier with delimted_list",
            # Uses the snake_case helper name, like the other specs in this
            # class.
            expr=(pp.pyparsing_common.number("id_num") + pp.delimited_list(
                pp.pyparsing_common.identifier, ".", combine=True)("name") +
                  pp.pyparsing_common.ipv4_address("ip_address")),
            text="1001 www.google.com 192.168.10.199",
            expected_list=[1001, "www.google.com", "192.168.10.199"],
            expected_dict={
                "id_num": 1001,
                "name": "www.google.com",
                "ip_address": "192.168.10.199",
            },
        ),
        PpTestSpec(
            desc=
            "using one_of (shortcut for Literal('a') | Literal('b') | Literal('c'))",
            expr=pp.one_of("a b c")[...],
            text="a b a b b a c c a b b",
            expected_list=[
                "a", "b", "a", "b", "b", "a", "c", "c", "a", "b", "b"
            ],
        ),
        PpTestSpec(
            desc="parsing nested parentheses",
            expr=pp.nested_expr(),
            text="(a b (c) d (e f g ()))",
            expected_list=[["a", "b", ["c"], "d", ["e", "f", "g", []]]],
        ),
        PpTestSpec(
            desc="parsing nested braces",
            expr=(pp.Keyword("if") + pp.nested_expr()("condition") +
                  pp.nested_expr("{", "}")("body")),
            text='if ((x == y) || !z) {printf("{}");}',
            expected_list=[
                "if",
                [["x", "==", "y"], "||", "!z"],
                ["printf(", '"{}"', ");"],
            ],
            expected_dict={
                "condition": [[["x", "==", "y"], "||", "!z"]],
                "body": [["printf(", '"{}"', ");"]],
            },
        ),
    ]

Ejemplo n.º 28

0

Mostrar archivo

Archivo: xchatlog.py Proyecto: infosecjosh/plaso

class XChatLogParser(text_parser.PyparsingSingleLineTextParser):
    """Parser for XChat log files.

    Three line structures are recognized: a regular log line, a header line
    carrying a log action plus a full date, and a bare header signature that
    is only reached when the header grammar did not match.
    """

    NAME = 'xchatlog'
    DESCRIPTION = 'Parser for XChat log files.'

    _ENCODING = 'utf-8'

    # Grammar elements shared by header, footer and body lines.
    # TODO: Only English ASCII timestamp supported ATM, add support for others.

    _WEEKDAY = pyparsing.Group(
        pyparsing.Keyword('Sun')
        | pyparsing.Keyword('Mon')
        | pyparsing.Keyword('Tue')
        | pyparsing.Keyword('Wed')
        | pyparsing.Keyword('Thu')
        | pyparsing.Keyword('Fri')
        | pyparsing.Keyword('Sat'))

    # Header/footer grammar.
    # Sample: "**** BEGIN LOGGING AT Mon Dec 31 21:11:55 2011".
    # The "BEGIN LOGGING" text is localized (English by default) and differs
    # when XChat runs under another locale.

    _HEADER_SIGNATURE = pyparsing.Keyword('****')

    _HEADER_DATE_TIME = pyparsing.Group(
        _WEEKDAY.setResultsName('weekday')
        + text_parser.PyparsingConstants.THREE_LETTERS.setResultsName('month')
        + text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName('day')
        + text_parser.PyparsingConstants.TIME_ELEMENTS
        + text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName('year'))

    # The log action is captured as three whitespace-separated words.
    _LOG_ACTION = pyparsing.Group(
        pyparsing.Word(pyparsing.printables)
        + pyparsing.Word(pyparsing.printables)
        + pyparsing.Word(pyparsing.printables))

    _HEADER = (
        _HEADER_SIGNATURE.suppress()
        + _LOG_ACTION.setResultsName('log_action')
        + _HEADER_DATE_TIME.setResultsName('date_time'))

    # Body grammar (nickname, text and/or service messages).
    # Sample: "dec 31 21:11:58 <fpi> ola plas-ing guys!".

    _DATE_TIME = pyparsing.Group(
        text_parser.PyparsingConstants.THREE_LETTERS.setResultsName('month')
        + text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName('day')
        + text_parser.PyparsingConstants.TIME_ELEMENTS)

    _NICKNAME = pyparsing.QuotedString(
        '<', endQuoteChar='>').setResultsName('nickname')

    _LOG_LINE = (
        _DATE_TIME.setResultsName('date_time')
        + pyparsing.Optional(_NICKNAME)
        + pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('text'))

    # ParseRecord relies on these being tried in exactly this order: the bare
    # signature must come after the full header.
    LINE_STRUCTURES = [
        ('logline', _LOG_LINE),
        ('header', _HEADER),
        ('header_signature', _HEADER_SIGNATURE),
    ]

    def __init__(self):
        """Initializes a parser object."""
        super(XChatLogParser, self).__init__()
        self.offset = 0
        # Year taken from the most recent BEGIN header; a false value (None
        # or 0) makes _ParseLogLine ignore log lines.
        self._xchat_year = None
        # Month of the last accepted line, used to detect a year rollover.
        self._last_month = 0

    def _GetTimeElementsTuple(self, structure):
        """Builds a time elements tuple for a log line.

        Log lines carry no year, so the year tracked from the header is used.
        It is incremented when a known month is smaller than the month of the
        previously accepted line.

        Args:
          structure (pyparsing.ParseResults): structure of tokens derived from
              a line of a text file.

        Returns:
          tuple: containing:
            year (int): year.
            month (int): month, where 1 represents January.
            day_of_month (int): day of month, where 1 is the first day of the
                month.
            hours (int): hours.
            minutes (int): minutes.
            seconds (int): seconds.
        """
        month_name, day, hours, minutes, seconds = structure.date_time

        # Unknown month names map to 0.
        month = timelib.MONTH_DICT.get(month_name.lower(), 0)

        year_rolled_over = month != 0 and month < self._last_month
        if year_rolled_over:
            self._xchat_year += 1

        return (self._xchat_year, month, day, hours, minutes, seconds)

    def _ParseHeader(self, parser_mediator, structure):
        """Parses a log header and produces the matching start/end event.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          structure (pyparsing.ParseResults): structure of tokens derived from
              a line of a text file.
        """
        (_, month_name, day, hours, minutes, seconds,
         year) = structure.date_time

        month = timelib.MONTH_DICT.get(month_name.lower(), 0)

        try:
            date_time = dfdatetime_time_elements.TimeElements(
                time_elements_tuple=(
                    year, month, day, hours, minutes, seconds))
            date_time.is_local_time = True
        except ValueError:
            parser_mediator.ProduceExtractionError(
                'invalid date time value: {0!s}'.format(structure.date_time))
            return

        self._last_month = month

        event_data = XChatLogEventData()

        # Maps the first word of the log action to the year to track from now
        # on and the text stored in the event data.
        known_actions = {
            'BEGIN': (year, 'XChat start logging'),
            'END': (None, 'XChat end logging'),
        }
        outcome = known_actions.get(structure.log_action[0])
        if outcome is None:
            logger.debug('Unknown log action: {0:s}.'.format(' '.join(
                structure.log_action)))
            return

        self._xchat_year, event_data.text = outcome

        event = time_events.DateTimeValuesEvent(
            date_time,
            definitions.TIME_DESCRIPTION_ADDED,
            time_zone=parser_mediator.timezone)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def _ParseLogLine(self, parser_mediator, structure):
        """Parses a log line and produces an event for it.

        Nothing is produced while no year is being tracked.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          structure (pyparsing.ParseResults): structure of tokens derived from
              a line of a text file.
        """
        if not self._xchat_year:
            return

        time_elements = self._GetTimeElementsTuple(structure)

        try:
            date_time = dfdatetime_time_elements.TimeElements(
                time_elements_tuple=time_elements)
            date_time.is_local_time = True
        except ValueError:
            parser_mediator.ProduceExtractionError(
                'invalid date time value: {0!s}'.format(structure.date_time))
            return

        # Index 1 of the tuple is the month.
        self._last_month = time_elements[1]

        event_data = XChatLogEventData()
        event_data.nickname = structure.nickname
        # Collapse runs of whitespace in the message text into single spaces.
        words = structure.text.split()
        event_data.text = ' '.join(words)

        event = time_events.DateTimeValuesEvent(
            date_time,
            definitions.TIME_DESCRIPTION_ADDED,
            time_zone=parser_mediator.timezone)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a log record structure and produces events.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): identifier of the structure of tokens.
          structure (pyparsing.ParseResults): structure of tokens derived from
              a line of a text file.

        Raises:
          ParseError: when the structure type is unknown.
        """
        if key == 'logline':
            self._ParseLogLine(parser_mediator, structure)

        elif key == 'header':
            self._ParseHeader(parser_mediator, structure)

        elif key == 'header_signature':
            # Reaching this key means the other structures failed to match,
            # i.e. the header uses a different locale. Parsing is paused until
            # a valid header shows up by resetting the tracked year to 0.
            # This depends on LINE_STRUCTURES being applied in the exact order
            # in which it is defined.
            logger.warning('Unknown locale header.')
            self._xchat_year = 0

        else:
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

    def VerifyStructure(self, parser_mediator, line):
        """Verifies that this file is a XChat log file.

        The line has to match the header grammar and carry a date and time
        accepted by dfdatetime.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          line (str): line from a text file.

        Returns:
          bool: True if the line is in the expected format, False if not.
        """
        try:
            structure = self._HEADER.parseString(line)
        except pyparsing.ParseException:
            logger.debug('Not a XChat log file')
            return False

        (_, month_name, day, hours, minutes, seconds,
         year) = structure.date_time

        month = timelib.MONTH_DICT.get(month_name.lower(), 0)

        try:
            dfdatetime_time_elements.TimeElements(
                time_elements_tuple=(
                    year, month, day, hours, minutes, seconds))
        except ValueError:
            logger.debug(
                'Not a XChat log file, invalid date and time: {0!s}'.format(
                    structure.date_time))
            return False
        else:
            return True

Ejemplo n.º 29

0

Mostrar archivo

Archivo: popcontest.py Proyecto: vonnopsled/plaso

class PopularityContestParser(text_parser.PyparsingSingleLineTextParser):
    """Parser for popularity contest log files.

    Recognizes a session header, a session footer and per-package log lines.
    """

    NAME = u'popularity_contest'
    DESCRIPTION = u'Parser for popularity contest log files.'

    _ASCII_PRINTABLES = pyparsing.printables
    # Every non-whitespace character with a code point below 65536.
    _UNICODE_PRINTABLES = u''.join(
        unichr(code_point)
        for code_point in xrange(65536)
        if not unichr(code_point).isspace())

    MRU = pyparsing.Word(_UNICODE_PRINTABLES).setResultsName(u'mru')
    PACKAGE = pyparsing.Word(_ASCII_PRINTABLES).setResultsName(u'package')
    TAG = pyparsing.QuotedString(
        u'<', endQuoteChar=u'>').setResultsName(u'tag')
    TIMESTAMP = text_parser.PyparsingConstants.INTEGER.setResultsName(
        u'timestamp')

    # Session start line: literal prefix, session number, timestamp, a
    # 32-character alphanumeric identifier, then the rest of the line.
    HEADER = (
        pyparsing.Literal(u'POPULARITY-CONTEST-').suppress()
        + text_parser.PyparsingConstants.INTEGER.setResultsName(u'session')
        + pyparsing.Literal(u'TIME:').suppress()
        + TIMESTAMP
        + pyparsing.Literal(u'ID:').suppress()
        + pyparsing.Word(pyparsing.alphanums, exact=32).setResultsName(u'id')
        + pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName(u'details'))

    # Session end line: literal prefix, session number and timestamp.
    FOOTER = (
        pyparsing.Literal(u'END-POPULARITY-CONTEST-').suppress()
        + text_parser.PyparsingConstants.INTEGER.setResultsName(u'session')
        + pyparsing.Literal(u'TIME:').suppress()
        + TIMESTAMP)

    # Two timestamps followed by either "package <tag>" or
    # "package mru [<tag>]".
    LOG_LINE = (
        TIMESTAMP.setResultsName(u'atime')
        + TIMESTAMP.setResultsName(u'ctime')
        + ((PACKAGE + TAG)
           | (PACKAGE + MRU + pyparsing.Optional(TAG))))

    LINE_STRUCTURES = [
        (u'logline', LOG_LINE),
        (u'header', HEADER),
        (u'footer', FOOTER),
    ]

    _ENCODING = u'UTF-8'

    def _ParseLogLine(self, parser_mediator, structure):
        """Produces events from a log line.

        Up to two events are produced, one for a non-zero <atime> and one for
        a non-zero <ctime>. Lines without <mru> are ignored.

        Args:
          parser_mediator: A parser mediator object (instance of
                           ParserMediator).
          structure: the log line structure object (instance of
                     pyparsing.ParseResults).
        """
        # <mru> and <atime> are required; lines lacking <mru> are of no
        # interest.
        if not structure.mru:
            return

        def _ProduceEvent(timestamp, timestamp_description):
            """Produces one event for this log line with the given timestamp."""
            # TODO: not doing any check on <tag> fields, even if only
            # informative probably it could be better to check for the
            # expected values.
            parser_mediator.ProduceEvent(PopularityContestEvent(
                timestamp,
                timestamp_description,
                structure.package,
                structure.mru,
                tag=structure.tag))

        # <atime> (like <ctime>) is always present but may be 0. An <atime>
        # of 0 is the <NOFILES> case, which is skipped without logging.
        if structure.atime:
            _ProduceEvent(
                structure.atime, eventdata.EventTimestamp.ACCESS_TIME)

        if structure.ctime:
            _ProduceEvent(
                structure.ctime,
                eventdata.EventTimestamp.ENTRY_MODIFICATION_TIME)

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a log record structure and produces events.

        Args:
          parser_mediator: A parser mediator object (instance of
                           ParserMediator).
          key: An identification string indicating the name of the parsed
               structure.
          structure: A pyparsing.ParseResults object from a line in the
                     log file.
        """
        # TODO: Add anomaly objects for abnormal timestamps, such as when the
        # log timestamp is greater than the session start.
        if key == u'logline':
            self._ParseLogLine(parser_mediator, structure)
            return

        if key == u'header':
            if structure.timestamp:
                session = u'{0!s}'.format(structure.session)
                parser_mediator.ProduceEvent(PopularityContestSessionEvent(
                    structure.timestamp,
                    session,
                    u'start',
                    details=structure.details,
                    hostid=structure.id))
            else:
                logging.debug(
                    u'PopularityContestParser, header with invalid timestamp.')
            return

        if key == u'footer':
            if structure.timestamp:
                session = u'{0!s}'.format(structure.session)
                parser_mediator.ProduceEvent(PopularityContestSessionEvent(
                    structure.timestamp, session, u'end'))
            else:
                logging.debug(
                    u'PopularityContestParser, footer with invalid timestamp.')
            return

        # Unknown structures are only logged.
        logging.warning(
            u'PopularityContestParser, unknown structure: {0:s}.'.format(
                key))

    def VerifyStructure(self, parser_mediator, line):
        """Verifies that this file is a Popularity Contest log file.

        The line has to match the header grammar and its timestamp has to
        convert to a non-zero value.

        Args:
          parser_mediator: A parser mediator object (instance of
                           ParserMediator).
          line: A single line from the text file.

        Returns:
          True if this is the correct parser, False otherwise.
        """
        try:
            header = self.HEADER.parseString(line)
        except pyparsing.ParseException:
            logging.debug(u'Not a Popularity Contest log file, invalid header')
            return False

        if timelib.Timestamp.FromPosixTime(header.timestamp):
            return True

        logging.debug(
            u'Invalid Popularity Contest log file header timestamp.')
        return False

Ejemplo n.º 30

0

Mostrar archivo

class SSHSyslogPlugin(interface.SyslogPlugin):
    """Syslog plugin that creates events from messages produced by SSH."""

    NAME = 'ssh'
    DATA_FORMAT = 'SSH syslog line'

    REPORTER = 'sshd'

    _AUTHENTICATION_METHOD = (
        pyparsing.Keyword('password') | pyparsing.Keyword('publickey'))

    # Named building blocks shared by the message grammars below.
    _PYPARSING_COMPONENTS = {
        'address': text_parser.PyparsingConstants.IP_ADDRESS.setResultsName(
            'address'),
        'authentication_method': _AUTHENTICATION_METHOD.setResultsName(
            'authentication_method'),
        # "RSA " followed by a run of hexadecimal digits and colons, merged
        # into a single token.
        'fingerprint': pyparsing.Combine(
            pyparsing.Literal('RSA ')
            + pyparsing.Word(':' + pyparsing.hexnums)).setResultsName(
                'fingerprint'),
        'port': pyparsing.Word(pyparsing.nums, max=5).setResultsName('port'),
        'protocol': pyparsing.Literal('ssh2').setResultsName('protocol'),
        'username': pyparsing.Word(pyparsing.alphanums).setResultsName(
            'username'),
    }

    # "Accepted <method> for <user> from <address> port <port> ssh2",
    # optionally followed by ": <fingerprint>".
    _LOGIN_GRAMMAR = (
        pyparsing.Literal('Accepted')
        + _PYPARSING_COMPONENTS['authentication_method']
        + pyparsing.Literal('for')
        + _PYPARSING_COMPONENTS['username']
        + pyparsing.Literal('from')
        + _PYPARSING_COMPONENTS['address']
        + pyparsing.Literal('port')
        + _PYPARSING_COMPONENTS['port']
        + _PYPARSING_COMPONENTS['protocol']
        + pyparsing.Optional(
            pyparsing.Literal(':') + _PYPARSING_COMPONENTS['fingerprint'])
        + pyparsing.StringEnd())

    # "Failed <method> for <user> from <address> port <port>".
    _FAILED_CONNECTION_GRAMMAR = (
        pyparsing.Literal('Failed')
        + _PYPARSING_COMPONENTS['authentication_method']
        + pyparsing.Literal('for')
        + _PYPARSING_COMPONENTS['username']
        + pyparsing.Literal('from')
        + _PYPARSING_COMPONENTS['address']
        + pyparsing.Literal('port')
        + _PYPARSING_COMPONENTS['port']
        + pyparsing.StringEnd())

    # "Connection from <address> port <port>".
    _OPENED_CONNECTION_GRAMMAR = (
        pyparsing.Literal('Connection from')
        + _PYPARSING_COMPONENTS['address']
        + pyparsing.Literal('port')
        + _PYPARSING_COMPONENTS['port']
        + pyparsing.LineEnd())

    MESSAGE_GRAMMARS = [
        ('login', _LOGIN_GRAMMAR),
        ('failed_connection', _FAILED_CONNECTION_GRAMMAR),
        ('opened_connection', _OPENED_CONNECTION_GRAMMAR),
    ]

    def _ParseMessage(self, parser_mediator, key, date_time, tokens):
        """Produces an event from a syslog body that matched a grammar.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): name of the matching grammar.
          date_time (dfdatetime.DateTimeValues): date and time values.
          tokens (dict[str, str]): tokens derived from a syslog message based
              on the defined grammar.

        Raises:
          ValueError: If an unknown key is provided.
        """
        if key == 'login':
            event_data = SSHLoginEventData()

        elif key == 'failed_connection':
            event_data = SSHFailedConnectionEventData()

        elif key == 'opened_connection':
            event_data = SSHOpenedConnectionEventData()

        else:
            raise ValueError('Unknown grammar key: {0:s}'.format(key))

        # Copy every supported token onto the event data; tokens that are
        # missing from the message are stored as None.
        token_names = (
            'address', 'authentication_method', 'body', 'fingerprint',
            'hostname', 'pid', 'protocol', 'port', 'reporter', 'severity',
            'username')
        for token_name in token_names:
            setattr(event_data, token_name, tokens.get(token_name, None))

        event = time_events.DateTimeValuesEvent(
            date_time,
            definitions.TIME_DESCRIPTION_WRITTEN,
            time_zone=parser_mediator.timezone)
        parser_mediator.ProduceEventWithEventData(event, event_data)