Example #1
File: csv.py Project: cjh1/edp-parser
def _build_csv_parser():
    separator = pp.Suppress(':')
    key = pp.Word(pp.printables, excludeChars=':')
    value = pp.Regex(r'[^\n\r]*') + pp.LineEnd().suppress()

    block_name = key + separator + pp.LineEnd().suppress()

    key_value = key + separator + value

    header = (pp.LineStart().suppress() + pp.Word(pp.nums) +
              pp.ZeroOrMore(pp.White().suppress() + pp.Word(pp.nums)) +
              pp.LineEnd().suppress())

    csv_header = pp.delimitedList(pp.Word(pp.printables, excludeChars=',')) + pp.LineEnd().suppress()

    csv_row = pp.delimitedList(pp.Word(pp.nums + ';.+-e_') | pp.Literal('custom')) + pp.LineEnd().suppress()

    indent_stack = [1]
    block = pp.Forward()
    block_body = ( block | key_value)

    indented_block = pp.Dict(pp.ungroup(pp.indentedBlock(block_body, indent_stack)))
    block << ( block_name + indented_block | key_value)

    return pp.Optional(header) + pp.ZeroOrMore(pp.Dict(pp.Group(block))).setResultsName('meta') + \
        csv_header.setResultsName('csvHeader') + \
        pp.Group(pp.OneOrMore(pp.Group(csv_row))).setResultsName('csvValues')
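A minimal, self-contained sketch of the Dict(Group(key + value)) idiom this parser is built around; the sample keys and text below are invented for illustration.

import pyparsing as pp

# illustrative only: each Group's first token becomes a results name via Dict
key = pp.Word(pp.alphas)
value = pp.Word(pp.nums)
pair = pp.Group(key + pp.Suppress(':') + value)
meta = pp.Dict(pp.OneOrMore(pair))

result = meta.parseString("width: 800 height: 600")
print(result['width'])    # '800'
print(result.asDict())    # {'width': '800', 'height': '600'}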
Example #2
def _parse(mystr):

    LBRACE, RBRACE, EQUAL = map(pp.Suppress, "{}=")
    field = pp.Word(pp.printables + ' ', excludeChars='[]=')
    field.addParseAction(pp.tokenMap(str.rstrip))
    string = pp.dblQuotedString().setParseAction(pp.removeQuotes)
    number = pp.pyparsing_common.number()
    date_expr = pp.Regex(r'\d\d\d\d-\d\d-\d\d')
    time_expr = pp.Regex(r'\d\d:\d\d:\d\d\.\d\d\d')
    scalar_value = (string | date_expr | time_expr | number)

    list_marker = pp.Suppress("[]")
    value_list = pp.Forward()
    jobject = pp.Forward()

    memberDef1 = pp.Group(field + EQUAL + scalar_value)
    memberDef2 = pp.Group(field + EQUAL + jobject)
    memberDef3 = pp.Group(field + list_marker + EQUAL + LBRACE + value_list +
                          RBRACE)
    memberDef = memberDef1 | memberDef2 | memberDef3

    value_list <<= (pp.delimitedList(scalar_value, ",")
                    | pp.ZeroOrMore(pp.Group(pp.Dict(memberDef2))))
    value_list.setParseAction(lambda t: [pp.ParseResults(t[:])])

    members = pp.OneOrMore(memberDef)
    jobject <<= pp.Dict(LBRACE + pp.ZeroOrMore(memberDef) + RBRACE)
    # force empty jobject to be a dict
    jobject.setParseAction(lambda t: t or {})

    parser = pp.OneOrMore(pp.Group(pp.Dict(memberDef)))

    return parser.parseString(mystr)
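A hedged sketch of the field = value idiom that memberDef1 relies on, reduced to a standalone snippet (the field names and sample input are invented).

import pyparsing as pp

# illustrative only: Dict turns each Group's first token into a results name
EQ = pp.Suppress('=')
field = pp.Word(pp.alphas, pp.alphanums + '_')
scalar = pp.dblQuotedString().setParseAction(pp.removeQuotes) | pp.pyparsing_common.number
member = pp.Group(field + EQ + scalar)
record = pp.Dict(pp.OneOrMore(member))

print(record.parseString('name = "job42" retries = 3').asDict())
# {'name': 'job42', 'retries': 3}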
Example #3
class TestGroups(PyparsingExpressionTestCase):
    EQ = pp.Suppress('=')
    tests = [
        PpTestSpec(
            desc = "Define multiple results names in groups",
            expr = pp.OneOrMore(pp.Group(pp.Word(pp.alphas)("key")
                                          + EQ
                                          + pp.pyparsing_common.number("value"))),
            text = "range=5280 long=-138.52 lat=46.91",
            expected_list = [['range', 5280], ['long', -138.52], ['lat', 46.91]],
        ),
        PpTestSpec(
            desc = "Define multiple results names in groups - use Dict to define results names using parsed keys",
            expr = pp.Dict(pp.OneOrMore(pp.Group(pp.Word(pp.alphas)
                                          + EQ
                                          + pp.pyparsing_common.number))),
            text = "range=5280 long=-138.52 lat=46.91",
            expected_list = [['range', 5280], ['long', -138.52], ['lat', 46.91]],
            expected_dict = {'lat': 46.91, 'long': -138.52, 'range': 5280}
        ),
        PpTestSpec(
            desc = "Define multiple value types",
            expr = pp.Dict(pp.OneOrMore(pp.Group(pp.Word(pp.alphas)
                                          + EQ
                                          + (pp.pyparsing_common.number | pp.oneOf("True False") | pp.QuotedString("'"))
                                        ))),
            text = "long=-122.47 lat=37.82 public=True name='Golden Gate Bridge'",
            expected_list = [['long', -122.47], ['lat', 37.82], ['public', 'True'], ['name', 'Golden Gate Bridge']],
            expected_dict = {'long': -122.47, 'lat': 37.82, 'public': 'True', 'name': 'Golden Gate Bridge'}
        ),
    ]
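The contrast the specs above draw, as a standalone snippet; the sample text comes from the tests, the print statements are illustrative.

import pyparsing as pp

EQ = pp.Suppress('=')
entry = pp.Group(pp.Word(pp.alphas) + EQ + pp.pyparsing_common.number)
expr = pp.Dict(pp.OneOrMore(entry))

result = expr.parseString("range=5280 long=-138.52 lat=46.91")
print(result['lat'])          # 46.91
print(list(result.keys()))    # ['range', 'long', 'lat']
print(result.asDict())        # {'range': 5280, 'long': -138.52, 'lat': 46.91}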
Example #4
def _build_ana_rcp_parser():
    separator = pp.Suppress(':')
    key = pp.Word(pp.printables, excludeChars=':')
    value = pp.Regex(r'[^\n\r]*') + pp.LineEnd().suppress()

    block_name = key + separator + pp.LineEnd().suppress()
    platemap_keylist = pp.Literal(
        'platemap_comp4plot_keylist') + separator + pp.delimitedList(
            pp.Word(pp.alphas))
    run_ids = pp.Literal('run_ids') + separator + pp.delimitedList(
        pyparsing_common.integer)
    plate_id = (pp.Literal('plate_ids') |
                pp.Literal('plate_id')) + separator + pyparsing_common.integer

    key_value = (platemap_keylist | run_ids | plate_id
                 | key + separator + value)

    indent_stack = [1]
    block = pp.Forward()
    block_body = (block | key_value)

    indented_block = pp.Dict(
        pp.ungroup(pp.indentedBlock(block_body, indent_stack)))
    block << (block_name + indented_block | key_value)

    return pp.OneOrMore(pp.Dict(pp.Group(block)))
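A self-contained illustration of the delimitedList + pyparsing_common.integer pattern used for run_ids above; the input line is invented.

import pyparsing as pp
from pyparsing import pyparsing_common

run_ids = pp.Literal('run_ids') + pp.Suppress(':') + pp.delimitedList(pyparsing_common.integer)
print(run_ids.parseString("run_ids: 10, 11, 12").asList())
# ['run_ids', 10, 11, 12]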
Example #5
class TestGroups(PyparsingExpressionTestCase):
    EQ = pp.Suppress("=")
    tests = [
        PpTestSpec(
            desc="Define multiple results names in groups",
            expr=pp.Group(
                pp.Word(pp.alphas)("key") + EQ + pp.pyparsing_common.number("value")
            )[...],
            text="range=5280 long=-138.52 lat=46.91",
            expected_list=[["range", 5280], ["long", -138.52], ["lat", 46.91]],
        ),
        PpTestSpec(
            desc="Define multiple results names in groups - use Dict to define results names using parsed keys",
            expr=pp.Dict(
                pp.Group(pp.Word(pp.alphas) + EQ + pp.pyparsing_common.number)[...]
            ),
            text="range=5280 long=-138.52 lat=46.91",
            expected_list=[["range", 5280], ["long", -138.52], ["lat", 46.91]],
            expected_dict={"lat": 46.91, "long": -138.52, "range": 5280},
        ),
        PpTestSpec(
            desc="Define multiple value types",
            expr=pp.Dict(
                pp.Group(
                    pp.Word(pp.alphas)
                    + EQ
                    + (
                        pp.pyparsing_common.number
                        | pp.oneOf("True False")
                        | pp.QuotedString("'")
                    )
                )[...]
            ),
            text="long=-122.47 lat=37.82 public=True name='Golden Gate Bridge'",
            expected_list=[
                ["long", -122.47],
                ["lat", 37.82],
                ["public", "True"],
                ["name", "Golden Gate Bridge"],
            ],
            expected_dict={
                "long": -122.47,
                "lat": 37.82,
                "public": "True",
                "name": "Golden Gate Bridge",
            },
        ),
    ]
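This variant uses pyparsing 3's [...] repetition shorthand in place of OneOrMore/ZeroOrMore; a quick standalone check of that equivalence (assumes pyparsing >= 3.0).

import pyparsing as pp

word = pp.Word(pp.alphas)
# expr[...] is ZeroOrMore(expr); expr[1, ...] is OneOrMore(expr)
assert word[...].parseString("a b c").asList() == ['a', 'b', 'c']
assert word[1, ...].parseString("a b c").asList() == ['a', 'b', 'c']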
Example #6
def compile():
    LBRACE, RBRACE, LBRACK, RBRACK, COLON = map(pp.Suppress, '{}[]:')

    value = pp.Forward()

    true = pp.Keyword('true').setParseAction(pp.replaceWith(True))
    false = pp.Keyword('false').setParseAction(pp.replaceWith(False))
    null = pp.Keyword('null').setParseAction(pp.replaceWith(None))
    number = (pp.Regex(
        r'-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?').setParseAction(
            pp.tokenMap(float)))
    string = (pp.Regex(
        r'"([ !#-\[\]-\U0010ffff]+'
        r'|\\(?:["\\/bfnrt]|u[0-9A-Fa-f]{4}))*"').setParseAction(
            pp.tokenMap(json_unescape)))

    items = pp.delimitedList(value)
    array = (pp.Group(LBRACK - pp.Optional(items) +
                      RBRACK).setParseAction(lambda t: t.asList()))

    member = pp.Group(string + COLON + value)
    members = pp.delimitedList(member)
    object = (pp.Dict(LBRACE - pp.Optional(members) +
                      RBRACE).setParseAction(lambda t: t.asDict()))

    value << (object | array | string | number | true | false | null)

    json = value('top') + pp.StringEnd()
    json.setDefaultWhitespaceChars(' \t\n\r')
    json.parseWithTabs()

    return lambda s: json.parseString(s)['top']
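A hypothetical call of the returned closure; json_unescape is assumed to be a string-unescaping helper defined elsewhere in the same module, and the sample document is invented.

parse = compile()  # the factory defined above
doc = parse('{"name": "demo", "values": [1, 2, 3], "ok": true}')
# objects come back as plain dicts (asDict), arrays as lists (asList),
# and numbers as floats (tokenMap(float))
print(doc)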
Example #7
    def __parse(self):
        lbrace, rbrace, semi, quote = map(pyparsing.Suppress, '{};"')
        ip_address = pyparsing.Combine(pyparsing.Word(pyparsing.nums) + ('.' + pyparsing.Word(pyparsing.nums)) * 3)
        hex_int = pyparsing.Word(pyparsing.hexnums, exact=2)
        mac_address = pyparsing.Combine(hex_int + (':' + hex_int) * 5)
        hdw_type = pyparsing.Word(pyparsing.alphanums)
        yyyy_mm_dd = pyparsing.Combine((pyparsing.Word(pyparsing.nums, exact=4) |
                                        pyparsing.Word(pyparsing.nums, exact=2)) +
                                       ('/' + pyparsing.Word(pyparsing.nums, exact=2)) * 2)
        hh_mm_ss = pyparsing.Combine(pyparsing.Word(pyparsing.nums, exact=2) +
                                     (':' + pyparsing.Word(pyparsing.nums, exact=2)) * 2)
        date_ref = pyparsing.oneOf(list("0123456"))("weekday") + yyyy_mm_dd("date") + hh_mm_ss("time")

        def to_datetime(tokens):
            tokens["datetime"] = datetime.strptime("%(date)s %(time)s" % tokens, "%Y/%m/%d %H:%M:%S")

        date_ref.setParseAction(to_datetime)
        starts_stmt = "starts" + date_ref + semi
        ends_stmt = "ends" + (date_ref | "never") + semi
        tstp_stmt = "tstp" + date_ref + semi
        tsfp_stmt = "tsfp" + date_ref + semi
        hdw_stmt = "hardware" + hdw_type("type") + mac_address("mac") + semi
        uid_stmt = "uid" + pyparsing.QuotedString('"')("uid") + semi
        binding_stmt = "binding" + pyparsing.Word(pyparsing.alphanums) + pyparsing.Word(pyparsing.alphanums) + semi
        lease_statement = starts_stmt | ends_stmt | tstp_stmt | tsfp_stmt | hdw_stmt | uid_stmt | binding_stmt
        lease_def = "lease" + ip_address("ipaddress") + lbrace + \
                    pyparsing.Dict(pyparsing.ZeroOrMore(pyparsing.Group(lease_statement))) + rbrace

        with open(self.lease_file, 'r') as file:
            parsed = lease_def.scanString(file.read())

            return parsed
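__parse() returns the generator produced by scanString(); a standalone sketch of what scanString() yields, on invented input.

import pyparsing as pp

word = pp.Word(pp.alphas)
for tokens, start, end in word.scanString("one 2 three"):
    # each hit is reported together with the slice of the input it covered
    print(tokens[0], start, end)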
Example #8
def grammar(*, has_complex: bool = False) -> pp.ParserElement:
    """The Getkw recursive grammar.

    Parameters
    ----------
    has_complex: bool
        Whether to include complex numbers. Defaults to `False`.

    Returns
    -------
    A parsing grammar.
    """

    EQ, COMMA = map(pp.Suppress, "=,")
    LBRACE, RBRACE = map(pp.Suppress, "{}")

    # Define key
    key = pp.Word(pp.alphas + "_<>", pp.alphanums + "_<>")

    # A scalar value (bool, int, float, str)
    if has_complex:
        scalar = quoted_str_t ^ complex_t ^ float_t ^ int_t ^ bool_t ^ unquoted_str_t
    else:
        scalar = quoted_str_t ^ float_t ^ int_t ^ bool_t ^ unquoted_str_t
    # Coerce lists to be lists
    list_t = make_list_t(scalar)
    list_t.set_parse_action(lambda t: [t])

    # Define key-value pairs, i.e. our keywords
    pair = pp.Group(key + EQ + list_t) | pp.Group(key + EQ + scalar)

    # Define values and section recursively
    section = pp.Forward()
    values = pp.Forward()
    section << pp.Group(key + LBRACE + values + RBRACE)
    values << pp.Dict(pp.OneOrMore(section | data_t | pair))

    # Define input
    retval = pp.Dict(pp.OneOrMore(section) | pp.OneOrMore(values))

    # Ignore Python (#), C/C++ (/* */ and //), and Fortran (!) style comments
    comment = pp.cpp_style_comment | pp.python_style_comment | fortran_style_comment
    retval.ignore(comment)

    return retval
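The scalar definition above chains alternatives with ^ (Or, longest match) rather than | (MatchFirst); a minimal standalone demonstration of the difference.

import pyparsing as pp
from pyparsing import pyparsing_common as ppc

print((ppc.integer | ppc.fnumber).parseString("3.14"))   # [3]    first alternative that matches wins
print((ppc.integer ^ ppc.fnumber).parseString("3.14"))   # [3.14] longest alternative wins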
Example #9
class QueryParser(collections.OrderedDict):
    TAG_TOKEN = (pp.Keyword("tag") + pp.Suppress(":") +
                 pp.Word(pp.alphas, pp.alphanums + "_"))

    SORT_TOKEN = (pp.Keyword("sort") + pp.Suppress(":") +
                  (pp.Keyword("name") | pp.Keyword("date") | TAG_TOKEN) +
                  pp.Optional(
                      pp.Suppress(":") + pp.oneOf("s n d"),
                      default="s",
                  ) + pp.Optional(
                      pp.Suppress(pp.Keyword("order")) + pp.Suppress(":") +
                      (pp.Keyword("asc") | pp.Keyword("desc")),
                      default="desc",
                  ))

    SEARCH_TOKEN = (
        ((pp.Keyword("name") | pp.Keyword("date")) + pp.Suppress(":") +
         (pp.Word(pp.printables)
          | pp.dblQuotedString().setParseAction(pp.removeQuotes)
          | pp.sglQuotedString().setParseAction(pp.removeQuotes))) |
        (TAG_TOKEN + pp.Optional(
            pp.Suppress(":") +
            (pp.Word(pp.printables)
             | pp.dblQuotedString().setParseAction(pp.removeQuotes)
             | pp.sglQuotedString().setParseAction(pp.removeQuotes)))))

    # TODO: date, from, to should be able to grab dates in EXIF tags

    # TODO: support quoted %c datetimes
    # TODO: support quoted datetimes with hour/minute/second individually
    DATETIME = (Date("%Y/%m/%d",
                     DateHints.YEAR | DateHints.MONTH | DateHints.DAY)
                | Date("%Y/%m", DateHints.YEAR | DateHints.MONTH)
                | Date("%Y", DateHints.YEAR))
    FROM_TOKEN = (pp.Keyword("from") + pp.Suppress(":") + DATETIME)
    TO_TOKEN = (pp.Keyword("to") + pp.Suppress(":") + DATETIME)

    TYPE_TOKEN = (pp.Keyword("type") + pp.Suppress(":") +
                  (pp.Keyword("image") | pp.Keyword("video")))

    QUERY_TOKEN = pp.Group(SORT_TOKEN | SEARCH_TOKEN | FROM_TOKEN | TO_TOKEN
                           | TYPE_TOKEN)

    GRAMMAR = pp.Dict(pp.OneOrMore(QUERY_TOKEN))

    def __init__(self, s, grammar=GRAMMAR):
        try:
            r = grammar.setDebug(logging.getLogger().isEnabledFor(
                logging.DEBUG)).parseString(s, parseAll=True)

            logging.debug("search query parse results: %s", r)

            self.update(collections.OrderedDict(r))
        except Exception as e:
            raise QueryError("unable to parse query: %s" % e)
Example #10
    def load_steam_folders(self):
        if self.loaded_steam:
            return

        hkey = reg.OpenKey(reg.HKEY_CURRENT_USER, "Software\\Valve\\Steam")
        steam_path = reg.QueryValueEx(hkey, "SteamPath")[0]
        dl_folder = steam_path + "/steamapps/downloading"
        if os.path.isdir(dl_folder):
            self.directories.add(dl_folder)
        # Read the steam vdf file that contains path strings to all
        # game install directories.
        try:
            file = open(steam_path + "/steamapps/LibraryFolders.vdf").read()
        except OSError:
            print("Unable to open {}.".format(
                steam_path + "/steamapps/LibraryFolders.vdf"))
            return
        # parse Valve's weird cfg format (it's like a shitty version of JSON)
        # forward declare the value of a key
        value = pp.Forward()
        # expression for our dict structure that looks like: ["key1", value]
        key_value = pp.Group(pp.QuotedString('"') + value)
        # create a parse structure for value so value looks like: c
        expression = pp.Suppress(
            '{') + pp.Dict(pp.ZeroOrMore(key_value)) + pp.Suppress('}')
        # set our value to be a quoted string followed by the structure we defined;
        # it looks like this in Python:
        # ["outer_key", { ["inner_key1", value], ["inner_key2", value] } ]
        # we can access the above as either a dict or an array.
        value <<= pp.QuotedString('"') | expression
        parser = pp.Dict(key_value)
        content = parser.parseString(file)
        # get the last pair's key; this should be the last numbered folder,
        # so we can use it as our max number of folders for looping.
        max_folders = int(content["LibraryFolders"][-1][0])

        # loop from 1 to (incl) max folders and use it as a dictionary key to get
        # the value of that key which should be a steam library folder path.
        for i in range(1, max_folders + 1):
            libpath = content["LibraryFolders"][str(i)]
            dlpath = libpath + "\\steamapps\\downloading"
            if os.path.isdir(dlpath):
                self.directories.add(dlpath)
        self.loaded_steam = True
Example #11
def comment_handler(input_string, locn, tokens):
    tokenstr = "\n".join(tokens)
    mod_string = "comment{:d} comment\n(\n{:s}\n)".format(locn, tokenstr)
    mod_def = pp.Dict(
        pp.Group(
            identifier.setResultsName("_name") +
            identifier.setResultsName("_type") + LPAREN +
            pp.Group(pp.OneOrMore(pp.dblSlashComment)).setResultsName("text") +
            RPAREN))
    return mod_def.parseString(mod_string)
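comment_handler is intended to be attached as a parse action (see the tor_comment.setParseAction call in Example #24); a standalone reminder of the (input_string, location, tokens) parse-action signature, on invented input.

import pyparsing as pp

word = pp.Word(pp.alphas)
# a parse action may accept (s, loc, toks); its return value replaces the matched tokens
word.setParseAction(lambda s, loc, toks: "{}@{}".format(toks[0], loc))
print(pp.OneOrMore(word).parseString("ab cd").asList())
# each word comes back tagged with the offset at which it matched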
Example #12
def register_res_parse_rule(cls):
    begin_function_map = _pp.Keyword("BEGIN_FUNCTION_MAP").suppress()
    begin_data_map = _pp.Keyword("BEGIN_DATA_MAP").suppress()
    end_data_map = _pp.Keyword("END_DATA_MAP").suppress()
    end_function_map = _pp.Keyword("END_FUNCTION_MAP").suppress()

    begin_block = _pp.Keyword("begin").suppress()
    end_block = _pp.Keyword("end").suppress()

    semicolon = _pp.Literal(";").suppress()
    valid_metadata_words = _pp.Word(
        _pp.pyparsing_unicode.Korean.alphas + _pp.alphas + _pp.printables +
        r" \t",
        excludeChars=",;",
    )
    comma_separated_row = _pp.delimitedList(
        valid_metadata_words) + _pp.Optional(semicolon)

    type_tab = {
        "char": str,
        "date": str,
        "long": int,
        "int": int,
        "float": float,
        "double": float,
    }

    def parse_field(x):

        cur_type = type_tab[x[3]]
        return [[
            x[2],
            {
                "desc": x[0],
                "_reserved": x[1],
                "type": cur_type,
                "size": cur_type(x[4]),
            },
        ]]

    function_metadata = comma_separated_row.setResultsName("__FUNC_META")
    data_metadata = comma_separated_row.setResultsName("__DATA_META")
    field_metadata = _pp.Dict(comma_separated_row.setParseAction(parse_field))

    data_block = _pp.Group(data_metadata + begin_block +
                           _pp.ZeroOrMore(~end_block + field_metadata) +
                           end_block)
    data_blocks = _pp.Group(begin_data_map + _pp.OneOrMore(data_block) +
                            end_data_map).setResultsName("DATA_BLOCKS")
    function_block = _pp.Group(begin_function_map + function_metadata +
                               data_blocks +
                               end_function_map).setResultsName("FUNC_BLOCK")

    cls._parser = function_block
    return cls
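data_block stops its repetition with ~end_block, i.e. a NotAny lookahead; a minimal standalone version of that "repeat until the closing keyword" idiom (names and input are invented).

import pyparsing as pp

END = pp.Keyword("end")
item = pp.Word(pp.alphas)
body = pp.ZeroOrMore(~END + item) + END.suppress()
print(body.parseString("alpha beta end").asList())   # ['alpha', 'beta']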
Example #13
def aggregatorMetas():
    star = p.Literal('*').suppress()
    metaName = p.Word(p.alphanums)
    metaKeyword = p.Combine(star + metaName).setResultsName('key')
    equals = p.Literal('=').suppress()
    value = p.Word(p.printables + ' ')
    metaValue = (equals + value).setResultsName('value')
    metaDef = p.Dict(
        p.Group(metaKeyword + metaValue) +
        p.Optional(p.LineEnd().suppress()))
    return metaDef
Example #14
File: simtest.py Project: turbana/cpu
def grammer():
    lparen = pp.Suppress("(")
    rparen = pp.Suppress(")")
    equal = pp.Suppress("=")
    nl = pp.Suppress(pp.LineEnd())
    reg = pp.Combine("$" + pp.Optional("cr") +
                     pp.Word(pp.srange("[0-7]"), max=1))
    num = pp.Word(pp.srange("[0-9]")).setParseAction(lambda s, l, t: int(t[0]))
    val = pp.Word(
        pp.srange("[0-9a-fA-F]")).setParseAction(lambda s, l, t: int(t[0], 16))
    values = pp.Dict(pp.OneOrMore(pp.Group(reg + equal + val)))
    return num + lparen + values + rparen + nl
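A hypothetical use of the grammar above; the input line is invented to match the "num (reg=val ...)" shape it describes.

parser = grammer()
result = parser.parseString("3 ($0=1f $cr1=a0)\n")
print(result[0])           # 3
print(result['$cr1'])      # 160 (hex 'a0' converted by the parse action)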
Example #15
File: craft_parser.py Project: Pebaz/Craft
def craft_parse(text):
    """
	"""
    LineComment = pyp.Combine(pyp.Literal('::') + pyp.restOfLine).suppress()
    BlockComment = pyp.Combine(
        pyp.Literal(':>') + pyp.SkipTo(pyp.Literal('<:')) +
        pyp.Literal('<:')).suppress()
    Comment = BlockComment | LineComment

    BlockComment = pyp.Combine(
        pyp.Literal(':<') + pyp.Combine(
            pyp.NotAny(pyp.Literal(':>')) + pyp.Word(pyp.printables + ' ')) +
        pyp.Literal('>:'))

    Identifier = pyp.Word(pyp.alphanums + '!#$%&()*+,./;<=>?@\\^-_`{|}~')
    Value = (Comment
             | pyp.QuotedString('"')
             | pyp.QuotedString("'")
             | Identifier.addParseAction(_type_cast_value))
    LBRACKET, RBRACKET, COLON = map(pyp.Suppress, '[]:')

    Function = pyp.Forward()
    List = pyp.Forward()

    Function << pyp.Dict(
        pyp.Group(Identifier + pyp.Literal(':') +
                  pyp.Group(LBRACKET + pyp.ZeroOrMore(Comment | Function | List
                                                      | Value) + RBRACKET)))

    List << pyp.Group(LBRACKET + pyp.ZeroOrMore(Comment | Function | List
                                                | Value) + RBRACKET)

    Program = pyp.OneOrMore(Comment | Function)

    # Validate for syntax error messages:
    validator = SourceValidator()
    Value.setParseAction(validator.validate)
    List.setParseAction(validator.validate)
    Identifier.addParseAction(validator.validate)
    #Comment.setParseAction(validator.validate)
    Function.setParseAction(validator.validate)
    Program.setParseAction(validator.validate)

    syntax_error = None
    try:
        return __walk(Program.parseString(text)[0])
    except Exception as e:
        syntax_error = validator.panic()

    # Now raise the exception with a clean stack trace
    raise syntax_error
Example #16
def parse_verbs():
    get = pp.Literal("GET")
    post = pp.Literal("POST")
    options = pp.Literal("OPTIONS")
    put = pp.Literal("PUT")
    patch = pp.Literal("PATCH")
    delete = pp.Literal("DELETE")
    connect = pp.Literal("CONNECT")
    trace = pp.Literal("TRACE")
    track = pp.Literal("TRACK")
    head = pp.Literal("HEAD")
    crappaton = pp.Word(pp.printables).setResultsName('bleed')
    headers = pp.ZeroOrMore(get | post | head | options | put | patch | delete
                            | connect | trace | track).setResultsName('headers')
    verb = pp.Dict(pp.Group(headers + pp.ZeroOrMore(crappaton)).setResultsName('results'))
    return verb
Example #17
    def metaParser():
        # --- meta parser ---
        metaIndicator = p.LineStart() + p.Suppress(p.Literal('*'))
        metaName = p.Word(p.alphanums).setResultsName('metaname')
        metaSeparator = p.Suppress(p.Literal('='))

        # TODO force case insensitivity in attributeMode keyword match
        # TODO add debug names
        # TODO add a conditional debug flag

        metavalue = p.Combine(p.restOfLine() +
                              p.Suppress(p.LineEnd())).setResultsName(
                                  'metavalue')

        metaList = p.Dict(
            p.Group(metaIndicator + metaName + metaSeparator + metavalue))
        return metaList
Example #18
def _construct_fnsuffix_parser():
    atom = pp.Regex(r"[^=,{}()[\]]+")
    value = pp.Forward().setName("value")  # .setDebug()

    key = pp.Regex(r"\w*").setName("key")  # .setDebug()
    item = pp.Dict(pp.Group(key + pp.Suppress("=") + value))
    items = pp.delimitedList(item)
    dict_ = pp.Suppress("{") + items + pp.Suppress("}")

    list_, tuple_, set_ = (o + pp.delimitedList(value, combine=True) + c
                           for o, c in zip(["[", "(", "{"], ["]", ")", "}"]))

    combine_values = [
        pp.Combine(expr) for expr in (list_, tuple_, set_, atom + value)
    ]
    value << (
        pp.quotedString | dict_ | pp.Or(combine_values) | atom
    )  # Caution: brackets are needed because of << operator precedence!!
    return dict_ + pp.StringEnd()
Example #19
class TestGroups(PyparsingExpressionTestCase):
    EQ = pp.Suppress('=')
    tests = [
        PpTestSpec(
            desc = "Define multiple results names in groups",
            expr = pp.OneOrMore(pp.Group(pp.Word(pp.alphas)("key") 
                                          + EQ
                                          + pp.pyparsing_common.number("value"))),
            text = "range=5280 long=-138.52 lat=46.91",
            expected_list = [['range', 5280], ['long', -138.52], ['lat', 46.91]],
        ),
        PpTestSpec(
            desc = "Define multiple results names in groups - use Dict to define results names using parsed keys",
            expr = pp.Dict(pp.OneOrMore(pp.Group(pp.Word(pp.alphas) 
                                          + EQ
                                          + pp.pyparsing_common.number))),
            text = "range=5280 long=-138.52 lat=46.91",
            expected_list = [['range', 5280], ['long', -138.52], ['lat', 46.91]],
            expected_dict = {'lat': 46.91, 'long': -138.52, 'range': 5280}
        ),
    ]
Example #20
File: txt.py Project: OpenChemistry/edp
def _build_txt_parser():
    separator = pp.Suppress('=')
    key = pp.Literal('%') + pp.Word(pp.printables, excludeChars='=')
    value = pp.Regex(r'[^\n\r]*') + pp.LineEnd().suppress()

    key_value = key + separator + value

    header = (pp.LineStart().suppress() + pp.Word(pp.nums) +
              pp.ZeroOrMore(pp.White().suppress() + pp.Word(pp.nums)) +
              pp.LineEnd().suppress())

    column_heading = pp.Literal('%') + pp.Word(
        pp.printables, excludeChars='=') + separator + value

    txt_row = pp.delimitedList(
        pp.Word(pp.nums + '.+-e_')
        | pp.Literal('custom')) + pp.LineEnd().suppress()

    return pp.Optional(header) + pp.ZeroOrMore(pp.Dict(pp.Group(key_value))).setResultsName('meta') + \
        column_heading.setResultsName('columnHeading') + \
        pp.Group(pp.OneOrMore(pp.Group(txt_row))).setResultsName('textValues')
Example #21
    def __init__(self):
        COLON, EQUAL, COMMA = map(pp.Literal, ":=,")
        SCOLON, SEQUAL, SCOMMA = map(pp.Suppress, ":=,")
        LPAREN, RPAREN = map(pp.Suppress, "()")
        word_strict = pp.Regex(r"[^\s'\":=]+")
        sgl_quoted_string = pp.QuotedString("'", escChar="\\")
        dbl_quoted_string = pp.QuotedString('"', escChar="\\")
        word = sgl_quoted_string | dbl_quoted_string | word_strict

        date = pp.pyparsing_common.iso8601_date.copy()
        date.setParseAction(pp.pyparsing_common.convertToDate())
        date_expr = date + SCOMMA + date | COMMA + date | date + COMMA
        date_range = LPAREN + date_expr + RPAREN

        filtr_delim = COLON | EQUAL
        filtr_delim_suppress = SCOLON | SEQUAL
        filtr = pp.Group(word_strict + filtr_delim_suppress +
                         date_range) | pp.Group(word_strict + filtr_delim +
                                                word)
        query_patt = pp.Dict(filtr) | word

        self.__expr = query_patt() * (1, )
Example #22
    def get_parser():
        """A Parser that parses the dumped ParamFile attribute by FastPM.

        This must be a result produced by the lua dump module. Must be
        a lua table. e.g.
        { a = 3, b = {0, 1, 2,} }

        (modified from jsonParser.py example under pyparsing)

        When using the parser, convert the attribute from an array of U1 to a string first.
        """
        import pyparsing as pp
        from pyparsing import pyparsing_common as ppc

        def make_keyword(kwd_str, kwd_value):
            return pp.Keyword(kwd_str).setParseAction(
                pp.replaceWith(kwd_value))

        TRUE = make_keyword("true", True)
        FALSE = make_keyword("false", False)
        NULL = make_keyword("nil", None)

        LBRACE, RBRACE, ASSIGN, COMMA = map(pp.Suppress, "{}=,")

        luaName = pp.Word(pp.alphas + "_", pp.alphanums + "_")
        luaString = pp.dblQuotedString().setParseAction(pp.removeQuotes)
        luaNumber = ppc.number()

        luaObject = pp.Forward()
        luaValue = pp.Forward()
        luaElements = pp.delimitedList(luaValue) + pp.Optional(COMMA)
        luaArray = pp.Group(LBRACE + pp.Optional(luaElements, []) + RBRACE)
        luaValue << (luaString | luaNumber | pp.Group(luaObject) | luaArray
                     | TRUE | FALSE | NULL)
        memberDef = pp.Group(luaName + ASSIGN + luaValue)
        luaMembers = pp.delimitedList(memberDef) + pp.Optional(COMMA)
        luaObject << pp.Dict(LBRACE + pp.Optional(luaMembers) + RBRACE)
        return luaObject
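A hypothetical use of get_parser() on the lua-table example quoted in its docstring.

parser = get_parser()
result = parser.parseString("{ a = 3, b = {0, 1, 2,} }")
print(result.asDict())     # {'a': 3, 'b': [0, 1, 2]}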
Example #23
File: sample.py Project: cjh1/edp-parser
def _build_sample_parser():
    separator = pp.Suppress('=')
    key = pp.LineStart() + pp.Literal('%').suppress() + pp.Word(
        pp.printables, excludeChars='=')
    value = pp.Regex(r'[^\r%]*') | pp.Empty() + pp.LineEnd().suppress()

    element = pp.Word(pp.alphas)
    elements = pp.Group(pp.LineStart().suppress() +
                        pp.Literal('%').suppress() + pp.Literal('elements') +
                        separator + element +
                        pp.ZeroOrMore(pp.White(ws='\t ').suppress() +
                                      element) + pp.LineEnd().suppress())

    compositions = pp.Group(
        pp.LineStart().suppress() + pp.Literal('%').suppress() +
        pp.Literal('compositions') + separator + pyparsing_common.number +
        pp.ZeroOrMore(pp.White(ws='\t ').suppress() +
                      pyparsing_common.number) + pp.LineEnd().suppress())

    epoch = pp.Group(pp.LineStart().suppress() + pp.Literal('%').suppress() +
                     pp.Literal('Epoch') + separator +
                     pyparsing_common.number + pp.LineEnd().suppress())

    sample = pp.Group(pp.LineStart().suppress() + pp.Literal('%').suppress() +
                      pp.Literal('Sample') + separator +
                      pyparsing_common.number + pp.LineEnd().suppress())

    key_value = (sample | epoch | elements | compositions
                 | pp.Group(key + separator + value))

    row_separator = pp.White(ws='\t ').suppress()
    row = (pp.LineStart().suppress() + pyparsing_common.number +
           pp.ZeroOrMore(row_separator + pyparsing_common.number) +
           pp.LineEnd().suppress())

    return pp.OneOrMore(pp.Dict(key_value)).setResultsName('meta') + \
        pp.Group(pp.ZeroOrMore(pp.Group(row))).setResultsName('values')
Example #24
            pp.Group(pp.OneOrMore(pp.dblSlashComment)).setResultsName("text") +
            RPAREN))
    return mod_def.parseString(mod_string)


tor_comment.setParseAction(comment_handler)
tor_comment.setResultsName("_name")
tor_comment.setResultsName("_type")

tor_string = pp.dblQuotedString() | pp.Word(pp.alphas, pp.alphanums + "_-.")
number = pp.pyparsing_common.number()

tor_members = pp.Forward()
tor_value = pp.Forward()

tor_struct = pp.Literal("struct").setResultsName("_type") + LPAREN + pp.Dict(
    tor_members) + RPAREN
tor_sequence = pp.Literal("sequence").setResultsName(
    "_type") + LPAREN + pp.delimitedList(tor_value) + RPAREN
tor_ref = pp.Literal("ref").setResultsName(
    "_type") + LPAREN + identifier + RPAREN
tor_value << (tor_sequence | tor_ref | tor_struct | tor_string
              | pp.Group(number + identifier) | number)

member_def = pp.Dict(pp.Group(identifier + COLON + tor_value))
tor_members << pp.delimitedList(member_def)

object_def = pp.Group(
    identifier.setResultsName("_name") + identifier.setResultsName("_type") +
    pp.Dict(LPAREN + pp.Optional(tor_members) + RPAREN))
tor_object = pp.Dict(object_def | tor_comment)
tor_file = pp.Dict(pp.OneOrMore(tor_object)) + pp.stringEnd
Example #25
NULL = make_keyword("null", None)

LBRACK, RBRACK, LBRACE, RBRACE, COLON = map(pp.Suppress, "[]{}:")

jsonString = pp.dblQuotedString().setParseAction(pp.removeQuotes)
jsonNumber = ppc.number()

jsonObject = pp.Forward()
jsonValue = pp.Forward()
jsonElements = pp.delimitedList(jsonValue)
jsonArray = pp.Group(LBRACK + pp.Optional(jsonElements, []) + RBRACK)
jsonValue << (jsonString | jsonNumber | pp.Group(jsonObject) | jsonArray | TRUE
              | FALSE | NULL)
memberDef = pp.Group(jsonString + COLON + jsonValue)
jsonMembers = pp.delimitedList(memberDef)
jsonObject << pp.Dict(LBRACE + pp.Optional(jsonMembers) + RBRACE)

jsonComment = pp.cppStyleComment
jsonObject.ignore(jsonComment)

if __name__ == "__main__":
    testdata = """
    {
        "glossary": {
            "title": "example glossary",
            "GlossDiv": {
                "title": "S",
                "GlossList":
                    {
                    "ID": "SGML",
                    "SortAs": "SGML",
Example #26
def _parse_ios_interfaces(data,
                          acls_as_list=True,
                          auto_cleanup=True,
                          skip_disabled=True):
    """
    Walks through a IOS interface config and returns a dict of parts.

    Intended for use by `~trigger.cmds.NetACLInfo.ios_parse()` but was written
    to be portable.

    :param acls_as_list:
        Whether you want acl names as strings instead of list members, e.g.

    :param auto_cleanup:
        Whether you want to pass results through cleanup_results(). Default: ``True``)
        "ABC123" vs. ['ABC123']. (Default: ``True``)

    :param skip_disabled:
        Whether to skip disabled interfaces. (Default: ``True``)
    """
    import pyparsing as pp

    # Setup
    bang = pp.Literal("!").suppress()
    anychar = pp.Word(pp.printables)
    nonbang = pp.Word(''.join([x for x in pp.printables if x != "!"]) +
                      '\n\r\t ')
    comment = bang + pp.restOfLine.suppress()

    #weird things to ignore in foundries
    aaa_line = pp.Literal("aaa").suppress() + pp.restOfLine.suppress()
    module_line = pp.Literal("module").suppress() + pp.restOfLine.suppress()
    startup_line = pp.Literal("Startup").suppress() + pp.restOfLine.suppress()
    ver_line = pp.Literal("ver") + anychar  #+ pp.restOfLine.suppress()
    #using SkipTo instead now

    #foundry example:
    #telnet@olse1-dc5#show  configuration | include ^(interface | ip address | ip access-group | description|!)
    #!
    #Startup-config data location is flash memory
    #!
    #Startup configuration:
    #!
    #ver 07.5.05hT53
    #!
    #module 1 bi-0-port-m4-management-module
    #module 2 bi-8-port-gig-module

    #there is a lot more that foundry is including in the output that should be ignored

    interface_keyword = pp.Keyword("interface")
    unwanted = pp.SkipTo(interface_keyword, include=False).suppress()

    #unwanted = pp.ZeroOrMore(bang ^ comment ^ aaa_line ^ module_line ^ startup_line ^ ver_line)

    octet = pp.Word(pp.nums, max=3)
    ipaddr = pp.Combine(octet + "." + octet + "." + octet + "." + octet)
    address = ipaddr
    netmask = ipaddr
    cidr = pp.Literal("/").suppress() + pp.Word(pp.nums, max=2)

    # Description
    desc_keyword = pp.Keyword("description")
    description = pp.Dict(pp.Group(desc_keyword + pp.Group(pp.restOfLine)))

    # Addresses
    #cisco example:
    # ip address 172.29.188.27 255.255.255.224 secondary
    #
    #foundry example:
    # ip address 10.62.161.187/26

    ipaddr_keyword = pp.Keyword("ip address").suppress()
    secondary = pp.Literal("secondary").suppress()

    #foundry matches on cidr and cisco matches on netmask
    #netmask converted to cidr in cleanup
    ip_tuple = pp.Group(address + (cidr ^ netmask)).setResultsName(
        'addr', listAllMatches=True)
    negotiated = pp.Literal('negotiated')  # Seen on Cisco 886
    ip_address = ipaddr_keyword + (negotiated
                                   ^ ip_tuple) + pp.Optional(secondary)

    addrs = pp.ZeroOrMore(ip_address)

    # ACLs
    acl_keyword = pp.Keyword("ip access-group").suppress()

    # acl_name to be [''] or '' depending on acls_as_list
    acl_name = pp.Group(anychar) if acls_as_list else anychar
    direction = pp.oneOf('in out').suppress()
    acl_in = acl_keyword + pp.FollowedBy(acl_name + pp.Literal('in'))
    acl_in.setParseAction(pp.replaceWith('acl_in'))
    acl_out = acl_keyword + pp.FollowedBy(acl_name + pp.Literal('out'))
    acl_out.setParseAction(pp.replaceWith('acl_out'))

    acl = pp.Dict(pp.Group((acl_in ^ acl_out) + acl_name)) + direction
    acls = pp.ZeroOrMore(acl)

    # Interfaces
    iface_keyword = pp.Keyword("interface").suppress()
    foundry_awesome = pp.Literal(" ").suppress() + anychar
    #foundry example:
    #!
    #interface ethernet 6/6
    # ip access-group 126 in
    # ip address 172.18.48.187 255.255.255.255

    #cisco example:
    #!
    #interface Port-channel1
    # description gear1-mtc : AE1 : iwslbfa1-mtc-sw0 :  : 1x1000 : 172.20.166.0/24 :  :  :
    # ip address 172.20.166.251 255.255.255.0

    interface = pp.Combine(anychar + pp.Optional(foundry_awesome))

    iface_body = pp.Optional(description) + pp.Optional(acls) + pp.Optional(
        addrs) + pp.Optional(acls)
    #foundry's body is acl then ip and cisco's is ip then acl

    iface_info = pp.Optional(unwanted) + iface_keyword + pp.Dict(
        pp.Group(interface + iface_body)) + pp.Optional(pp.SkipTo(bang))

    interfaces = pp.Dict(pp.ZeroOrMore(iface_info))

    # This is where the parsing is actually happening
    try:
        results = interfaces.parseString(data)
    except Exception:  # (ParseException, ParseFatalException, RecursiveGrammarException)
        results = {}

    if auto_cleanup:
        return _cleanup_interface_results(results, skip_disabled=skip_disabled)
    return results
Example #27
unquote = lambda s, l, t: UNQUOTE_PAIRS.sub(r"\1", t[0][1:-1])

# https://tools.ietf.org/html/rfc7235#section-1.2
# https://tools.ietf.org/html/rfc7235#appendix-B
tchar = "!#$%&'*+-.^_`|~" + pp.nums + pp.alphas
token = pp.Word(tchar).setName("token")
token68 = pp.Combine(
    pp.Word("-._~+/" + pp.nums + pp.alphas) +
    pp.Optional(pp.Word("=").leaveWhitespace())).setName("token68")

quoted_string = pp.dblQuotedString.copy().setName(
    "quoted-string").setParseAction(unquote)
auth_param_name = token.copy().setName("auth-param-name").addParseAction(
    downcaseTokens)
auth_param = auth_param_name + pp.Suppress("=") + (quoted_string | token)
params = pp.Dict(pp.delimitedList(pp.Group(auth_param)))

scheme = token("scheme")
challenge = scheme + (params("params") | token68("token"))

authentication_info = params.copy()
www_authenticate = pp.delimitedList(pp.Group(challenge))


def _parse_authentication_info(headers, headername="authentication-info"):
    """https://tools.ietf.org/html/rfc7615
    """
    header = headers.get(headername, "").strip()
    if not header:
        return {}
    try:
Example #28
class SELinuxParser(text_parser.PyparsingSingleLineTextParser):
    """Parser for SELinux audit.log files."""

    NAME = 'selinux'
    DESCRIPTION = 'Parser for SELinux audit.log files.'

    _SELINUX_KEY_VALUE_GROUP = pyparsing.Group(
        pyparsing.Word(pyparsing.alphanums).setResultsName('key') +
        pyparsing.Suppress('=') +
        (pyparsing.QuotedString('"')
         ^ pyparsing.Word(pyparsing.printables)).setResultsName('value'))

    _SELINUX_KEY_VALUE_DICT = pyparsing.Dict(
        pyparsing.ZeroOrMore(_SELINUX_KEY_VALUE_GROUP))

    _SELINUX_BODY_GROUP = pyparsing.Group(
        pyparsing.Empty().setResultsName('key') +
        pyparsing.restOfLine.setResultsName('value'))

    _SELINUX_MSG_GROUP = pyparsing.Group(
        pyparsing.Literal('msg').setResultsName('key') +
        pyparsing.Suppress('=audit(') +
        pyparsing.Word(pyparsing.nums).setResultsName('seconds') +
        pyparsing.Suppress('.') +
        pyparsing.Word(pyparsing.nums).setResultsName('milliseconds') +
        pyparsing.Suppress(':') +
        pyparsing.Word(pyparsing.nums).setResultsName('serial') +
        pyparsing.Suppress('):'))

    _SELINUX_TYPE_GROUP = pyparsing.Group(
        pyparsing.Literal('type').setResultsName('key') +
        pyparsing.Suppress('=') +
        (pyparsing.Word(pyparsing.srange('[A-Z_]'))
         ^ pyparsing.Regex(r'UNKNOWN\[[0-9]+\]')).setResultsName('value'))

    _SELINUX_TYPE_AVC_GROUP = pyparsing.Group(
        pyparsing.Literal('type').setResultsName('key') +
        pyparsing.Suppress('=') +
        (pyparsing.Word('AVC')
         ^ pyparsing.Word('USER_AVC')).setResultsName('value'))

    # A log line is formatted as: type=TYPE msg=audit([0-9]+\.[0-9]+:[0-9]+): .*
    _SELINUX_LOG_LINE = pyparsing.Dict(_SELINUX_TYPE_GROUP +
                                       _SELINUX_MSG_GROUP +
                                       _SELINUX_BODY_GROUP)

    LINE_STRUCTURES = [('line', _SELINUX_LOG_LINE)]

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a structure of tokens derived from a line of a text file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
        if key != 'line':
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

        msg_value = structure.get('msg')
        if not msg_value:
            parser_mediator.ProduceExtractionError(
                'missing msg value: {0!s}'.format(structure))
            return

        try:
            seconds = int(msg_value[0], 10)
        except ValueError:
            parser_mediator.ProduceExtractionError(
                'unsupported number of seconds in msg value: {0!s}'.format(
                    structure))
            return

        try:
            milliseconds = int(msg_value[1], 10)
        except ValueError:
            parser_mediator.ProduceExtractionError(
                'unsupported number of milliseconds in msg value: {0!s}'.
                format(structure))
            return

        timestamp = ((seconds * 1000) + milliseconds) * 1000
        body_text = structure[2][0]

        try:
            # Try to parse the body text as key value pairs. Note that not
            # all log lines will be properly formatted key value pairs.
            key_value_dict = self._SELINUX_KEY_VALUE_DICT.parseString(
                body_text)
        except pyparsing.ParseException:
            key_value_dict = {}

        event_data = SELinuxLogEventData()
        event_data.audit_type = structure.get('type', None)
        event_data.body = body_text
        event_data.pid = key_value_dict.get('pid', None)
        # TODO: pass line number to offset or remove.
        event_data.offset = 0

        event = time_events.TimestampEvent(
            timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def VerifyStructure(self, parser_mediator, line):
        """Verifies if a line from a text file is in the expected format.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (bytes): line from a text file.

    Returns:
      bool: True if the line is in the expected format, False if not.
    """
        try:
            structure = self._SELINUX_LOG_LINE.parseString(line)
        except pyparsing.ParseException as exception:
            logging.debug(
                'Unable to parse SELinux audit.log file with error: {0:s}'.
                format(exception))
            return False

        return 'type' in structure and 'msg' in structure
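A standalone sketch of the key=value Dict used for the message body above, run on an invented audit-log fragment.

import pyparsing

group = pyparsing.Group(
    pyparsing.Word(pyparsing.alphanums).setResultsName('key') +
    pyparsing.Suppress('=') +
    (pyparsing.QuotedString('"')
     ^ pyparsing.Word(pyparsing.printables)).setResultsName('value'))
kv_dict = pyparsing.Dict(pyparsing.ZeroOrMore(group))

print(kv_dict.parseString('pid=123 comm="cupsd"').asDict())
# {'pid': '123', 'comm': 'cupsd'}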
Example #29
class SELinuxParser(text_parser.PyparsingSingleLineTextParser):
    """Parser for SELinux audit.log files."""

    NAME = u'selinux'
    DESCRIPTION = u'Parser for SELinux audit.log files.'

    _SELINUX_KEY_VALUE_GROUP = pyparsing.Group(
        pyparsing.Word(pyparsing.alphanums).setResultsName(u'key') +
        pyparsing.Suppress(u'=') +
        (pyparsing.QuotedString(u'"')
         ^ pyparsing.Word(pyparsing.printables)).setResultsName(u'value'))

    _SELINUX_KEY_VALUE_DICT = pyparsing.Dict(
        pyparsing.ZeroOrMore(_SELINUX_KEY_VALUE_GROUP))

    _SELINUX_BODY_GROUP = pyparsing.Group(
        pyparsing.Empty().setResultsName(u'key') +
        pyparsing.restOfLine.setResultsName(u'value'))

    _SELINUX_MSG_GROUP = pyparsing.Group(
        pyparsing.Literal(u'msg').setResultsName(u'key') +
        pyparsing.Suppress(u'=audit(') +
        pyparsing.Word(pyparsing.nums).setResultsName(u'seconds') +
        pyparsing.Suppress(u'.') +
        pyparsing.Word(pyparsing.nums).setResultsName(u'milliseconds') +
        pyparsing.Suppress(u':') +
        pyparsing.Word(pyparsing.nums).setResultsName(u'serial') +
        pyparsing.Suppress(u'):'))

    _SELINUX_TYPE_GROUP = pyparsing.Group(
        pyparsing.Literal(u'type').setResultsName(u'key') +
        pyparsing.Suppress(u'=') +
        (pyparsing.Word(pyparsing.srange(u'[A-Z_]'))
         ^ pyparsing.Regex(r'UNKNOWN\[[0-9]+\]')).setResultsName(u'value'))

    _SELINUX_TYPE_AVC_GROUP = pyparsing.Group(
        pyparsing.Literal(u'type').setResultsName(u'key') +
        pyparsing.Suppress(u'=') +
        (pyparsing.Word(u'AVC')
         ^ pyparsing.Word(u'USER_AVC')).setResultsName(u'value'))

    # A log line is formatted as: type=TYPE msg=audit([0-9]+\.[0-9]+:[0-9]+): .*
    _SELINUX_LOG_LINE = pyparsing.Dict(_SELINUX_TYPE_GROUP +
                                       _SELINUX_MSG_GROUP +
                                       _SELINUX_BODY_GROUP)

    LINE_STRUCTURES = [(u'line', _SELINUX_LOG_LINE)]

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a structure of tokens derived from a line of a text file.

    Args:
      parser_mediator (ParserMediator): parser mediator.
      key (str): identifier of the structure of tokens.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
        if key != u'line':
            raise errors.ParseError(
                u'Unable to parse record, unknown structure: {0:s}'.format(
                    key))

        msg_value = structure.get(u'msg')
        if not msg_value:
            parser_mediator.ProduceExtractionError(
                u'missing msg value: {0!s}'.format(structure))
            return

        try:
            seconds = int(msg_value[0], 10)
        except ValueError:
            parser_mediator.ProduceExtractionError(
                u'unsupported number of seconds in msg value: {0!s}'.format(
                    structure))
            return

        try:
            milliseconds = int(msg_value[1], 10)
        except ValueError:
            parser_mediator.ProduceExtractionError(
                u'unsupported number of milliseconds in msg value: {0!s}'.
                format(structure))
            return

        timestamp = ((seconds * 1000) + milliseconds) * 1000
        body_text = structure[2][0]

        try:
            # Try to parse the body text as key value pairs. Note that not
            # all log lines will be properly formatted key value pairs.
            key_value_dict = self._SELINUX_KEY_VALUE_DICT.parseString(
                body_text)
        except pyparsing.ParseException:
            key_value_dict = {}

        audit_type = structure.get(u'type')
        pid = key_value_dict.get(u'pid')

        event_object = SELinuxLineEvent(timestamp, 0, audit_type, pid,
                                        body_text)
        parser_mediator.ProduceEvent(event_object)

    def VerifyStructure(self, parser_mediator, line):
        """Verifies if a line from a text file is in the expected format.

    Args:
      parser_mediator (ParserMediator): parser mediator.
      line (bytes): line from a text file.

    Returns:
      bool: True if the line is in the expected format.
    """
        try:
            structure = self._SELINUX_LOG_LINE.parseString(line)
        except pyparsing.ParseException as exception:
            logging.debug(
                u'Unable to parse SELinux audit.log file with error: {0:s}'.
                format(exception))
            return False

        return u'type' in structure and u'msg' in structure
Example #30
dict_key_value = pp.dictOf(string_value + pp.Suppress(":"), value)

dict_value << pp.Group(
    pp.Suppress("{") + pp.delimitedList(dict_key_value) +
    pp.Suppress("}")).setParseAction(_parse_type("dict"))

# Positionals must be end of line or has a space (or more) afterwards.
# This is to ensure that the parser treats text like "something=" as invalid
# instead of parsing this as positional "something" and leaving the "=" as
# invalid on its own.
positionals = pp.ZeroOrMore(value + (
    pp.StringEnd()
    ^ pp.Suppress(pp.OneOrMore(pp.White())))).setResultsName("positionals")

key_value = pp.Dict(
    pp.ZeroOrMore(pp.Group(identifier + pp.Suppress("=") +
                           value))).setResultsName("kv")

subcommand = identifier.setResultsName("__subcommand__")

# Subcommand is optional here as it maybe missing, in this case we still want to
# pass the parsing and we will handle the fact that the subcommand is missing
# while validating the arguments
command_with_subcommand = pp.Optional(subcommand) + key_value + positionals

# Positionals will be passed as the last argument
command = key_value + positionals


def parse(text: str, expect_subcommand: bool) -> pp.ParseResults:
    expected_pattern = command_with_subcommand if expect_subcommand else command