def swallows_exceptions(js_dest: str, exclude: list = None) -> bool:
    """
    Search for ``catch`` blocks that are empty or only have comments.

    See `REQ.161 <https://fluidattacks.com/web/rules/161/>`_.
    See `CWE-391 <https://cwe.mitre.org/data/definitions/391.html>`_.

    :param js_dest: Path to a JavaScript source file or package.
    :param exclude: Paths that contains any string from this list are ignored.
    """
    # Empty() matches 'anything', so ~Empty() matches 'not anything' or
    # 'nothing' — i.e. a catch body with no parseable content at all.
    classic_catch = (Suppress(Keyword('catch'))
                     + nestedExpr(opener='(', closer=')')
                     + nestedExpr(opener='{', closer='}', content=~Empty()))
    promise_catch = (Suppress('.' + Keyword('catch'))
                     + nestedExpr(opener='(', closer=')', content=~Empty()))
    grammar = MatchFirst([classic_catch, promise_catch])
    grammar.ignore(cppStyleComment)

    try:
        matches = lang.path_contains_grammar(grammar, js_dest,
                                             LANGUAGE_SPECS, exclude)
    except FileNotFoundError:
        show_unknown('File does not exist', details=dict(code_dest=js_dest))
        # Falls through with an implicit None, mirroring the original flow.
    else:
        if matches:
            show_open('Code has empty "catch" blocks',
                      details=dict(matched=matches))
            return True
        show_close('Code does not have empty "catch" blocks',
                   details=dict(code_dest=js_dest))
        return False
def create_dict_bnf(allow_tuple=False, free_word=False):
    """Build a grammar for a bare (brace-less) dict literal.

    :param allow_tuple: also accept a bare tuple body.
    :param free_word: use the permissive word lexeme instead of the strict one.
    """
    lexeme = word_free if free_word else word_strict
    type_defs = get_standard_type_defs(lexeme)
    # Completely empty input parses to a single empty dict.
    nothing = Empty()
    nothing.setParseAction(lambda toks: [{}])
    if not allow_tuple:
        return type_defs['dict'].inner | nothing
    return type_defs['dict'].inner | type_defs['tuple'].inner | nothing
def get_standard_type_defs(word=word_free):
    """
    Return dict of the pyparsing base lexical elements.

    The compound types (tuple, list, dict) can contain compound types or
    simple types such as integers, floats and words.

    Parameters
    ----------
    word : lexical element
        A custom lexical element for word.

    Returns
    -------
    defs : dict
        The dictionary with the following items:

        - tuple: (..., ..., ...)
        - list: [..., ...., ...]
        - dict: {...:..., ...:..., ....} or {...=..., ...=..., ....}
        - list_item: any of preceding compound types or simple types
    """
    # Forward declarations so the compound types can nest recursively.
    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()

    # Parse actions converting pyparsing token lists into native containers.
    cvt_tuple = lambda toks: [tuple(toks.asList())]
    cvt_dict = lambda toks: [dict(toks.asList())]

    list_item = (none ^ boolean ^ cmplx ^ real ^ integer ^ list_str ^
                 tuple_str ^ dict_str ^
                 quotedString.setParseAction(removeQuotes) ^ word)
    # A dict value may be omitted entirely; it then parses as None.
    list_item2 = list_item | Empty().setParseAction(lambda: [None])

    # The body (without delimiters) is stored as an ``inner`` attribute on
    # each element so callers can reuse it directly.
    tuple_str.inner = Empty() ^ list_of(list_item)
    # Copy before attaching cvt_tuple so the list body keeps its own action.
    list_str.inner = tuple_str.inner.copy()
    tuple_str.inner.setParseAction(cvt_tuple)
    tuple_str << (lparen + tuple_str.inner + rparen)
    list_str.inner.setParseAction(lambda toks: [toks.asList()])
    list_str << (lbrack + list_str.inner + rbrack)

    # key : value  or  key = value
    dict_entry = Group(list_item + (colon | equal_sign) + list_item2)
    dict_str.inner = Empty() ^ list_of(dict_entry)
    dict_str.inner.setParseAction(cvt_dict)
    # '{}' with no entries parses to an empty dict.
    dict_str << (lbrace +
                 (dict_str.inner | Empty().setParseAction(lambda x: [{}])) +
                 rbrace)

    defs = {
        'tuple': tuple_str,
        'list': list_str,
        'dict': dict_str,
        'list_item': list_item
    }

    return defs
def get_standard_type_defs(word):
    """Return pyparsing grammars for tuple/list/dict literals built on *word*.

    The returned dict maps 'tuple', 'list', 'dict' and 'list_item' to the
    corresponding (mutually recursive) lexical elements.
    """
    # Forward declarations allow the compound types to nest recursively.
    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()

    # Parse actions converting token lists into native Python containers.
    cvt_tuple = lambda toks: tuple(toks.asList())
    cvt_dict = lambda toks: dict(toks.asList())

    list_item = (none | boolean | real | integer | list_str | tuple_str |
                 dict_str | quotedString.setParseAction(removeQuotes) | word)
    # A missing dict value parses as None.
    list_item2 = list_item | Empty().setParseAction(lambda: [None])

    # The bodies (without delimiters) are exposed as ``inner`` attributes.
    tuple_str.inner = list_of(list_item)
    tuple_str.inner.setParseAction(cvt_tuple)
    tuple_str << (lparen + tuple_str.inner + rparen)

    # Copy the tuple body so the list keeps its own parse action.
    list_str.inner = tuple_str.inner.copy()
    list_str.inner.setParseAction(lambda toks: list(toks))
    list_str << (lbrack + list_str.inner + rbrack)

    dict_entry = Group(list_item + colon + list_item2)
    dict_str.inner = list_of(dict_entry)
    dict_str.inner.setParseAction(cvt_dict)
    dict_str << (lbrace + Optional(dict_str.inner) + rbrace)

    return {
        'tuple': tuple_str,
        'list': list_str,
        'dict': dict_str,
        'list_item': list_item
    }
def bnf():
    """Grammar for a tiny assembly-like language: assignments, labels and
    (conditionally guarded) gotos.  '#' line comments are ignored."""
    line_comment = "#" + restOfLine
    ident = Word(alphas, alphanums + "_")
    number_lit = Word(nums)
    binop = Word("+-|&", max=1)
    comparsion = Literal("!=") | "<=" | "<" | ">=" | ">" | "=="
    # '@name' / '@123' dereference an address.
    ident_ref = Suppress("@") + ident("address")
    number_ref = Suppress("@") + number_lit("address")
    value = Group(ident_ref | ident | number_ref | number_lit)
    operation = (
        (value("lhs") + binop("op") + value("rhs"))
        | Group("-" + value("negative_val"))
        | value("val")
    )
    # name = <operation>
    command = value("name") + Suppress("=") + operation("value")
    # goto target [if <operation> <cmp> 0 [else other_target]]
    goto = (Suppress("goto") + ident("target")
            + Optional(Suppress("if") + operation("value")
                       + comparsion("cmp") + Suppress("0")
                       + Optional(Suppress("else") + ident("elsetgt"))))
    label = value("anchor") + Suppress(":")
    return (command | label | goto | Empty()).ignore(line_comment)
def __init__(self):
    """Build the grammar for an IDL-style file: a list of imports followed by
    one package containing interface/enum/struct declarations.

    The ``-`` operator (And with error stop) makes a mismatch after a keyword
    a hard parse error instead of a backtrack point.
    """
    # Zero-width element recording the current parse position; attached to
    # every construct under the result name 'location'.
    self.locator = Empty().setParseAction(self.locator_parse_action)('location')

    # import <rest-of-line path>
    import_entry = Group(Suppress(Keyword('import')) - self.locator +
                         SkipTo(LineEnd())('path'))
    imports = ZeroOrMore(import_entry)('imports')

    # '[]' array suffix markers on a type.
    array = Group(Suppress('[') + Suppress(']'))
    # Dotted type name plus optional array suffixes.
    type_ = Group(self.locator +
                  (Word(alphas, alphanums) +
                   ZeroOrMore(Suppress('.') - Word(alphas, alphanums)))('name') -
                  ZeroOrMore(array)('array'))
    identifier = Word(alphas, alphanums)

    # Method parameter: "type name"; comma-separated parameter list.
    param = Group(self.locator + type_('type') + identifier('name'))
    params_list = Group(Optional(param + ZeroOrMore(Suppress(',') + param)))
    # type name(params);
    method = Group(self.locator + type_('ret_type') + identifier('name') +
                   Suppress('(') + params_list('params') + Suppress(');'))
    method_list = Group(ZeroOrMore(method))

    package_name = Group(identifier + ZeroOrMore(Suppress('.') + identifier))
    bases_list = type_ + ZeroOrMore(Suppress(',') + type_)
    # interface Name [: Base, ...] { methods }
    interface = Group(self.locator + Keyword('interface')('kind') -
                      identifier('name') +
                      Optional(Suppress(':') - bases_list)('bases') +
                      Suppress('{') + method_list('methods') + Suppress('}'))

    integer_constant = Word(nums).setParseAction(lambda s, l, t: int(t[0]))
    # enum Name { value [= int], ... }
    enum_value = Group(self.locator + identifier('name') -
                       Optional(Suppress('=') + integer_constant('value')))
    enum_values_list = Group(Optional(enum_value +
                                      ZeroOrMore(Suppress(',') + enum_value)))
    enum = Group(self.locator + Keyword('enum')('kind') - identifier('name') +
                 Suppress('{') + enum_values_list('values') + Suppress('}'))

    # struct Name { type name; ... }
    struct_member = Group(self.locator + type_('type') + identifier('name') +
                          Suppress(';'))
    struct_members_list = Group(ZeroOrMore(struct_member))
    struct = Group(self.locator + Keyword('struct')('kind') -
                   identifier('name') + Suppress('{') +
                   struct_members_list('members') + Suppress('}'))

    # package a.b.c { <type declarations> }
    package = Suppress(Keyword('package')) + package_name('package') + \
        Suppress('{') + \
        Group(ZeroOrMore(interface | enum | struct))('types') + Suppress('}')

    self.grammar = imports + package
    # Skip C/C++ style comments; keep tabs verbatim so recorded locations
    # match offsets in the original input.
    self.grammar.ignore(cppStyleComment)
    self.grammar.parseWithTabs()
def _parse_data(data: str) -> Dict[str, Any]:
    """Parse a Lua-like table syntax (key=value pairs, {...} tables) into
    nested Python dicts/lists/strings."""
    lcur, rcur, lbrk, rbrk, comma, eq = map(Suppress, '{}[],=')

    # A key is a bare lowercase identifier or a ['single-quoted string'].
    bare_key = Regex(r'[a-z][a-z0-9_]*')
    bracket_key = lbrk + QuotedString(quoteChar="'") + rbrk
    tablekey = bare_key | bracket_key

    dq_string = QuotedString(quoteChar='"')
    value = Forward()

    keyval = (tablekey + eq + value).setParseAction(
        lambda s, l, t: [(str(t[0]), t[1])])

    # {v, v, ...} -> list; {k=v, ...} -> dict; {} -> None.
    array_table = (value + ZeroOrMore(comma + value)).setParseAction(
        lambda s, l, t: [list(t)])
    dict_table = (keyval + ZeroOrMore(comma + keyval)).setParseAction(
        lambda s, l, t: [{k: v for k, v in t}])
    empty_table = Empty().setParseAction(lambda s, l, t: [None])
    table = lcur + (dict_table | array_table | empty_table) + rcur

    value << (dq_string | table)

    root = ZeroOrMore(keyval).setParseAction(
        lambda s, l, t: {k: v for k, v in t})
    return root.parseString(data, parseAll=True)[0]  # type: ignore
def list_dict(word=word_free):
    """
    Return the pyparsing lexical element, that parses a string either
    as a list or as a dictionary.

    Parameters
    ----------
    word : lexical element
        A custom lexical element for word.

    Returns
    -------
    ld : lexical element
        The returned lexical element parses a string in the form
        ``..., ..., ...`` or ``key1:..., key2=..., key3: ...``
        where ``...`` is a ``list_item`` from
        :func:`get_standard_type_defs()` and interprets it as a list or a
        dictionary.
    """
    defs = get_standard_type_defs(word)
    i = defs['list_item']

    # Positional items become 1-tuples and keyword items (key, value) pairs,
    # so the final parse action can tell them apart by length.
    arg = i.copy()
    arg.setParseAction(lambda t: (t[0],))
    narg = word_strict + (colon | equal_sign) + i
    narg.setParseAction(lambda t: (t[0], t[1]))

    # Possibly-empty mix of both forms; the result is the pair
    # (positional values list, keyword dict).
    ld = Group(list_of(narg ^ arg) | Empty()
               )
    ld.setParseAction(lambda t: ([x[0] for x in t[0] if len(x) == 1],
                                 dict([x for x in t[0] if len(x) > 1]))
                      )
    return ld
def _parse_func(text, is_output=False):
    """Scan C function declarations out of *text*.

    Returns a mapping ``name -> (return_type, ((const, dtype, arg_name), ...))``.
    When *is_output* is true, only the region between FUNCTION_START and
    FUNCTION_END is scanned and no DVZ_EXPORT prefix is required.
    """
    if is_output:
        text = text[text.index(FUNCTION_START):text.index(FUNCTION_END)]
    funcs = {}

    # Punctuation we do not want to see in the final parse tree.
    LPAR, RPAR, LBRACE, RBRACE, COMMA, SEMICOLON = map(Suppress, "(){},;")
    const = Keyword("const")
    dtype = Word(alphanums + "_*")
    identifier = Word(alphanums + "_")

    argDecl = Group(
        Optional(const("const"))
        + dtype("dtype")
        + Optional(identifier("name"))
        + Optional(COMMA))
    args = Group(ZeroOrMore(argDecl))

    # Exported declarations carry a DVZ_EXPORT prefix; output stubs do not.
    prefix = Empty() if is_output else Suppress("DVZ_EXPORT")
    func = (prefix
            + dtype("out")
            + identifier("name")
            + LPAR + args("args") + RPAR
            + Optional(SEMICOLON))

    for item, _start, _stop in func.scanString(text):
        signature = tuple((entry.const, entry.dtype, entry.name)
                          for entry in item.args)
        funcs[item.name] = (item.out, signature)
    return funcs
def make_multiple(head, tail=None, wrap_tail=False):
    """We have a recurring need to parse citations which have a string of
    terms, e.g. section 11(a), (b)(4), and (5). This function is a shorthand
    for setting these elements up"""
    if tail is None:
        tail = head
    # Append `Empty` rather than `copy`: `head`/`tail` may be single-element
    # grammars whose results we must not completely rename.
    head = (head + Empty()).setParseAction(keep_pos).setResultsName("head")
    # Address just the matched text separately from the conjunctive phrase.
    matched = (tail + Empty()).setParseAction(keep_pos).setResultsName("match")
    tail = (atomic.conj_phrases + matched).setResultsName(
        "tail", listAllMatches=True)
    if wrap_tail:
        tail = Optional(Suppress('(')) + tail + Optional(Suppress(')'))
    return QuickSearchable(head + OneOrMore(tail))
def _create_simple_statements():
    """Create the grammars for all simple (one-line) statements.

    Populates the module-level grammar elements on first call; subsequent
    calls are no-ops because the built grammar is cached in globals.
    """
    global binary, ident, rvalue, simple_statement, semi, comp, number, slot_id, subtract_stmt, callrpc_stmt, generic_statement, streamer_stmt, stream, selector

    # Already built — these elements are module-level singletons.
    if simple_statement is not None:
        return

    # meta <ident> = <rvalue>;
    meta_stmt = Group(Literal('meta').suppress() + ident +
                      Literal('=').suppress() + rvalue +
                      semi).setResultsName('meta_statement')
    # require <ident> <comparison> <rvalue>;
    require_stmt = Group(Literal('require').suppress() + ident + comp +
                         rvalue + semi).setResultsName('require_statement')
    # set <target> to <value> [as <type>];
    set_stmt = Group(Literal('set').suppress() - (ident | number) -
                     Literal("to").suppress() - (rvalue | binary) -
                     Optional(Literal('as').suppress() + config_type) +
                     semi).setResultsName('set_statement')
    # call <rpc> on <slot> [=> stream];
    callrpc_stmt = Group(Literal("call").suppress() + (ident | number) +
                         Literal("on").suppress() + slot_id +
                         Optional(Literal("=>").suppress() +
                                  stream('explicit_stream')) +
                         semi).setResultsName('call_statement')
    # [manual] [encrypted|signed] [realtime|broadcast] streamer on <selector>
    #     [to <tile>] [with streamer <n>];
    streamer_stmt = Group(Optional(Literal("manual")('manual')) +
                          Optional(oneOf(u'encrypted signed')('security')) +
                          Optional((Literal(u'realtime')('realtime') |
                                    Literal(u'broadcast')('broadcast'))) +
                          Literal('streamer').suppress() -
                          Literal('on').suppress() - selector('selector') -
                          Optional(Literal('to').suppress() -
                                   slot_id('explicit_tile')) -
                          Optional(Literal('with').suppress() -
                                   Literal('streamer').suppress() -
                                   number('with_other')) -
                          semi).setResultsName('streamer_statement')
    # copy [all|count|average] [input] => <output stream>;
    copy_stmt = Group(Literal("copy").suppress() -
                      Optional(oneOf("all count average")('modifier')) -
                      Optional(stream('explicit_input') |
                               number('constant_input')) -
                      Literal("=>").suppress() - stream("output") -
                      semi).setResultsName('copy_statement')
    # subtract <input> => <stream> [, default <number>]
    subtract_stmt = Group(Literal("subtract").suppress() -
                          stream('subtract_input') -
                          Literal('=>').suppress() - stream('stream') -
                          Optional(Literal(",").suppress() -
                                   Literal('default') -
                                   number('default'))).setResultsName('subtract_statement')
    # trigger streamer <index>;
    trigger_stmt = Group(Literal("trigger") - Literal("streamer") -
                         number('index') -
                         semi).setResultsName('trigger_statement')

    simple_statement = meta_stmt | require_stmt | set_stmt | callrpc_stmt | streamer_stmt | trigger_stmt | copy_stmt | subtract_stmt

    # In generic statements, keep track of the location where the match
    # started for error handling
    locator = Empty().setParseAction(lambda s, l, t: l)('location')
    generic_statement = Group(locator +
                              Group(ZeroOrMore(Regex(u"[^{};]+")) +
                                    Literal(u';'))('match')).setResultsName('unparsed_statement')
def Quote(q):
    """Return a grammar for a string quoted with *q* whose body is a mix of
    template items, sources and raw text (escapes honoured)."""
    delim = Literal(q).suppress()
    # Raw text: escaped symbols, or any char except '{'/'}' up to the quote.
    quotedStringBit = Combine(
        OneOrMore(escapedSymbol | (~delim + Regex('[^{}]', re.S)))
    )('stringBit').leaveWhitespace()
    body = (OneOrMore(
        Group(item.leaveWhitespace()
              | source.leaveWhitespace()
              | quotedStringBit)) | Empty())
    return delim + body('value') + delim
def __init__(self, rule):
    """Configure a nested-expression matcher from *rule*, a dict holding
    'opener', 'closer' and an optional 'columns' count (-1 = unbounded)."""
    self.rule = rule
    self.opener = self.rule['opener']
    self.closer = self.rule['closer']
    self.columns = self.rule.get('columns', -1)
    group = nestedExpr(opener=self.opener, closer=self.closer,
                       content=CharsNotIn(self.opener + self.closer))
    if self.columns < 0:
        # No fixed column count: one or more nested groups.
        self.nested = OneOrMore(group)
    else:
        # Exactly `columns` groups, then the rest of the line (may be empty).
        self.nested = group * self.columns + Or([CharsNotIn('\n'), Empty()])
def repeat(parser, n):
    """Return a grammar matching exactly *n* consecutive repetitions of
    *parser* (the parser object itself when ``n == 1``).

    :param parser: the element to repeat.
    :param n: non-negative repetition count.
    :raises ValueError: if *n* is negative.
    """
    if n < 0:
        # Fixed: the original used Python 2 `raise ValueError, "..."` syntax,
        # which is a SyntaxError on Python 3.
        raise ValueError("Can't repeat less than zero times.")
    if n == 0:
        # this should in principle never happen.
        return Empty()
    elif n == 1:
        return parser
    else:
        return parser + repeat(parser, n - 1)
def create_ast(expression_str: str) -> List[Union[str, List]]:
    """Parse *expression_str* (letters, primes, '*', '+' and implicit
    concatenation) into a nested-list AST."""
    expression = Forward()
    # An operand is a letter or a parenthesised sub-expression, either of
    # which may carry any number of trailing primes.
    letter_operand = Group(Char(alphas) + ZeroOrMore("'"))
    paren_operand = Group(Group("(" + expression + ")") + ZeroOrMore("'"))
    operand = letter_operand | paren_operand
    # Precedence, tightest first: implicit concatenation, '*', then '+'.
    expression <<= infixNotation(
        operand,
        [
            (Empty(), 2, opAssoc.LEFT),
            ("*", 2, opAssoc.LEFT),
            ("+", 2, opAssoc.LEFT),
        ],
    )
    return expression.parseString(expression_str, parseAll=True).asList()
def _prepare_parser():
    """Build a grammar for a small subset of LLVM IR: type definitions and
    global variable definitions.  Target/declare/attributes and metadata
    lines are consumed and discarded; ';' comments are ignored.
    """
    number = Regex(r'-?\d+')
    local = Regex(r'%[A-Za-z0-9._]+')   # %local identifiers
    glob = Regex(r'@[A-Za-z0-9._]+')    # @global identifiers
    meta = Regex(r'![A-Za-z0-9._]+')    # !metadata identifiers

    # Helpers: keyword alternation, and a possibly-empty comma-separated list.
    keywords = lambda keywords: MatchFirst(
        Keyword(word) for word in keywords.split())
    seplist = lambda entry: delimitedList(entry) | Empty()

    label = local + ':'
    # Lines parsed only to be skipped.
    unused_def = (keywords('target declare attributes') | '!') + restOfLine

    type_ = Forward()
    void = Keyword('void')
    scalar_type = keywords('i1 i8 i16 i32 i64') | void
    types_list = seplist(type_)
    struct_type = '{' + types_list - '}'
    array_type = '[' - number - 'x' - type_ - ']'
    type_ << (scalar_type | local | struct_type | array_type)
    # %name = type { ... }
    type_def = local + '=' + Keyword('type') - struct_type

    value = Forward()
    typed_value = type_ + value
    value_list = seplist(typed_value)
    compound_value = '{' + value_list - '}'
    array_value = '[' + value_list - ']'
    kw_value = keywords('zeroinitializer null true false')
    value << (number | kw_value | compound_value | array_value)

    # Globals default to 'external' linkage and 'undef' initializer.
    linkage = Optional(keywords('private external internal common'),
                       'external')
    align = Optional(',' + Keyword('align') - number)
    # Metadata attachments come in "!kind !N" pairs.
    metas = seplist(meta + meta)
    global_tag = keywords('global constant')
    initializer = Optional(value, default='undef')
    global_def = glob - '=' - linkage - global_tag - type_ - initializer - align - metas

    definition = unused_def | type_def | global_def
    llvm = ZeroOrMore(definition)
    comment = ';' + restOfLine
    llvm.ignore(comment)
    return llvm
def parse(self, header):
    """Parse an ARFF *header* string: @relation, @attribute lines (including
    nested relational attributes) up to the @data marker.

    Side effects: sets self._relation_name, self._data_start (line number of
    @data), resets self._index, and registers attributes through
    self._create_attribute / self._find_relational / self._linearize_attrs.

    :raises HeaderError: when the header does not match the ARFF grammar.
    """
    comment = self._comment()
    quoted = quotedString.copy().setParseAction(removeQuotes)
    string = quoted | Word(printables, excludeChars='{},%')
    enum_value = quotedString | Word(printables, excludeChars='{},%')

    # @relation <name>  (name defaults to 'default_name' when missing)
    relation = (Suppress(CaselessLiteral("@relation")) +
                Optional(restOfLine, default='default_name')
                ('rel_name').setParseAction(lambda t: t.rel_name.strip()))
    relation_part = ZeroOrMore(comment) + relation + ZeroOrMore(comment)

    # {a, b, c} — nominal type, tagged self.ENUM via the Empty() prefix.
    nominal = (Empty().copy().setParseAction(lambda t: self.ENUM) +
               Suppress(Literal("{")) +
               Group(delimitedList(enum_value, delim=self._separator))
               ("next_arg").setParseAction(self.get_values) +
               Suppress(Literal("}")))

    # date [<format>]
    date = CaselessLiteral("date") + Optional(
        CharsNotIn("{},\n"))("next_arg").setParseAction(
            self._adapt_date_format)

    attributes_part = Forward()
    # relational attributes nest a full attribute block, ended by @end <name>.
    relational = CaselessLiteral(
        "relational") + attributes_part + Suppress(
            CaselessLiteral("@end")) + string

    attr_type = (CaselessLiteral("numeric") | CaselessLiteral("string") |
                 nominal | date | relational)("attr_type")
    attribute = Suppress(CaselessLiteral("@attribute")) + (
        string.copy())("attr_name") + attr_type
    attribute_line = comment | attribute
    attributes_part << (Group(OneOrMore(attribute_line)))("children")

    # @data — record its line number for the caller.
    data_part = (CaselessLiteral("@data"))("data_start").setParseAction(
        lambda s, p, k: (lineno(p, s)))

    arff_header = relation_part + attributes_part + data_part
    # Attached after grammar assembly so nested relational blocks reuse it.
    attribute.setParseAction(self._create_attribute)

    try:
        result = arff_header.parseString(header, parseAll=True)
    except ParseException as e:
        raise HeaderError(FileType.ARFF, e.lineno, e.col, e.line, e)

    self._relation_name = result.rel_name
    self._find_relational(result.children)
    self._linearize_attrs(result.children)
    self._data_start = result.data_start
    self._index = 0
def _format_args(aStr):
    """
    Process composed function string into nested pyparsing.ParseResults

    :param str aStr: string to parse
    :return: formatting result
    :rtype: pyparsing.ParseResults
    """
    name = Word(alphas, alphanums + "_")
    signed_int = Combine(Optional(Literal('-')) + Word(nums))
    open_paren = Literal("(").suppress()
    close_paren = Literal(")").suppress()
    expression = Forward()
    # An argument may itself be a nested call, a name, an integer, or empty.
    arg = Group(expression) | name | signed_int | Empty()
    arg_list = arg + ZeroOrMore("," + arg)
    expression << name + Group(open_paren + arg_list + close_paren)
    return expression.parseString(aStr)
def create_bnf():
    """Assemble pyparsing elements for Python-like literals (ints, reals,
    tuples, lists, dicts, quoted strings and words).

    NOTE(review): this returns ``dict_inner`` — a brace-less
    ``key: value, ...`` list — not the fully assembled ``dict_str`` /
    ``list_str`` grammars also wired up here; confirm that is intentional.
    """
    # Parse actions converting matched text/token lists into native values.
    cvt_int = lambda toks: int(toks[0])
    cvt_real = lambda toks: float(toks[0])
    cvt_tuple = lambda toks : tuple(toks.asList())
    cvt_dict = lambda toks: dict(toks.asList())

    # define punctuation as suppressed literals
    (lparen, rparen, lbrack, rbrack, lbrace,
     rbrace, colon) = map(Suppress, "()[]{}:")

    integer = Combine(Optional(oneOf("+ -")) + Word(nums)).setName("integer")
    integer.setParseAction(cvt_int)
    real = Combine(Optional(oneOf("+ -")) + Word(nums) + "." +
                   Optional(Word(nums)) +
                   Optional("e" + Optional(oneOf("+ -")) +
                            Word(nums))).setName("real")
    real.setParseAction(cvt_real)

    # Forward declarations let compound types nest recursively.
    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()

    list_item = (real | integer | Group(list_str) | tuple_str | dict_str |
                 quotedString.setParseAction(removeQuotes) |
                 Word(alphas8bit + alphas, alphas8bit + alphanums + "_"))
    # A missing dict value parses as None.
    list_item2 = list_item | Empty().setParseAction(lambda: [None])

    # Tuples tolerate a trailing comma.
    tuple_str << (Suppress("(") + Optional(delimitedList(list_item)) +
                  Optional(Suppress(",")) + Suppress(")"))
    tuple_str.setParseAction(cvt_tuple)
    list_str << (lbrack +
                 Optional(delimitedList(list_item) +
                          Optional(Suppress(","))) + rbrack)

    dict_entry = Group(list_item + colon + list_item2)
    dict_inner = delimitedList(dict_entry) + Optional(Suppress(","))
    dict_inner.setParseAction(cvt_dict)
    dict_str << (lbrace + Optional(dict_inner) + rbrace)
    return dict_inner
def __init_execution_parser(self):
    """Grammar for execution assignments: binary/ternary/empty relations,
    variable assignments and the DONE marker, joined by AND."""
    name = Word(alphas + nums + T_US)
    # (a, b) pair and ((a, b), c) triple tokens.
    pair = T_OP + name + T_CM + name + T_CP
    triple = T_OP + T_OP + name + T_CM + name + T_CP + T_CM + name + T_CP
    pair_list = pair + ZeroOrMore(T_CM + pair)
    triple_list = triple + ZeroOrMore(T_CM + triple)
    # name = { ...relation body... }
    ternary_rel = (name + T_EQ + T_OCB + (triple_list) + T_CCB)(P_TRREL)
    binary_rel = (name + T_EQ + T_OCB + (pair_list) + T_CCB)(P_BIREL)
    empty_rel = (name + T_EQ + T_OCB + (Empty()) + T_CCB)(P_EMREL)
    # (lhs = rhs)
    assignment = (T_OP + name + T_EQ + name + T_CP)(P_ASS)
    done_marker = Literal(T_DONE)
    single = ternary_rel | binary_rel | empty_rel | assignment | done_marker
    return single + ZeroOrMore(T_AND + single)
def _create_block_bnf():
    """Create the grammar for block statements (every/when/latch/config/on
    blocks).  Populates module-level globals on first call; later calls are
    no-ops because the built grammar is cached.
    """
    global block_bnf, time_interval, slot_id, statement, block_id, ident, stream

    # Already built — these elements are module-level singletons.
    if block_bnf is not None:
        return

    trigger_clause = Group(stream_trigger |
                           Group(stream).setResultsName('stream_always') |
                           Group(ident).setResultsName('identifier'))

    # every <time or tick interval>
    every_block_id = Group(Literal(u'every').suppress() -
                           (time_interval | tick_interval)).setResultsName('every_block')
    # when connected to <slot>
    when_block_id = Group(Literal(u'when').suppress() +
                          Literal("connected").suppress() -
                          Literal("to").suppress() -
                          slot_id).setResultsName('when_block')
    # when <stream trigger>
    latch_block_id = Group(Literal(u'when').suppress() -
                           stream_trigger).setResultsName('latch_block')
    # config <slot>
    config_block_id = Group(Literal(u'config').suppress() -
                            slot_id).setResultsName('config_block')
    # on <trigger> [and|or <trigger>]
    on_block_id = Group(Literal(u'on').suppress() -
                        trigger_clause.setResultsName('triggerA') -
                        Optional((Literal("and") | Literal("or")) -
                                 trigger_clause.setResultsName('triggerB'))).setResultsName('on_block')

    # Keep track of the location where the match started for error handling
    locator = Empty().setParseAction(lambda s, l, t: l)('location')

    block_id = Group(locator + (every_block_id | when_block_id |
                                latch_block_id | config_block_id |
                                on_block_id))

    # Blocks nest: a statement is either a simple statement or another block.
    block_bnf = Forward()
    statement = generic_statement | block_bnf
    block_bnf << Group(block_id +
                       Group(Literal(u'{').suppress() +
                             ZeroOrMore(statement) +
                             Literal(u'}').suppress())).setResultsName('block')
def parse(self, file_path):
    """Parse a .names-style data header file at *file_path* (presumably the
    C4.5 format — attribute and class declarations; confirm against callers).

    Side effects: attributes are registered through self._create_attribute
    and classes through self._get_class.

    :raises HeaderError: when the file does not match the expected grammar.
    """
    # '.' terminates statements, so dots are stripped from parsed tokens.
    string = Word(printables,
                  excludeChars="|?,:").setParseAction(self._remove_dots)
    comment = "|" + restOfLine
    delimiter = Suppress(".") | LineEnd()

    # Bare comma-separated value list -> nominal (ENUM) attribute type; the
    # zero-width Empty() prefix tags the result with self.ENUM.
    enum = (Empty().copy().setParseAction(lambda t: self.ENUM) +
            Group(delimitedList(string))("next_arg").setParseAction(
                lambda t: {'values': t.next_arg.asList()}))
    # 'discrete N' — the count is parsed but discarded.
    discrete = Literal("discrete") + Suppress(Word(nums))

    attr_type = (Literal("continuous") | Literal("ignore") | discrete |
                 enum)("type")
    attribute = string("name") + Suppress(":") + attr_type

    cls = string("cls")
    cls.addParseAction(self._get_class)
    classes = delimitedList(cls)

    entry = attribute | classes
    attribute.setParseAction(self._create_attribute)
    parser = OneOrMore(entry + Optional(delimiter))
    parser.ignore(comment)

    try:
        parser.parseFile(file_path, parseAll=True)
    except ParseException as e:
        raise HeaderError(FileType.DATA, e.lineno, e.col, e.line, e)
alphanums,
    delimitedList,
    lineStart,
)
from .sections import GDSection, GDSectionHeader
from .values import value

# NOTE(review): this chunk begins mid-import; the names above close an
# import list whose opening line lies outside this view.

# Grammar for Godot resource files: "[header key="v" ...]" sections followed
# by "key = value" entries.
key = Word(alphanums + "_/").setName("key")
var = Word(alphanums + "_").setName("variable")
# name="value" attribute inside a section header.
attribute = Group(var + Suppress("=") + value)

# [node name="Node2D"]
# Empty() as the delimiter makes the attribute list whitespace-separated.
section_header = ((Suppress(lineStart) + Suppress("[") +
                   var.setResultsName("section_type") +
                   Optional(delimitedList(attribute, Empty())) +
                   Suppress("]")).setName("section_header").setParseAction(
                       GDSectionHeader.from_parser))

# texture = ExtResource( 1 )
section_entry = Group(Suppress(lineStart) + key + Suppress("=") +
                      value).setName("section_entry")
section_contents = delimitedList(section_entry,
                                 Empty()).setName("section_contents")

# [node name="Sprite" type="Sprite"]
# texture = ExtResource( 1 )
section = ((section_header +
            Optional(section_contents)).setName("section").setParseAction(
                GDSection.from_parser))
from pyparsing import Literal, Word, OneOrMore, Combine, nums, alphanums, oneOf, Empty

# Grammar for slash-prefixed user commands.
listen = Literal("listen")
# Dotted-quad IPv4: four groups of 1-3 digits.
ipaddr = Word(nums, min=1, max=3) + ('.' + Word(nums, min=1, max=3)) * 3
dnsname = Word(alphanums) + OneOrMore('.' + Word(alphanums))
# connect <ip | hostname | last | (nothing)>
connect = "connect" + Combine(ipaddr ^ dnsname ^ "last" ^ Empty())
quit = Literal("quit")
help = Literal("help")
# Nicknames are capped at 15 characters.
nick = "nick" + Word(alphanums, max=15)
trust = "trust" + oneOf("0 1 2")
fingerprint = Literal("fingerprint")
language = "language" + oneOf("en fr")
# file download <number>  |  file upload <path>
file = "file" + (("download" + Word(nums)) ^
                 ("upload" + Word(alphanums + "-_./")))
commands = "/" + (listen ^ connect ^ quit ^ help ^ nick ^ trust ^
                  fingerprint ^ language ^ file)
def parser(file_name):
    """ Parsing goes here

    Scans the GSLB-style config at *file_name* with ten pyparsing grammars
    (one per entity kind), groups each parent entity with its child lines,
    and returns ``(out_dict, excel_dict)`` — the parsed objects keyed by
    entity kind and a per-entity status map for reporting.
    """
    LOG.info('Parser Started')
    with open(file_name, 'r') as input_config:
        input_data = input_config.read()
        # Seek to EOF to learn the file size (used for the progress bar).
        input_config.seek(0, 2)
        file_size = input_config.tell()

    # grammer def:1 source-address-list Anywhere owner System
    key = Keyword("source-address-list")
    name = Word(printables)
    system = Keyword("owner")
    system_name = Word(alphanums)
    comment_key = Keyword("comments")
    comment = quotedString | Word(printables)
    grammer1 = Group(key + name + system + system_name +
                     Optional(comment_key + comment))

    # grammer def:2 ip address 10.10.10.10 255.255.255.255
    key1 = "ip address"
    ipaddress = Combine(Word(nums) + ('.' + Word(nums)) * 3)
    # Zero-width marker labels the second address as the netmask.
    ipaddress1 = Empty().addParseAction(
        replaceWith('Mask')) + Combine(Word(nums) + ('.' + Word(nums)) * 3)
    grammer2 = Group(key1 + Optional(ipaddress) + Optional(ipaddress1) +
                     Optional('::/0'))

    # grammer def:3 domain-list <name> owner System
    key = Keyword("domain-list")
    name = Word(printables)
    own = Keyword("owner")
    owner_name = Word(alphanums)
    comments = Keyword('comment')
    comment = quotedString | Word(printables)
    grammer3 = Group(key + name + own + owner_name +
                     Optional(comments + comment))

    # grammer def:4
    domain_key = Keyword("domain")
    domain_name = Word(printables)
    # ~Literal('-') keeps 'domain-list' from matching as 'domain'.
    grammer4 = Group(domain_key + ~Literal('-') + domain_name)

    # grammer def:4 answer vip 10.10.10.10 name <name> location
    # "<location>" manual-reactivation disable activate
    answer_key = Keyword("answer vip")
    ipaddress = Combine(Word(nums) + ('.'
                                      + Word(nums)) * 3)
    name_key = Keyword("name")
    name = Word(alphanums)
    location_key = Keyword("location")
    location = quotedString
    manual_reactivation_key = Keyword("manual-reactivation")
    manual_reactivation = Keyword("disable") | Keyword("enable")
    activate_key = Empty().addParseAction(
        replaceWith('Mode')) + Keyword("activate")
    grammer5 = Group(answer_key + ipaddress + name_key + name +
                     location_key + location +
                     Optional(manual_reactivation_key) +
                     Optional(manual_reactivation) + Optional(activate_key))

    # grammer6 : keepalive type tcp port <port> ip-address <ip> <<retries>>
    # <<successful-probes>> <<termination>>
    # keepalive type http-head port 80 <<path>> <<retries>>
    # <<successful-probes>> <<shared>> <<termination>>
    # keepalive type icmp ip-address <ip> <<retries>>
    # <<successful-probes>>
    key = Keyword('keepalive')
    tcp_key = Keyword("type tcp")
    http_key = Keyword("type http-head")
    icmp_key = Keyword("type icmp")
    port_key = Keyword("port")
    num = Word(nums)
    ip_add_key = Keyword("ip-address")
    ip_add = Combine(Word(nums) + ('.'
                                   + Word(nums)) * 3)
    retry = Optional(Keyword('retries') + num)
    probe = Optional(Keyword('successful-probes') + num)
    shared = Optional(Keyword('shared') + ip_add)
    path = Optional(Keyword('path') + ip_add)
    termination = Optional(
        Keyword('termination') + (Word('graceful') | Word('reset')))
    grammer6_1 = Group(key + tcp_key + port_key + num + ip_add_key + ip_add +
                       Optional(retry) + Optional(probe) +
                       Optional(termination))
    grammer6_2 = Group(key + http_key + path + port_key + num + retry +
                       probe + shared + termination)
    grammer6_3 = Group(key + icmp_key + Optional(ip_add_key + ip_add) +
                       retry + probe)
    grammer6 = grammer6_1 | grammer6_2 | grammer6_3

    # grammer 7: answer-group <name> owner System type vip comment "comment"
    key = Keyword("answer-group")
    key_name = Word(printables)
    owner_key = Keyword("owner")
    owner_name = Word(alphanums)
    type_key = Keyword("type")
    type_name = Word(alphas)
    comment_key = Keyword("comments")
    comments = quotedString() | Word(printables)
    grammer7 = Group(key + key_name + owner_key + owner_name + type_key +
                     type_name + Optional(comment_key + comments))

    # grammer 8: answer-add 10.10.10.10 name MDC-PROD-SMTP-ACE \
    # weight 1 order 1 load-threshold 254 suspend
    key = Keyword('answer-add')
    key_ip = Combine(Word(nums) + ('.'
                                   + Word(nums)) * 3)
    name_key = Keyword('name')
    name_val = Word(printables)
    weight_key = Keyword('weight')
    weight_val = Word(nums)
    order_key = Keyword('order')
    order_val = Word(nums)
    thres_key = Keyword('load-threshold')
    thres_val = Word(nums)
    suspend_key = Empty().addParseAction(replaceWith('Mode')) + Word(alphas)
    grammer8 = Group(key + key_ip + name_key + name_val + weight_key +
                     weight_val + order_key + order_val + thres_key +
                     thres_val + suspend_key)

    # grammer9:dns rule <rule name> owner System source-address-list
    # Anywhere domain-list <dl_name> activate
    # query a
    # <sticky | sticky method> <domain | domain-list> timeout 15
    key = Keyword("dns rule")
    key_val = Word(printables)
    owner_key = Keyword("owner")
    owner_name = Word(alphas)
    saddlist_key = Keyword("source-address-list")
    saddlist_val = Word(alphanums)
    domain_key = Keyword("domain-list")
    domain_val = Word(printables)
    activate_key = Empty().addParseAction(
        replaceWith('Mode')) + Keyword("activate")
    query_key = Keyword("query")
    query_val = Word("a") | Word(printables)
    s_key = Keyword('sticky method') | Keyword('sticky')
    d_key = Keyword('domain') | Keyword('domain-list')
    t_key = Keyword('timeout')
    t_val = Word(nums)
    grammer9 = Group(key + key_val + owner_key + owner_name + saddlist_key +
                     saddlist_val + domain_key + domain_val + activate_key +
                     Optional(query_key + query_val) +
                     Optional(s_key + d_key + t_key + t_val))

    # grammer10 :
    # clause 1 vip-group <name> method ordered ttl 20 \
    # count 1 <sticky|region-sticky> enabled manual-reactivation disable activate
    key = Keyword("clause")
    key_val = Word(nums)
    vip_key = Keyword("vip-group")
    vip_val = Word(printables)
    method_key = Keyword("method")
    method_val = Word(printables)
    ttl_key = Keyword("ttl")
    ttl_val = Word(nums)
    count_key = Keyword("count")
    count_val = Word(nums)
    sticky_key = Keyword("sticky") | Keyword("region-sticky")
    sticky_val = Word("enable")
    mr_key = Keyword("manual-reactivation")
    mr_val = Word("disable")
    state_key = Empty().addParseAction(replaceWith('Mode')) + Word("activate")
    grammer10 = Group(key + key_val + vip_key + vip_val + method_key +
                      method_val + ttl_key + ttl_val + count_key + count_val +
                      Optional(sticky_key + sticky_val) + mr_key + mr_val +
                      state_key)

    # Each alternative pairs a parent grammar with its repeated child lines.
    testing = Group(grammer1 + ZeroOrMore(grammer2)) | Group(
        grammer3 + ZeroOrMore(grammer4)) | Group(
            grammer5 + ZeroOrMore(grammer6)) | Group(
                grammer7 + ZeroOrMore(grammer8)) | Group(
                    grammer9 + ZeroOrMore(grammer10))
    LOG.info('Grammar Generated')

    # Maps each parent entity kind to the key its children are stored under.
    child_ref = {
        'source-address-list': 'ip address',
        'domain-list': 'domain',
        'answer vip': 'keepalive',
        'answer-group': 'answer-add',
        'dns rule': 'clause'
    }
    excel_dict = dict()
    out_dict = {
        'source-address-list': [],
        'domain-list': [],
        'answer vip': [],
        'answer-group': [],
        'dns rule': []
    }
    ref = ''
    print("Parsing the File ...")
    total_parse_count = 0
    c = 0
    for match, start, end in testing.scanString(input_data):
        # incrementing total object count for reporting
        total_parse_count += 1
        matched = match.asList()
        # First two tokens of the first line: entity kind and its name.
        # (NOTE: 'type' and 'name' shadow builtins here.)
        type = matched[0][0][0]
        name = matched[0][0][1]
        excel_dict[type + '-' + name] = {
            'type': type,
            'name': name,
            'status': '',
            'na': '',
            'skipped': ''
        }
        msg = 'Parsing Entity [ ' + matched[0][0][0] + '->' +\
            matched[0][0][1] + ']'
        printProgressBar(end, file_size, msg, prefix='Progress', suffix='')
        # dictonary creation: first line is the parent, the rest its children.
        start = True
        for line in matched[0]:
            out = 'Parsing [ ' + line[0] + '->' + line[1] + ']'
            LOG.debug(out)
            # print matched[0]
            if start:
                ref = line[0]
                parent = parse(line)
                start = False
            else:
                child = parse(line)
                if child_ref[ref] not in parent:
                    parent[str(child_ref[ref])] = [child]
                else:
                    parent[str(child_ref[ref])].append(child)
        out_dict[ref].append(parent)
    LOG.info('Config File Parsed')
    set_excel_dict(excel_dict)
    return out_dict, excel_dict
# --- Syslog lexical elements ---

MONTH = oneOf("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec")
DAY = Word(nums)
# hh:mm:ss
SL_TIME = Combine(Word(nums) + ":" + Word(nums) + ":" + Word(nums))
FULL_HEX = Word('x' + hexnums)
PORT = Word(nums)
# Dotted-quad IPv4 address.
IP4_ADDRESS = Combine(Word(nums) + ('.' + Word(nums)) * 3)
IP6_ADDRESS = Word(hexnums + ':')
# All printable characters except the comma.
PRINTABLES_NO_CD = printables.replace(',', '')
HOSTNAME = Word(alphanums + '.-_')("HOSTNAME")
INT = Word(nums)
HEX = Word(hexnums)
WORD = Word(alphanums)
TEXT = Group(OneOrMore(Word(printables)))
EMPTY = Empty()
DATA_NO_CD = Optional(Word(PRINTABLES_NO_CD))

# "app[pid]: " and "app: " process prefixes of a syslog line.
SYSLOG_PROC = Combine(WORD("app") + Word("[") + INT("pid") + Word("]: "))
SYSLOG_APP = Combine(WORD("app") + Word(": "))
# "Mon DD hh:mm:ss" timestamp.
SYSLOG_TS = Combine(MONTH + " " + DAY + " " + SL_TIME)
USERNAME = Word(alphanums)
APP_LOG_START = Combine(SYSLOG_TS("timestamp") + SP +
                        HOSTNAME("syslog_host") + SP + SYSLOG_APP)
PROC_LOG_START = Combine(SYSLOG_TS("timestamp") + SP +
                         HOSTNAME("syslog_host") + SP + SYSLOG_PROC)
# "message repeated N times: [ " wrapper around repeated messages.
REPEATED = Combine(Word('message repeated ') + Word(nums)('repeated') +
                   Word(' times: [ '))
# Function-call grammar fragments.
equal = Literal('=').suppress()
backslash = Literal('\\').suppress()
symbols = '''(){},.'"\\|'''
arg = Group(boolean | number | none | aString | expression)('args*')
kwarg = Group(argname + equal + arg)('kwargs*')
args = delimitedList(~kwarg + arg)  # lookahead to prevent failing on equals
kwargs = delimitedList(kwarg)


def setRaw(s, loc, toks):
    # Stash the raw matched source text on the call's parse result, using the
    # 'start'/'end' offsets captured by the zero-width locators below.
    toks[0].raw = s[toks[0].start:toks[0].end]


# name(args[, kwargs]) — Empty() locators record start/end source offsets.
call = Group(Empty().setParseAction(lambda s, l, t: l)('start') + funcname +
             leftParen + Optional(args + Optional(comma + kwargs)) +
             rightParen +
             Empty().leaveWhitespace().setParseAction(lambda s, l, t: l)
             ('end')).setParseAction(setRaw)('call')

# Metric pattern (aka. pathExpression)
validMetricChars = ''.join((set(printables) - set(symbols)))
escapedChar = backslash + Word(symbols + '=', exact=1)
partialPathElem = Combine(OneOrMore(escapedChar | Word(validMetricChars)))
matchEnum = Combine(leftBrace + delimitedList(partialPathElem, combine=True) +
                    rightBrace)
pathElement = Combine(
    Group(partialPathElem | matchEnum) +
def create_dbc_grammar():
    """Build and return the pyparsing grammar for DBC files.

    The grammar matches one or more top-level DBC entries: version,
    symbol table (NS_), bus speed (BS_, discarded), ECU list (BU_),
    messages with their signals, comments, attributes, attribute
    definitions and value choices.
    """
    # Basic lexical helpers.
    any_word = Word(printables)
    signed_int = Optional(Literal('-')) + Word(nums)
    float_num = Word(nums + '.Ee-')

    # Suppressed punctuation.
    colon_ = Suppress(Literal(':'))
    semi = Suppress(Literal(';'))
    bar = Suppress(Literal('|'))
    at_sign = Suppress(Literal('@'))
    plus_minus = Literal('+') | Literal('-')
    open_paren = Suppress(Literal('('))
    close_paren = Suppress(Literal(')'))
    open_brack = Suppress(Literal('['))
    close_brack = Suppress(Literal(']'))
    comma_ = Suppress(Literal(','))

    # Double-quoted (possibly multi-line) string, shared by many rules.
    dq_string = QuotedString('"', multiline=True)

    # VERSION "..."
    version = Group(Keyword('VERSION') + dq_string)

    # NS_ : <symbol per line>
    sym = Word(alphas + '_') + Suppress(LineEnd())
    symbols = Group(Keyword('NS_') + colon_ + Group(ZeroOrMore(sym)))

    # BS_ : (bus speed) -- parsed and thrown away.
    discard = Suppress(Keyword('BS_') + colon_)

    # BU_ : <ecu names on one line>
    ecu_list = Group(Keyword('BU_') + colon_
                     + ZeroOrMore(Word(printables).setWhitespaceChars(' ')))

    # A signal: name : start|length@byte_order sign (scale,offset) [min|max] "unit" receiver
    signal = Group(Keyword(SIGNAL) + any_word + colon_
                   + Group(signed_int + bar + signed_int + at_sign + signed_int + plus_minus)
                   + Group(open_paren + float_num + comma_ + float_num + close_paren)
                   + Group(open_brack + float_num + bar + float_num + close_brack)
                   + dq_string
                   + any_word)

    # A message followed by all of its signals.
    message = Group(Keyword(MESSAGE) + signed_int + any_word + signed_int
                    + any_word + Group(ZeroOrMore(signal)))

    # A comment attached to a message, a signal or an ECU.
    comment = Group(Keyword(COMMENT)
                    + ((Keyword(MESSAGE) + signed_int + dq_string + semi)
                       | (Keyword(SIGNAL) + signed_int + any_word + dq_string + semi)
                       | (Keyword(ECU) + any_word + dq_string + semi)))

    # An attribute value assignment.
    attribute = Group(Keyword(ATTRIBUTE)
                      + (dq_string
                         | (Keyword(SIGNAL) + dq_string)
                         | (Keyword(MESSAGE) + dq_string))
                      + any_word
                      + (semi
                         | (Group(ZeroOrMore(Group((comma_ | Empty()) + dq_string))) + semi)
                         | (Group(ZeroOrMore(signed_int)) + semi)))

    # An attribute default value.
    default_attr = Group(Keyword(DEFAULT_ATTR) + dq_string
                         + (signed_int | dq_string) + semi)

    # An attribute definition (range for message/signal attributes).
    attr_definition = Group(Keyword(ATTR_DEFINITION) + dq_string
                            + (Keyword(MESSAGE) | Keyword(SIGNAL))
                            + signed_int + signed_int + semi)

    # A value table for a signal: VAL_ msg_id name N "label" ... ;
    choice = Group(Keyword(CHOICE) + signed_int + any_word
                   + Group(OneOrMore(Group(signed_int + dq_string))) + semi)

    # Alternation order matters: first match wins.
    entry = (version | symbols | discard | ecu_list | message | comment
             | attribute | default_attr | attr_definition | choice)

    return OneOrMore(entry)
def _make_arabic_parser():
    """Build the pyparsing grammar for an Arabic/English query language and
    return its ``parseString`` bound method.

    Supports plain words, wildcards (* and Arabic ?), term ranges,
    synonyms (~), antonyms (#), derivation (< >), spelling errors (%),
    tashkil quoting ('...'), tuple search ({root,pattern,type}), quoted
    phrases, field prefixes (field:), boosting (^n) and the boolean
    operators AND/OR/NOT/ANDNOT in both English and Arabic.
    """
    escapechar = "//"
    # wordchars = printables
    # for specialchar in '*?^():"{}[] ' + escapechar:
    # wordchars = wordchars.replace(specialchar, "")
    # wordtext = Word(wordchars)
    # Latin letters plus the Arabic alphabet, used for field names below.
    alephba = u"""
abcdefghijklmnopqrstuvwxyz_
األآإـتنمكطدجحخهعغفقثصضشسيبئءؤرىةوزظذ
"""

    # Any run of characters that are not query-language metacharacters.
    wordtext = CharsNotIn(u'//*؟^():"{}[]$><%~#،,\' +-|')
    # "//" followed by any single character escapes that character.
    escape = Suppress(escapechar) \
             + (Word(printables, exact=1) | White(exact=1))

    wordtoken = Combine(OneOrMore(wordtext | escape))

    # A plain old word.
    plainWord = Group(wordtoken).setResultsName("Word")

    # A wildcard word containing * or ? (Latin or Arabic question mark).
    wildchars = Word(u"؟?*")
    # Start with word chars and then have wild chars mixed in
    wildmixed = wordtoken + OneOrMore(wildchars + Optional(wordtoken))
    # Or, start with wildchars, and then either a mixture of word and wild
    # chars, or the next token
    wildstart = wildchars \
                + (OneOrMore(wordtoken + Optional(wildchars))
                   | FollowedBy(White()
                                | StringEnd()))
    wildcard = Group(Combine(wildmixed | wildstart)).setResultsName("Wildcard")

    # A range of terms: [a to b], [to b], [a to]
    startfence = Literal("[")
    endfence = Literal("]")
    rangeitem = QuotedString('"') | wordtoken
    # The range separator keyword, Arabic or English.
    to = Keyword(u"الى") \
         | Keyword(u"إلى") \
         | Keyword("To") \
         | Keyword("to") \
         | Keyword("TO")
    openstartrange = Group(Empty()) \
                     + Suppress(to + White()) \
                     + Group(rangeitem)

    openendrange = Group(rangeitem) \
                   + Suppress(White() + to) \
                   + Group(Empty())
    normalrange = Group(rangeitem) \
                  + Suppress(White() + to + White()) \
                  + Group(rangeitem)
    # NOTE(review): 'range' deliberately kept -- shadows the builtin only
    # inside this function.
    range = Group(startfence
                  + (normalrange | openstartrange | openendrange)
                  + endfence).setResultsName("Range")

    # synonyms
    syn_symbol = Literal("~")
    synonym = Group(syn_symbol + wordtoken).setResultsName("Synonyms")

    # antonyms
    ant_symbol = Literal("#")
    antonym = Group(ant_symbol + wordtoken).setResultsName("Antonyms")

    # derivation level 1,2
    derive_symbole = Literal(u"<") | Literal(u">")
    derivation = Group(OneOrMore(derive_symbole) + wordtoken).setResultsName("Derivation")

    # spellerrors
    # spellerrors=Group(QuotedString('\'')).setResultsName("Errors")
    spellerrors_symbole = Literal(u"%")
    spellerrors = Group(spellerrors_symbole + wordtoken).setResultsName("SpellErrors")

    # shakl: must uplevel to boostable
    tashkil_symbol = Literal("'")
    tashkil = Group(tashkil_symbol +
                    ZeroOrMore(wordtoken | White()) +
                    tashkil_symbol).setResultsName("Tashkil")

    # tuple search (root,pattern,type); separator may be Arabic or Latin comma.
    starttuple = Literal("{")
    endtuple = Literal("}")
    bettuple = Literal(u"،") | Literal(",")
    wordtuple = Group(Optional(wordtoken))
    # NOTE(review): 'tuple' shadows the builtin within this function only.
    tuple = Group(starttuple +
                  wordtuple +
                  ZeroOrMore(bettuple + wordtuple) +
                  endtuple).setResultsName("Tuple")

    # A word-like thing
    generalWord = range | wildcard | plainWord | tuple | antonym | synonym | \
                  derivation | tashkil | spellerrors

    # A quoted phrase
    quotedPhrase = Group(QuotedString('"')).setResultsName("Quotes")

    expression = Forward()

    # Parentheses can enclose (group) any expression
    parenthetical = Group(
        (Suppress("(") + expression + Suppress(")"))).setResultsName("Group")

    boostableUnit = generalWord | quotedPhrase
    # unit^2.5 boosts the preceding unit.
    boostedUnit = Group(boostableUnit +
                        Suppress("^") +
                        Word("0123456789", ".0123456789")).setResultsName("Boost")

    # The user can flag that a parenthetical group, quoted phrase, or word
    # should be searched in a particular field by prepending 'fn:', where fn is
    # the name of the field.
    fieldableUnit = parenthetical | boostedUnit | boostableUnit
    fieldedUnit = Group((Word(alephba + "_") | Word(alphanums + "_")) +
                        Suppress(':') +
                        fieldableUnit).setResultsName("Field")

    # Units of content
    unit = fieldedUnit | fieldableUnit

    # A unit may be "not"-ed.
    operatorNot = Group(Suppress(Keyword(u"ليس") | Keyword(u"NOT")) +
                        Suppress(White()) +
                        unit).setResultsName("Not")

    generalUnit = operatorNot | unit

    # Boolean operator keywords, Arabic and English.
    andToken = Keyword(u"و") | Keyword(u"AND")
    orToken = Keyword(u"أو") | Keyword(u"او") | Keyword(u"OR")
    andNotToken = Keyword(u"وليس") | Keyword(u"ANDNOT")

    # a AND b, or the shorthand a +b.
    operatorAnd = Group((generalUnit +
                         Suppress(White()) +
                         Suppress(andToken) +
                         Suppress(White()) +
                         expression) |
                        (generalUnit +
                         Suppress(Literal(u"+")) +
                         expression)).setResultsName("And")

    # a OR b, or the shorthand a |b.
    operatorOr = Group((generalUnit +
                        Suppress(White()) +
                        Suppress(orToken) +
                        Suppress(White()) +
                        expression) |
                       (generalUnit +
                        Suppress(Literal(u"|")) +
                        expression)).setResultsName("Or")

    # a ANDNOT b, or the shorthand a -b.
    operatorAndNot = Group((unit +
                            Suppress(White()) +
                            Suppress(andNotToken) +
                            Suppress(White()) +
                            expression) |
                           (unit +
                            Suppress(Literal(u"-")) +
                            expression)).setResultsName("AndNot")

    # An expression is any sequence of the above; Empty() allows a blank query.
    expression <<= (OneOrMore(operatorAnd | operatorOr | operatorAndNot |
                              generalUnit | Suppress(White())) | Empty())

    toplevel = Group(expression).setResultsName("Toplevel") + StringEnd()

    # Return the bound parse entry point rather than the grammar itself.
    return toplevel.parseString
def _parse_line(self):
    """
    Parse a single line and set the node representing the active context.

    Further lines processed are expected to be children of the active
    context, or children of its ancestors.

    ------------------------------------------------
    Basic grammar is as follows:

    line = <mako>|<nemo>|<string>

    <mako>
    We don't normally parse tags, so the following info is sketchy.
    Mako tags are recognized as anything that starts with:
        - <%
        - %>
        - %CLOSETEXT
        - </%

    Mako Control tags however are parsed, and required to adhere to the
    same indentation rules as Nemo tags.

    mako_control = <start>|<middle>|<end>
        start   = (for|if|while) <inner>:
        middle  = (else|elif):
        end     = endfor|endwhile

    nemo = % ( <mako_control>|<nemo_statement> )
    nemo_statement = .<quote><string><quote>|#<quote><string><quote>|<words>

    <quote> = '|"
    Notes: Quotes are required to be balanced.
           Quotes preceded by a \\ are ignored.
    <string> = *
    words = \\w+
    """
    #if self.debug: print '\t ' + str(self._current_node)

    # PyParser setParseAction's actually execute during parsing,
    # so we need closures in order to change the current scope.
    def depth_from_indentation(function):
        """ Set the depth as the start of the match """
        def wrap(start, values):
            #print 'Depth %d | %d %s' %(self._depth, start, values)
            #self._depth = start
            self._current_node = function(values)
            #print self._current_node
            return ''
        return wrap

    def depth_from_match(function):
        """ Set the depth as the start of the match """
        def wrap(start, values):
            #print 'Depth %d | %d %s' %(self._depth, start, values)
            #print self._current_node
            self._depth = start
            self._current_node = function(values)
            #print self._current_node
            return ''
        return wrap

    def depth_from_nemo_tag(function):
        """ Start of the match is where the nemo tag is.
            Pass the other values to the wrapped function """
        def wrap(start, values):
            # print 'Depth %d | %d %s' %(self._depth, start, values)
            self._depth = start
            tokens = values[1]
            self._current_node = function(tokens)
            #print self._current_node
            return ''
        return wrap

    # Match HTML
    from pyparsing import NotAny, MatchFirst
    html = restOfLine
    html.setParseAction(depth_from_indentation(self._add_html_node))

    # Match Mako control tags
    nemo_tag = Literal('%')

    begin = Keyword('for') | Keyword('if') | Keyword('while')
    middle = Keyword('else') | Keyword('elif')
    end = Keyword('endfor') | Keyword('endif') | Keyword('endwhile')
    control = nemo_tag + (begin | middle | end)

    begin.setParseAction(depth_from_indentation(self._add_nesting_mako_control_node))
    middle.setParseAction(depth_from_indentation(self._add_mako_middle_node))
    end.setParseAction(depth_from_indentation(self._add_mako_control_leaf))

    # Match Nemo tags
    argument_name = Word(alphas, alphanums+"_-:")
    argument_value = quotedString
    regular_argument = argument_name + Literal('=') + argument_value

    # '.' and '#' are shorthand for class= and id= attributes.
    class_name = Literal('.').setParseAction(lambda x: 'class=')
    id_name = Literal('#').setParseAction(lambda x: 'id=')
    special_argument = (class_name | id_name) + argument_value
    argument = Combine(special_argument) | Combine(regular_argument)

    # Match single Nemo statement (part of a multi-line)
    inline_nemo_html = Word(alphas) + Group(ZeroOrMore(argument))
    inline_nemo_html.setParseAction(depth_from_match(self._add_nemo_node))

    # Match first nemo tag on the line (the one that may begin a
    # multi-statement expression)
    nemo_html = nemo_tag + Group(Word(alphanums+"_-:") + Group(ZeroOrMore(argument)))
    nemo_html.setParseAction(depth_from_nemo_tag(self._add_nemo_node))

    # Match a multi-statement expression. Nemo statements are separated
    # by |. Anything after || is treated as html.
    separator = Literal('|').suppress()
    html_separator = Literal('||')  # | Literal('|>')
    nemo_list = nemo_html + ZeroOrMore(separator + inline_nemo_html)
    inline_html = html.copy()
    inline_html.setParseAction(depth_from_match(self._add_inline_html_node))
    nemo_multi = nemo_list + Optional(html_separator + inline_html)

    # Match empty Nemo statement
    empty = nemo_tag + Empty()
    empty.setParseAction(depth_from_indentation(self._add_blank_nemo_node))

    # Match unused Mako tags
    mako_tags = Literal('<%') | Literal('%>') | Literal('%CLOSETEXT') | Literal('</%')
    # NOTE(review): 'mako' is assigned but never used below ('line' uses
    # mako_tags directly) -- candidate for removal.
    mako = mako_tags
    mako_tags.setParseAction(depth_from_indentation(self._add_html_node))

    # Matches General
    nemo = (control | nemo_multi | empty)
    line = mako_tags | nemo | html

    # Depth Calculation (deprecated?)
    self._depth = len(self._c) - len(self._c.strip())

    #try:
    line.parseString(self._c)
# --- command grammar: misc verbs and the top-level action parser ---
# NOTE(review): password_verb, words, conf, objref and the verb grammars
# (time, take, get, ...) are defined elsewhere in this module; 'info' is
# used by actions_parser before its visible definition below -- presumably
# the full file orders these differently; verify before reordering.

# "password <old> <new>"
password_word = Word(printables)
password = password_verb + password_word('old') + password_word('new') + LineEnd()

# "help [topic]" -- '?' is normalized to 'help'.
# NOTE(review): 'help' and 'set' shadow builtins at module level;
# kept as-is since other modules may import these names.
help_verb = oneOf('help ?', caseless=True)('verb')
help_verb.setParseAction(replaceWith('help'))
help = help_verb + Optional(Word(alphas)('topic')) + LineEnd()

# Easter egg.
xyzzy = CaselessLiteral('xyzzy')('verb') + LineEnd()

# Fallback: any word as a verb plus the rest of the line.
catchall = Word(alphanums)('verb') + Optional(words)('rest') + LineEnd()

# Zero-width marker that tags every match with section='actions'.
section = Empty()('section')
section.setParseAction(replaceWith('actions'))

# Alternation order matters: catchall must stay last.  The only
# difference between the branches is that talk mode adds 'tell'.
if conf.talkmode:
    actions_parser = section + (info | time | take | get | drop | put |
                                inventory | wear | remove | use | lock |
                                listen | look | unlock | follow | exits |
                                say | tell | shout | emote | think |
                                recap | quit_ | who | stats | set | unset |
                                password | xyzzy | help | go | catchall)
else:
    actions_parser = section + (info | time | take | get | drop | put |
                                inventory | wear | remove | use | lock |
                                listen | look | unlock | follow | exits |
                                say | shout | emote | think |
                                recap | quit_ | who | stats | set | unset |
                                password | xyzzy | help | go | catchall)

# '@' prefix switches to the wizard command section.
wiz = CaselessLiteral('@')('section')
wiz.setParseAction(replaceWith('wizard'))

# "info [objref]"
info_verb = CaselessLiteral('info')('verb')
info = info_verb + Optional(objref) + LineEnd()