class NginxParser(object):
    """A class that parses nginx configuration with pyparsing."""

    # constants
    left_bracket = Literal("{").suppress()
    right_bracket = Literal("}").suppress()
    semicolon = Literal(";").suppress()
    space = White().suppress()
    key = Word(alphanums + "_/")
    value = CharsNotIn("{};,")
    value2 = CharsNotIn(";")
    location = CharsNotIn("{};," + string.whitespace)
    ifword = Literal("if")
    setword = Literal("set")
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    # "~*" must be listed before "~" so the longer literal is tried first
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    assignment = (key + Optional(space + value) + semicolon)
    setblock = (setword + OneOrMore(space + value2) + semicolon)
    block = Forward()
    ifblock = Forward()
    subblock = ZeroOrMore(Group(assignment) | block | ifblock | setblock)
    # BUG FIX: the original rebound ``ifblock`` with ``=``, which left the
    # Forward referenced by ``subblock`` unfilled; fill it with ``<<`` so the
    # recursive reference actually resolves.
    ifblock << (
        ifword
        + SkipTo('{')
        + left_bracket
        + subblock
        + right_bracket)

    block << Group(
        Group(key
              + Optional(space + modifier)
              + Optional(space + location))
        + left_bracket
        + Group(ZeroOrMore(Group(assignment) | block | ifblock | setblock))
        + right_bracket)

    script = OneOrMore(Group(assignment) | block).ignore(pythonStyleComment)

    def __init__(self, source):
        self.source = source

    def parse(self):
        """Returns the parsed tree."""
        return self.script.parseString(self.source)

    def as_list(self):
        """Returns the list of tree."""
        return self.parse().asList()
def initGrammar(self):
    """Build the pyparsing grammar for wiki markup; the root is stored in self.N_S."""
    L_Equals = Word("=")  # run of '=' signs delimiting a header line
    N_comment = htmlComment()
    # any run of characters that is not markup punctuation
    N_name = CharsNotIn("{}|[]")
    # plain text: everything up to the next markup opener/closer
    N_simpleText = SkipTo(
        oneOf(["{{", "|", "[[", "]]", "}}", "'''", "<ref"]))
    N_elements = Forward()
    # '''bold''' spans are tagged as APOSTROFS
    N_apostrofs = QuotedString("'''").setParseAction(
        lambda s, l, t: {'APOSTROFS': t})
    # [[target|label|...]] internal links, handled by self.genLink
    N_link = nestedExpr(
        opener="[[", closer="]]",
        content=N_name +
        Optional("|" + delimitedList(CharsNotIn("[]"), delim="|"))
    ).setParseAction(self.genLink)
    # == Header == lines are tagged as HEADER
    N_header = Group(L_Equals + SkipTo("=") + L_Equals).setParseAction(
        lambda s, l, t: {'HEADER': t})
    N_template = Forward()
    N_key = CharsNotIn("{}|=")
    # N_value = ZeroOrMore(CharsNotIn("{}|")) + ZeroOrMore(N_template + ZeroOrMore(CharsNotIn("{}|"))).setResultsName('VALUE')
    # |key=value|key=value... template parameter list
    N_keyValues = "|" + delimitedList(
        Group(Optional(N_key) + Optional("=" + N_elements)), delim="|")
    N_label_content = N_template | ("{{" + OneOrMore("!") + "}}") | CharsNotIn("{}|")
    N_label = nestedExpr(opener="{", closer="}", content=N_label_content)
    # {{name|params}} transclusions, handled by self.genTemplate
    N_template << nestedExpr(
        opener="{{", closer="}}",
        content=N_name + Optional(N_keyValues)).setParseAction(self.genTemplate)
    ref_start, ref_end = makeHTMLTags("ref")
    # <ref ...>...</ref> footnotes are tagged as REF
    N_named_ref = ref_start + SkipTo(ref_end) + ref_end
    N_named_ref.setParseAction(lambda s, l, t: {'REF': t})
    # NOTE(review): alternation order is significant in pyparsing MatchFirst;
    # N_simpleText (a SkipTo) appears early — confirm this is intended.
    N_element = N_comment | N_simpleText | N_named_ref | N_apostrofs | N_link | N_header | N_template | N_label
    # N_ref = nestedExpr( opener="<ref>", closer="</ref>", content=N_elements).setParseAction( lambda s,l,t: {'REF' : t} )
    N_elements << ZeroOrMore(N_element)
    self.N_S = N_elements
def is_header_content_type_missing(filename: str) -> bool:
    """Check if Content-Type header is missing.

    Verifies if the file has the tags::

       <META HTTP-EQUIV="Content-Type" CONTENT="no-cache">

    :param filename: Path to the ``HTML`` source.
    :returns: True if tag ``meta`` have attributes ``http-equiv`` and
              ``content`` set as specified, False otherwise.
    """
    tag = 'meta'
    tk_content = CaselessKeyword('content')
    tk_type = CaselessKeyword('type')
    # matches the attribute value "content-type"
    prs_cont_typ = tk_content + Literal('-') + tk_type
    # NOTE: tk_type is deliberately rebound below; prs_cont_typ above has
    # already captured the CaselessKeyword version.
    tk_type = SkipTo(Literal('/'), include=True)
    tk_subtype = SkipTo(Literal(';'), include=True)
    # "type/subtype;" portion of the MIME content value
    prs_mime = tk_type + tk_subtype
    tk_charset = CaselessKeyword('charset')
    tk_charset_value = SkipTo(stringEnd)
    prs_charset = tk_charset + Literal('=') + tk_charset_value
    # full expected value: "type/subtype; charset=<value>"
    prs_content_val = prs_mime + prs_charset
    attrs = {'http-equiv': prs_cont_typ, 'content': prs_content_val}
    try:
        has_content_type = _has_attributes(filename, tag, attrs)
    except FileNotFoundError as exc:
        show_unknown('There was an error', details=dict(error=str(exc)))
        return False
    if not has_content_type:
        # attribute pair absent -> header is missing -> vulnerability open
        result = True
        show_open('Attributes in {}'.format(filename),
                  details=dict(attributes=str(attrs)))
    else:
        result = False
        show_close('Attributes in {}'.format(filename),
                   details=dict(attributes=str(attrs)))
    return result
def initGrammar(self):
    """Build the wiki-markup grammar; the root element is stored in self.N_S.

    BUG FIX: the original rebound ``N_element`` with ``=`` at the end, so the
    ``Forward`` referenced by ``N_insideElements`` was never filled. It is now
    filled with ``<<`` so nested template content can be parsed recursively.
    """
    N_comment = htmlComment().setParseAction(self.genComment)
    N_name = CharsNotIn("{}|[]")
    # [[target|label|...]] internal links, handled by self.genLink
    N_link = nestedExpr(
        opener="[[", closer="]]",
        content=N_name +
        Optional("|" + delimitedList(CharsNotIn("[]"), delim="|"))
    ).setParseAction(self.genLink).setDebug(True)
    L_Equals = Word("=")
    # == Header == lines, handled by self.genHeader
    N_header = Group(L_Equals + SkipTo("=") + L_Equals).setParseAction(
        self.genHeader)
    N_element = Forward()
    N_template = Forward().setDebug(True)
    N_key = CharsNotIn("{}|=")
    # plain text inside a template value, up to the next markup token
    N_internalText = CharsNotIn("{}|=<[") + SkipTo(
        Literal("{{") | Literal("[[") | Literal("<!--") | Literal("<ref")
        | Literal("|") | Literal("}}"))  # CharsNotIn("{}|[]<")
    N_insideElements = OneOrMore(N_element | N_internalText).setDebug(True)
    N_keyValue = Group(
        Optional(N_key) +
        Optional(Literal("=") + N_insideElements)).setDebug(True)
    N_keyValues = "|" + delimitedList(N_keyValue, delim="|")
    N_keyValues.setDebug(True)
    #N_label_content = N_template | ("{{"+OneOrMore("!")+"}}") | CharsNotIn("{}|")
    #N_label = nestedExpr( opener="{", closer="}", content = N_label_content)
    # {{name|params}} transclusions, handled by self.genTemplate
    N_template << nestedExpr(
        opener="{{", closer="}}",
        content=N_name + Optional(N_keyValues)).setParseAction(self.genTemplate)
    #ref_start, ref_end = makeHTMLTags("ref")
    #N_named_ref = ref_start + SkipTo(ref_end) + ref_end
    #N_named_ref.setParseAction( lambda s,l,t: {'REF' : t} )
    # fill the Forward (was: ``N_element = ...``, which left it unfilled)
    N_element << (N_comment | N_link | N_header | N_template)
    N_element.setDebug(True)
    self.N_S = N_element
def _get_block(file_lines, line) -> str:
    """
    Return a Python block of code beginning in line.

    :param file_lines: Lines of code
    :param line: First line of block (1-based index into file_lines)
    """
    frst_ln = file_lines[line - 1]
    # drop everything before the block's first line
    file_lines = file_lines[line - 1:]
    rem_file = "\n".join(file_lines)
    # seed the indent stack with the first line's indentation (+1, per
    # pyparsing's indentedBlock column convention)
    indent_stack = [len(frst_ln) - len(frst_ln.lstrip(' ')) + 1]
    prs_block = Forward()
    block_line = SkipTo(LineEnd())
    block_header = SkipTo(LineEnd())
    # a block is a header line followed by a more-indented body of blocks
    block_body = indentedBlock(prs_block, indent_stack)
    block_def = Group(block_header + block_body)
    # pylint: disable=pointless-statement
    prs_block << (block_def | block_line)
    block_list = prs_block.parseString(rem_file).asList()
    # flatten the nested parse result back into source text
    block_str = (lang.lists_as_string(block_list, '', 0))
    return block_str.rstrip()
def get_conditional_stmt(_stmt):
    """Build a grammar for an if/elif/else statement over *_stmt* suites."""
    # a condition is everything before the ':' (colon suppressed)
    cond = SkipTo(':').set_results_name('condition') + Suppress(':')
    body = IndentedBlock(_stmt)

    def branch(decl):
        # one conditional branch: its declaration plus an indented suite
        return Group(decl + body)

    if_branch = branch(Literal('if') + cond)
    elif_branch = branch(Literal('elif') + cond)
    else_branch = branch(Literal('else:'))

    chain = if_branch + Opt(OneOrMore(elif_branch)) + Opt(else_branch)
    return Group(chain).set_parse_action(parse_conditional)
def expr(self) -> ParserElement:
    """Return the grammar for a list: consecutive ROW lines up to a blank-line break."""
    NL = LineEnd()
    # a list ends at a blank line (possibly holding spaces/tabs) or at EOF
    LIST_BREAK = NL + Optional(White(" \t")) + NL | StringEnd()
    # constructs whose inner content must not terminate a row
    IGNORE = BlockQuote(**self.init_kwargs).expr | Panel(
        **self.init_kwargs).expr | Color(**self.init_kwargs).expr
    ROW = LineStart() + Combine(
        Optional(self.nested_token, default="")
        + ListIndent(self.indent_state, self.tokens)
        # row content runs to the next list-marker line or the list break
        + SkipTo(NL + Char(self.nested_token + self.tokens) | LIST_BREAK,
                 ignore=IGNORE)
        + Optional(NL),
    )
    return OneOrMore(ROW, stopOn=LIST_BREAK).setParseAction(self.action)
def parse_element(cls, indent_stack):
    """Sets ``connect`` attribute to the rule.

    Grammar: ``@connect("field")`` or ``@connect("field", update_func)``.
    The parse action normalizes the match into a dict with
    ``connected_field`` and ``update_function`` (None when no function
    argument was supplied).
    """
    return (
        Keyword("@connect").suppress()
        + Literal('(').suppress()
        + quotedString.setResultsName('field').setParseAction(removeQuotes)
        # optional second argument: everything up to the closing paren
        + Optional(Literal(',').suppress() + SkipTo(')')).setResultsName('func')
        + Literal(')').suppress()).setResultsName("connect").setParseAction(
            lambda toks: {
                'connected_field': toks.field,
                'update_function': toks.func[0] if toks.func else None
            })
def clear_sql(sql: str) -> str:
    """Strip ``/* ... */`` comments and trailing ``;`` from *sql*.

    :param sql: raw SQL text
    :return: the core statement text
    :raises ValueError: if the grammar does not reduce to exactly one token
    """
    # BUG FIX: assigning ``ParserElement.defaultWhitespaceChars`` only created
    # an unused class attribute; the supported API is the class method
    # setDefaultWhitespaceChars().  NOTE: this setting is global and affects
    # elements created afterwards.
    ParserElement.setDefaultWhitespaceChars(" \t")
    comment = nestedExpr('/*', '*/').suppress()
    starting = ZeroOrMore(comment)
    ending = ZeroOrMore(comment | ';').suppress() + StringEnd()
    expr = starting + SkipTo(ending) + ending
    r = expr.parseString(sql)
    if len(r) != 1:
        # ValueError is a subclass of Exception, so existing callers that
        # caught the old generic Exception still work
        raise ValueError('Error while parsing expr')
    return r[0]
def check_main_syntax(self, code):
    """Flag MAIN_SYNTAX when main() uses a signature other than the three accepted forms."""
    # Return value for main is optional in C++11
    parser = Literal("int") + Literal("main") + Literal("(") + SkipTo(
        Literal(")")) + Literal(")")
    if len(parser.searchString(code)):
        main_prefix = Literal("int") + Literal("main") + Literal("(")
        # int main(int argc, [const] char* argv[])
        full_use = Literal("int") + "argc" + "," + Optional(
            "const") + "char" + "*" + "argv" + "[" + "]" + ")"
        # 3 options for main() syntax: (), (void), (int argc, char* argv[])
        if not len((main_prefix + Literal(")")).searchString(code)) and \
                not len((main_prefix + Literal("void") + Literal(")")).searchString(code)) and \
                not len((main_prefix + full_use).searchString(code)):
            self.add_error(label="MAIN_SYNTAX")
def expr(self) -> ParserElement:
    """Return the grammar for an inline formatting span delimited by self.TOKEN."""
    NON_ALPHANUMS = Regex(r"\W", flags=re.UNICODE)
    TOKEN = Suppress(self.TOKEN)
    IGNORE = White() + TOKEN | self.get_ignore_expr()
    ELEMENT = Combine(
        TOKEN
        # content must not start with whitespace or a repeated delimiter
        + (~White() & ~Char(self.TOKEN))
        # span content runs to the closing delimiter, never across a newline
        + SkipTo(TOKEN, ignore=IGNORE, failOn="\n")
        + TOKEN
        # closing delimiter must be followed by a non-word char or EOF
        + FollowedBy(NON_ALPHANUMS | StringEnd()),
    )
    # the span itself must start at the string start or after a non-word char
    return (StringStart() | PrecededBy(NON_ALPHANUMS, retreat=1)) + Combine(
        ELEMENT.setParseAction(self.action) + Optional(~ELEMENT, default=" "),
    )
def expr(self) -> ParserElement:
    """Return the grammar for a user mention of the form ``[name|~accountid:xxx]``."""
    MENTION = Combine(
        "["
        + Optional(
            # optional display text before the '|'
            SkipTo("|", failOn="]") + Suppress("|"),
            default="",
        )
        + "~"
        + Optional(CaselessLiteral("accountid:"))
        + Word(alphanums + ":-").setResultsName("accountid")
        + "]",
    )
    # pad with a space when the mention is not already delimited on a side
    return ((StringStart()
             | Optional(PrecededBy(White(), retreat=1), default=" "))
            + MENTION.setParseAction(self.action)
            + (StringEnd()
               | Optional(FollowedBy(White()
                                     | Char(punctuation, excludeChars="[")
                                     | MENTION),
                          default=" ")))
def _parse_template(self, options, template):
    """Parse a template string.

    Builds a pyparsing grammar for the template markup constructs
    ({Variable}, {lang:...}, {block:...}...{/block:...}) and expands each
    one in place via transformString.

    :param options: context passed to the per-construct replacement helpers
    :param template: raw template text
    :return: the template with all recognized constructs replaced
    """
    # {prefix:Variable Name} style variables
    variable_name = Word(alphanums + " " + "-" + "_")
    variable_prefix = Optional(Word(alphanums) + ":")
    variable = "{" + variable_prefix + variable_name + "}"
    variable.setParseAction(self._replace_variable(options))

    # {lang:...} i18n labels
    i18n_label_start = "{lang:"
    i18n_label_end = "}"
    i18n_label = i18n_label_start + SkipTo(i18n_label_end).leaveWhitespace() + i18n_label_end
    i18n_label.setParseAction(self._replace_i18n_label(options))

    # {block:Text}...{/block:Text} post-type blocks
    block_type_name = oneOf("Text Photo Panorama Photoset Quote Link Chat Video Audio")
    block_type_start = "{block:" + block_type_name + "}"
    block_type_end = "{/block:" + block_type_name + "}"
    block_type = block_type_start + SkipTo(matchingCloseTag(block_type_start, block_type_end).leaveWhitespace(), include=True)
    block_type.setParseAction(self._replace_block_type(options))

    # {block:IfSomething} / {block:IfNotSomething} conditional blocks
    block_cond_name = Word(alphanums + "-" + "_")
    block_cond_start = "{block:If" + Optional("Not") + block_cond_name + "}"
    block_cond_end = "{/block:If" + Optional("Not") + block_cond_name + "}"
    block_cond = block_cond_start + SkipTo(matchingCloseTag(block_cond_start, block_cond_end).leaveWhitespace(), include=True)
    block_cond.setParseAction(self._replace_block_cond(options))

    # {block:Anything} catch-all "defined" conditional blocks
    block_def_cond_name = Word(alphanums + "-" + "_")
    block_def_cond_start = "{block:" + block_def_cond_name + "}"
    block_def_cond_end = "{/block:" + block_def_cond_name + "}"
    block_def_cond = block_def_cond_start + SkipTo(matchingCloseTag(block_def_cond_start, block_def_cond_end).leaveWhitespace(), include=True)
    block_def_cond.setParseAction(self._replace_block_def_cond(options))

    # {block:Posts} / {block:Tags} iteration blocks
    block_iter_name = oneOf("Posts Tags")
    block_iter_start = "{block:" + block_iter_name + "}"
    block_iter_end = "{/block:" + block_iter_name + "}"
    block_iter = block_iter_start + SkipTo(matchingCloseTag(block_iter_start, block_iter_end).leaveWhitespace(), include=True)
    block_iter.setParseAction(self._replace_block_iter(options))

    # order matters: the most specific block kinds are tried first, the
    # catch-all block_def_cond and bare variable come last
    parser = (block_iter | block_type | block_cond |
              block_def_cond | i18n_label | variable)
    return parser.transformString(template)
def expr(self) -> ParserElement:
    """Return the grammar for ``{color:<spec>}text{color}`` blocks."""
    INTENSITY = Word(nums)
    ALPHA = Word(nums + ".")
    SEP = "," + Optional(White())
    # rgba(r, g, b, a) functional notation
    RGBA = (CaselessLiteral("rgba(") + INTENSITY.setResultsName("red") + SEP
            + INTENSITY.setResultsName("green") + SEP
            + INTENSITY.setResultsName("blue") + SEP + ALPHA + ")")
    # a color spec is a #hex value, a named color, or an rgba() expression
    COLOR = Word("#", hexnums) ^ Word(alphas) ^ RGBA
    expr = Combine(
        "{color:" + COLOR.setResultsName("color") + "}"
        + SkipTo("{color}").setResultsName("text") + "{color}",
    )
    return expr.setParseAction(self.action)
def _define_grammar(self):
    """Build the grammar pieces for a catalogue record and return them as a
    dict keyed by section name.

    NOTE: Python 2 only — uses ``unichr``/``xrange``.
    """
    g = {}
    label = Literal('Contents') | Literal('Caption title') | \
        Literal('Sub-caption') | Literal('Half-title') | \
        Literal('Footline') | Literal('Comments') | \
        Literal('Modificatons') | Literal('Errors') | \
        Literal('DMF') | Literal('ADF')
    # NOTE(review): 'Modificatons' spelling presumably mirrors the source
    # data — confirm before "fixing" it.
    copies_label = LineStart() + Literal('Copies')
    # every BMP code point whose Unicode category is a letter
    all_chars = u''.join(
        unichr(c) for c in xrange(65536)
        if unicodedata.category(unichr(c)).startswith('L'))
    # a section runs until the next label line (or end of input)
    section_separator = LineEnd() + FollowedBy(label | copies_label |
                                               StringEnd())
    section = SkipTo(section_separator)
    # library sigil: two letter-words joined by a hyphen
    library = Combine(Word(all_chars) + Literal(u'-') + Word(all_chars))
    copy_separator = LineEnd() + FollowedBy(library) | \
        LineEnd() + StringEnd() | StringEnd()
    copy = library + SkipTo(copy_separator) + Suppress(copy_separator)
    g['comments'] = Suppress('Comments') + SkipTo(section_separator)
    # first line of the record is the code, second is the title
    g['code'] = StringStart() + SkipTo(LineEnd()) + Suppress(LineEnd())
    g['title'] = Suppress(g['code']) + Suppress(LineEnd()) + section
    g['copies'] = Suppress(copies_label) + OneOrMore(Group(copy))
    return g
def getToken(self):
    """Build the token for this delimited construct: begin + content + end.

    The content is everything up to the end token; matching aborts on a
    forbidden token and skips over no-format regions as well as nested
    occurrences of this same token (self-recursion via the Forward).
    """
    begin = self._getBeginToken()
    end = self._getEndToken().suppress()
    forbidden = self._getForbiddenToken()
    no_format = NoFormatFactory.make(self.parser)

    token = Forward()
    inside = SkipTo(end, failOn=forbidden,
                    ignore=no_format | token).leaveWhitespace()
    token << begin + inside + end
    # name the expression and attach the conversion action
    token = token.setParseAction(self.conversionParseAction)(self.name)
    return token
def getkw_bnf(self):
    """Build and return the getkw input-file grammar: keywords, vectors,
    ``$name ... $end`` data blocks and nested, optionally ``<tag>``-ed
    sections, with parse actions that populate the section tree."""
    sect_begin = Literal("{").suppress()
    sect_end = Literal("}").suppress()
    array_begin = Literal("[").suppress()
    array_end = Literal("]").suppress()
    tag_begin = Literal("<").suppress()
    tag_end = Literal(">").suppress()
    eql = Literal("=").suppress()
    dmark = Literal('$').suppress()
    end_data = Literal('$end').suppress()
    prtable = alphanums + r'!$%&*+-./<>?@^_|~'
    # FIX: raw strings — '\d' in a plain literal is an invalid escape
    # sequence (DeprecationWarning now, SyntaxError in future Python).
    ival = Regex(r'[-]?\d+')
    dval = Regex(r'-?\d+\.\d*([eE]?[+-]?\d+)?')
    lval = Regex(r'([Yy]es|[Nn]o|[Tt]rue|[Ff]alse|[Oo]n|[Oo]ff)')

    # Helper definitions
    kstr = quotedString.setParseAction(removeQuotes) ^ \
        dval ^ ival ^ lval ^ Word(prtable)
    name = Word(alphas + "_", alphanums + "_")
    # bracketed list; embedded newlines are allowed and suppressed
    vec = array_begin + delimitedList(
        dval ^ ival ^ lval ^ Word(prtable) ^
        Literal("\n").suppress() ^
        quotedString.setParseAction(removeQuotes)) + array_end
    sect = name + sect_begin
    tag_sect = name + Group(tag_begin + name + tag_end) + sect_begin

    # Grammar
    keyword = name + eql + kstr
    vector = name + eql + vec
    data = Combine(dmark + name) + SkipTo(end_data) + end_data
    section = Forward()
    sect_def = (sect | tag_sect)  # | vec_sect)
    input = section | data | vector | keyword
    section << sect_def + ZeroOrMore(input) + sect_end

    # Parsing actions
    ival.setParseAction(self.conv_ival)
    dval.setParseAction(self.conv_dval)
    lval.setParseAction(self.conv_lval)
    keyword.setParseAction(self.store_key)
    vector.setParseAction(self.store_vector)
    data.setParseAction(self.store_data)
    sect.setParseAction(self.add_sect)
    tag_sect.setParseAction(self.add_sect)
    sect_end.setParseAction(self.pop_sect)

    bnf = ZeroOrMore(input) + StringEnd().setFailAction(parse_error)
    bnf.ignore(pythonStyleComment)
    return bnf
def parser():
    """Build (once) and return the cached grammar for regex patterns."""
    global _parser
    if _parser is None:
        # whitespace is significant inside a regular expression
        ParserElement.setDefaultWhitespaceChars("")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lparen = Literal("(")
        rparen = Literal(")")

        # \d \w \s \Z character-class macros
        reMacro = Suppress("\\") + oneOf(list("dwsZ"))
        # any other backslash escape
        escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables)))
        # printable chars that need no escaping
        reLiteralChar = "".join(
            c for c in string.printable if c not in r"\[]{}().*?+|")

        # [...] character ranges (escapes inside are skipped over)
        reRange = Combine(lbrack.suppress()
                          + SkipTo(rbrack, ignore=escapedChar)
                          + rbrack.suppress())
        reLiteral = (escapedChar | oneOf(list(reLiteralChar)))
        reDot = Literal(".")
        # {n}, {m,n}, or one of * + ?
        repetition = (
            (lbrace + Word(nums).setResultsName("count") + rbrace) |
            (lbrace + Word(nums).setResultsName("minCount") + "," +
             Word(nums).setResultsName("maxCount") + rbrace) |
            oneOf(list("*+?")))

        reExpr = Forward()
        # (...) groups, optionally non-capturing (?:) or named (?P)
        reGroup = (lparen.suppress() +
                   Optional(Literal("?").suppress() +
                            oneOf(list(":P"))).setResultsName("option") +
                   reExpr.setResultsName("expr") +
                   rparen.suppress())

        reTerm = (reLiteral | reRange | reMacro | reDot | reGroup)
        # NOTE: operatorPrecedence is the pre-2.3 name of infixNotation
        reExpr << operatorPrecedence(reTerm, [
            (repetition, 1, opAssoc.LEFT, create(Repetition)),
            (None, 2, opAssoc.LEFT, create(Sequence)),
            (Suppress('|'), 2, opAssoc.LEFT, create(Alternation)),
        ])

        reGroup.setParseAction(create(Group))
        reRange.setParseAction(create(Range))
        reLiteral.setParseAction(create(Character))
        reMacro.setParseAction(create(Macro))
        reDot.setParseAction(create(Dot))

        _parser = reExpr

    return _parser
def parse_config_file(filepath):
    """Parse a NetScaler configuration file line by line.

    :param filepath: path of the NetScaler input configuration
    :return: list of parsed commands; each command has a trailing
             ``['line_no', <n>]`` entry appended for diagnostics
    """
    EOL = LineEnd().suppress()
    comment = Suppress("#") + Suppress(restOfLine) + EOL
    SOL = LineStart().suppress()
    blank_line = SOL + EOL
    result = []
    hyphen = Literal("-")
    # any printable word that does not begin with '-'
    not_hyphen_sign = ''.join(c for c in printables if c != '-')
    text = Word(not_hyphen_sign, printables)
    # '-option' flags: strip only the leading hyphen from the key
    key = Word('-', printables).setParseAction(
        lambda t: t[0].replace('-', '', 1))
    val = originalTextFor(Optional(ZeroOrMore(text), default=None))
    option = Group(key + val)
    multi_word_names = quotedString
    # q{...} quoted objects are kept verbatim
    q_obj = originalTextFor(Keyword('q{') + SkipTo(Keyword("}")))
    command = Group(
        OneOrMore(q_obj | multi_word_names | text) + ZeroOrMore(option))
    command.ignore(comment | blank_line)
    with open(filepath) as infile:
        line_no = 1
        print("Parsing Input Configuration...")
        lines = infile.readlines()
        total_lines = len(lines)
        for line in lines:
            try:
                tmp = command.parseString(line)
                tokens = tmp.asList()
                if tokens:
                    # remember the source line number for diagnostics
                    tokens[0].append(['line_no', str(line_no)])
                result += tokens
                line_no += 1
            except Exception as exception:
                # unparseable lines are logged and skipped, not fatal
                line_no += 1
                LOG.error("Parsing error: " + line)
            msg = "Parsing started..."
            if line_no <= total_lines:
                ns_util.print_progress_bar(line_no, total_lines, msg,
                                           prefix='Progress', suffix='')
    return result
def __init__(self):
    """Parser for instruction.

    Example:
        {{<a>},{<a>},{<a>},{<a>}}
        {{<!>},{<!>},{<!>},{<a>}}
        <{o"i!a,<{i<a>
    """
    debug = False
    self.garbo_count = 0  # running total of non-cancelled garbage characters
    # NOTE: LBRACK/RBRACK name the '<'/'>' garbage delimiters here;
    # LBRACE/RBRACE/BANG are bound but unused below.
    LBRACK, RBRACK, LBRACE, RBRACE, BANG = map(Suppress, "<>{}!")
    nonspecial = CharsNotIn('<>{}!')
    # '!' cancels the single character that follows it
    ignored = Word('!', printables, exact=2)
    enclosed_garbo = SkipTo(Literal('>'), ignore=ignored)
    val_str = Forward()
    garbo_str = Forward()
    # NOTE: this Forward is immediately rebound on the next line and unused
    item = Forward()
    # a parsed item
    item = (ignored | garbo_str | val_str | nonspecial).setDebug(debug)
    # stuff in {}s
    val_str << nestedExpr('{', '}', content=item,
                          ignoreExpr=None).setDebug(debug)
    # stuff in <>s (suppressed)
    garbo_str << (LBRACK + Optional(enclosed_garbo) + RBRACK).setDebug(debug)

    def cvt_list(toks):
        return toks.asList()

    val_str.setParseAction(cvt_list)

    def take_garbo(s, loc, toks):
        # count garbage characters after removing '!x' cancelled pairs
        m = toks[0]
        ig_str = re.sub(r'!.', '', m)
        ln = len(ig_str)
        self.garbo_count += ln
        return f"<GARBO: {ln}>"

    enclosed_garbo.setParseAction(take_garbo)
    ignored.setParseAction(lambda: '!IGNORED')

    # pattern build
    self._pattern = item
def getkw_bnf(self):
    """Build and return the getkw grammar: keywords, vectors, ``$name...$end``
    data blocks and nested sections with optional ``(key)``/``([vec])``
    arguments; parse actions populate the section tree."""
    lcb = Literal("{").suppress()
    rcb = Literal("}").suppress()
    lsb = Literal("[").suppress()
    rsb = Literal("]").suppress()
    lps = Literal("(").suppress()
    rps = Literal(")").suppress()
    eql = Literal("=").suppress()
    dmark = Literal('$').suppress()
    end_sect = rcb
    end_data = Literal('$end').suppress()
    prtable = srange("[0-9a-zA-Z]") + '!$%&*+-./<>?@^_|~:'

    kstr = Word(prtable) ^ quotedString.setParseAction(removeQuotes)
    name = Word(alphas + "_", alphanums + "_")
    # bracketed list; embedded newlines are allowed and suppressed
    vec = lsb + delimitedList(Word(prtable) ^ Literal("\n").suppress() ^
                              quotedString.setParseAction(removeQuotes)) + rsb
    key = kstr ^ vec  # NOTE: bound but not used below

    keyword = name + eql + kstr
    vector = name + eql + vec
    # $name ... $end free-form data block
    data = Combine(dmark + name) + SkipTo(end_data) + end_data
    data.setParseAction(self.store_data)
    sect = name + lcb
    sect.setParseAction(self.add_sect)
    key_sect = name + Group(lps + kstr + rps) + lcb
    key_sect.setParseAction(self.add_sect)
    vec_sect = name + Group(lps + vec + rps) + lcb
    vec_sect.setParseAction(self.add_vecsect)
    end_sect.setParseAction(self.pop_sect)
    keyword.setParseAction(self.store_key)
    vector.setParseAction(self.store_vector)

    section = Forward()
    input = section ^ data ^ keyword ^ vector
    sectdef = sect ^ key_sect ^ vec_sect
    section << sectdef + ZeroOrMore(input) + rcb
    bnf = ZeroOrMore(input)
    bnf.ignore(pythonStyleComment)
    return bnf
def computeVariables(self, text):
    """Scan *text* for ``$name = content`` declarations and store the
    computed value of each in self.descVariables."""
    # Literals
    dollar = Literal('$')
    eq = Literal("=")
    eol = LineEnd().suppress()
    # Declare Variable: '$name ='
    startVar = (dollar.suppress() +
                Word(alphanums).setResultsName("name") +
                eq.suppress())
    # content runs until the next declaration, a '--' marker, or end of line
    declareVariables = (OneOrMore(
        Group(startVar +
              SkipTo(startVar | Literal("--") | eol).setResultsName(
                  "content")).setResultsName("variables*")))
    # '==' would otherwise look like the start of a declaration
    declareVariables.ignore(Literal("=="))
    token = declareVariables.searchString(text)
    for var in token:
        for name, content in var:
            self.descVariables[name] = self.compute(content, verbose=False)
def parse(self, data=""): if data: self.raw = data SPACE = White().suppress() HOST = Literal("Host").suppress() KEY = Word(alphanums + "~*._-/") VALUE = Word(alphanums + "~*._-/") paramValueDef = SkipTo("#" | lineEnd) indentStack = [1] HostDecl = HOST + SPACE + VALUE paramDef = Dict(Group(KEY + SPACE + paramValueDef)) block = indentedBlock(paramDef, indentStack) HostBlock = Dict(Group(HostDecl + block)) try: return OneOrMore(HostBlock).ignore(pythonStyleComment).parseString(self.raw) except ParseException as e: return None
def check_location(symbol, expected):
    """Verify that *symbol* was placed in the *expected* output section.

    Scans the module-level ``contents`` (map-file text) for a line of the
    form ``<section> <hex-address> <symbol>`` and exits the process with
    status 1 when the symbol is absent or placed in a different section.
    """
    pattern = Word(alphanums + '._').setResultsName('actual') + Word(
        hexnums) + Literal(symbol) + LineEnd()
    pattern = SkipTo(pattern) + pattern
    try:
        # NOTE: relies on a module-level ``contents`` variable
        results = pattern.parseString(contents)
    except ParseException:
        print("check placement fail: '%s' was not found" % (symbol))
        exit(1)
    if results.actual != expected:
        print("check placement fail: '%s' was placed in '%s', not in '%s'"
              % (symbol, results.actual, expected))
        exit(1)
    # BUG FIX: the success-path format string contained a literal '******'
    # where the '%s' placeholder belonged, so the two-element argument tuple
    # raised TypeError ("not all arguments converted") on every pass.
    print("check placement pass: '%s' was successfully placed in '%s'"
          % (symbol, results.actual))
def getkw_bnf(self):
    """Build and return the getkw grammar: ``name = value`` keywords,
    ``name = [vector]`` assignments, ``$name...$end`` data blocks and nested
    sections with optional ``(key)``/``(vector)`` arguments."""
    sect_begin = Literal("{").suppress()
    sect_end = Literal("}").suppress()
    array_begin = Literal("[").suppress()
    array_end = Literal("]").suppress()
    arg_begin = Literal("(").suppress()
    arg_end = Literal(")").suppress()
    eql = Literal("=").suppress()
    dmark = Literal('$').suppress()
    end_data = Literal('$end').suppress()
    prtable = alphanums + r'!$%&*+-./<>?@^_|~'

    # Helper definitions
    kstr = Word(prtable) ^ quotedString.setParseAction(removeQuotes)
    name = Word(alphas + "_", alphanums + "_")
    # bracketed list; embedded newlines are allowed and suppressed
    vec = array_begin + delimitedList(Word(prtable) ^
                                      Literal("\n").suppress() ^
                                      quotedString.setParseAction(removeQuotes)) + array_end
    sect = name + sect_begin
    key_sect = name + Group(arg_begin + kstr + arg_end) + sect_begin
    vec_sect = name + Group(arg_begin + vec + arg_end) + sect_begin

    # Grammar
    keyword = name + eql + kstr
    vector = name + eql + vec
    data = Combine(dmark + name) + SkipTo(end_data) + end_data
    section = Forward()
    sect_def = (sect | key_sect | vec_sect)
    input = section | data | vector | keyword
    section << sect_def + ZeroOrMore(input) + sect_end

    # Parsing actions
    keyword.setParseAction(self.store_key)
    vector.setParseAction(self.store_vector)
    data.setParseAction(self.store_data)
    sect.setParseAction(self.add_sect)
    key_sect.setParseAction(self.add_sect)
    vec_sect.setParseAction(self.add_vecsect)
    sect_end.setParseAction(self.pop_sect)

    bnf = ZeroOrMore(input) + StringEnd().setFailAction(parse_error)
    bnf.ignore(pythonStyleComment)
    return bnf
def get_linked_articles(self, wikipage):
    """Extract linked article names from a Wikipedia HTML page.

    :param wikipage: raw HTML of the page
    :return: tuple ``(articles, articlename)`` where *articles* is the set
             of linked article names and *articlename* is the page's real
             article name (from the 'Artikel' tab link), or None if absent
    """
    # Define the pyparsing grammar for a URL, that is:
    #   URLlink ::= <a href= URL>linkText</a>
    # pyparsing skips whitespace between tokens by default, so it need not
    # be spelled out in the grammar.
    linkOpenTag, linkCloseTag = makeHTMLTags("a")
    link = linkOpenTag + SkipTo(linkCloseTag).setResultsName(
        "body") + linkCloseTag.suppress()

    def _strip_wiki_prefix(href):
        # BUG FIX: the original used href.lstrip('/wiki/'), which strips any
        # leading run of the characters {'/', 'w', 'i', 'k'} rather than the
        # prefix string (e.g. '/wiki/iPhone' became 'Phone').
        prefix = '/wiki/'
        return href[len(prefix):] if href.startswith(prefix) else href

    articles = set()
    # BUG FIX: initialize to avoid UnboundLocalError when no 'Artikel'
    # link is present in the page
    articlename = None
    for toks, strt, end in link.scanString(wikipage):
        href = toks.startA.href
        if (len(href) != 0 and                  # remove empty links
                href.find('#') == -1 and        # remove anchors
                href.find(':') == -1 and        # remove wikipedia special links
                href.find('?') == -1 and        # remove wikipedia special links
                href.find('Hauptseite') == -1):  # remove link to main page
            if toks.body == "Artikel":
                # save real(!) article name, so we don't get confused by redirects
                articlename = _strip_wiki_prefix(href)
            else:
                articles.add(_strip_wiki_prefix(href))
    return (articles, articlename)
def _grammar(self):
    """Return the grammar for a parameter file: ``key value;`` entries and
    ``name { ... }`` dictionaries, arbitrarily nested."""
    ident = Word(alphanums + ".")
    semi = Literal(";").suppress()
    # lrb = Literal("(").suppress()
    # rrb = Literal(")").suppress()
    lcb = Literal("{").suppress()
    rcb = Literal("}").suppress()

    # a value is everything up to the terminating semicolon
    Value = SkipTo(semi)
    KeyValue = Dict(Group(ident + Value + semi))
    Dictionary = Forward()
    Block = lcb + ZeroOrMore(Dictionary | KeyValue) + rcb
    Dictionary << Dict(Group(ident + Block))
    ParameterFile = ZeroOrMore(Dictionary | KeyValue)
    # both C (/* */) and C++ (//) style comments are ignored
    ParameterFile.ignore(cStyleComment)
    ParameterFile.ignore(cppStyleComment)
    return ParameterFile
def parse_block(self, block_text): """Parses sql block into tokens """ # Valid grammar looks like this: # {sqllinechart: title='Some string' | colors=green, yellow} # make a grammar block_start = Literal("{") sql_start = Keyword(self.TAGNAME, caseless=True) colon = Literal(":") sql_end = Literal("}") separator = Literal("|") block_end = Keyword("{" + self.TAGNAME + "}", caseless=True) # params field_name = Word(alphanums) equal_sign = Suppress(Literal("=")) # whatever value field_value = (CharsNotIn("|}")) # param name and value param_group = Group(field_name + equal_sign + field_value) # list of all params param_list = delimitedList(param_group, '|') # helper param_dict = Dict(param_list) # sql text sql_text = SkipTo(block_end) sqldecl = Forward() sqldecl << (block_start + sql_start + Optional(colon) + Optional(param_dict) + sql_end + sql_text.setResultsName('sqltext') + block_end) block_str = "".join(block_text) tokens = sqldecl.parseString(block_str) return tokens
def cut_from_tail(self, text):
    '''
    Removes 'text' from end of sql. Not case sensitive.

    :param text: phrase to remove (may contain several words)
    :return: True if the phrase was found at the end and removed,
             False otherwise
    '''
    # build a caseless match of the whole phrase, word by word
    text_arr = text.split(' ')
    ending = CaselessKeyword(text_arr[0])
    for x in text_arr[1:]:
        ending = ending + CaselessKeyword(x)
    # the phrase must sit at the very end of the statement
    ending = ending + StringEnd()
    expr = (originalTextFor(SkipTo(ending)))('original') + \
        (originalTextFor(ending))('ending')
    try:
        r = expr.parseString(self._sql)
    except ParseException:
        return False
    # keep only the part that preceded the removed ending
    self._sql = r.asDict()['original'].strip()
    return True
def get_ball_info(self, line):
    """Parse one ``(show <frame> ((b) <x> <y> <vx> <vy>) ...)`` log line.

    :param line: a single frame line from the match log
    :return: dict with keys pos_x, pos_y, vel_x, vel_y (string values)
    """
    left_p = Literal("(").suppress()
    right_p = Literal(")").suppress()
    frame_number = Word(nums)
    # NOTE(review): Word("show ") matches any run of the characters
    # {s, h, o, w, space}, not the literal word "show" — confirm intended.
    show_frame = Word("show ") + frame_number
    # ((b) (<x> <y> <vx> <vy>)): four signed decimal numbers for the ball
    ball = left_p + left_p + Literal("b") + right_p + Group(
        Word(nums + "-.") * 4) + right_p
    # the rest of the line (player data etc.) is skipped
    frame_line = left_p + Group(show_frame).suppress() + ball + SkipTo(
        lineEnd).suppress()
    parsed_ball_info = frame_line.parseString(line)
    ball_info = {
        "pos_x": parsed_ball_info[1][0],
        "pos_y": parsed_ball_info[1][1],
        "vel_x": parsed_ball_info[1][2],
        "vel_y": parsed_ball_info[1][3]
    }
    return ball_info