def init_latex_parser(self):
    """Build the LaTeX text grammar and store a parse callable on ``self.parser``.

    The grammar splits text into words, punctuation runs and backslash
    commands; any character outside those classes is treated as filler
    and suppressed.

    NOTE(review): ``content`` is not defined inside this function. It must
    come from an enclosing or module scope, otherwise the final line raises
    ``NameError`` -- confirm which expression was meant to be the top-level
    rule.
    """
    # Character classes used to define the grammar terminals.
    lowers = 'qwertyuiopasdfghjklzxcvbnm'
    uppers = lowers.upper()
    alphas = lowers + uppers
    digits = '1234567890'
    other_word_symbols = '-_'
    word_chars = alphas + other_word_symbols + digits
    punctuation_symbols = '.!?:;…'
    command_symbol = '\\'
    # Everything that is not part of a word, punctuation or a command
    # introducer counts as "white" filler.
    white_characters = CharsNotIn(
        word_chars + punctuation_symbols + command_symbol)

    # define grammar
    word = Word(word_chars)
    # As far as I can tell, only alphas are allowed in a latex command (bar
    # the special commands such as \&).  Word(alphas) matches a run of those
    # characters; the previous OneOrMore(alphas) converted the whole
    # alphabet string into a single Literal and so never matched a command.
    command = command_symbol + Word(alphas)
    punc = Word(punctuation_symbols)
    white_and_word = Group(Suppress(Optional(white_characters)) + word)
    white_and_command = Group(
        Suppress(Optional(white_characters)) + command)
    white_and_punc = Group(Suppress(Optional(white_characters)) + punc)
    sentence = ZeroOrMore(white_and_word) + white_and_punc
    pure_piece = ZeroOrMore(sentence)
    bracketed_piece = ('{' + pure_piece
                       + Suppress(ZeroOrMore(white_characters)) + '}')
    self.parser = content.parseString
def func_tokens(dictionary, parse_action):
    """Build the pyparsing expressions for ``$func(...)`` calls and their arguments.

    Args:
        dictionary: mapping consulted (via ``.get``) with the integer that
            follows ``$`` in a ``$<digits>`` token; when falsy, ``$<digits>``
            tokens are not recognised inside argument lists.
        parse_action: parse action attached to the function-call expression.

    Returns:
        A ``(func_tok, arglist, rx_tok)`` tuple of pyparsing expressions.
    """
    identifier = Word(alphas + '_', alphanums + '_')
    call_name = Combine('$' + identifier.copy()('funcname'))

    # "$name" immediately followed by a balanced parenthesised group whose
    # original text is kept under the "args" results name.
    func_tok = call_name + originalTextFor(nestedExpr())('args')
    func_tok.leaveWhitespace()
    func_tok.setParseAction(parse_action)
    func_tok.enablePackrat()

    def replace_token(tokens):
        # Replace "$<n>" with dictionary[n], or an empty string if absent.
        return dictionary.get(int(tokens.num), u'')

    rx_tok = Combine(Literal('$').suppress() + Word(nums)('num'))
    rx_tok.setParseAction(replace_token)

    def strip_first(s, l, tok):
        return tok[0].strip()

    text_tok = CharsNotIn(u',').setParseAction(strip_first)
    quote_tok = QuotedString('"')

    if dictionary:
        argument = quote_tok | rx_tok | text_tok
    else:
        argument = quote_tok | text_tok
    arglist = Optional(delimitedList(argument))

    return func_tok, arglist, rx_tok
class NLPyParser(NLBaseParser):
    """pyparsing-based implementation of the NLBaseParser.

    The class-level grammar recognises a line of ``name=value`` pairs that
    must contain one ``ts`` pair and one ``event`` pair (in any order, via
    ``Each``) followed by a newline and the end of the string.
    """
    notSpace = CharsNotIn(" \n")
    eq = Literal('=').suppress()
    # A value is either a double-quoted string (quotes kept, backslash as
    # the escape character) or a run of non-space characters.
    value = (QuotedString('"', escChar=chr(92), unquoteResults=False)
             ^ OneOrMore(notSpace))
    ts = Group(Literal('ts') + eq + value)
    event = Group(Literal('event') + eq + value)
    # NOTE(review): the negative lookahead is not anchored to a word
    # boundary, so it presumably also rejects names that merely start with
    # "ts" or "event" -- confirm whether that is intended.
    name = ~oneOf("ts event") + Word(alphanums + '-_.')
    nv = ZeroOrMore(Group(name + eq + value))
    nvp = Each([ts, event, nv]) + White('\n').suppress() + StringEnd()

    def parseLine(self, line):
        """Parse one line into a ``{name: value}`` dict.

        The ``ts`` value is passed through ``parse_ts`` when
        ``self.parse_date`` is truthy.

        Raises:
            ValueError: wrapping the pyparsing ``ParseException`` when the
                line does not match the grammar.
        """
        try:
            pairs = self.nvp.parseString(line).asList()
        except ParseException as E:
            raise ValueError(E)

        result = {}
        for pair in pairs:
            key, raw = pair[0], pair[1]
            if self.parse_date and key == 'ts':
                result[key] = parse_ts(raw)
            else:
                result[key] = raw
        return result
def pyparse_blk(text):
    """Parse block-layout text and return the populated root block.

    The grammar accepts nested ``[color: title ...]`` blocks, empty ``[]``
    blocks and runs of ``/`` between groups of blocks (reported as the
    length of the run).

    Args:
        text: the complete source text; it must match in full.

    Returns:
        The root block (``stack[0]``) after ``populate_children`` has run.

    Raises:
        ValueError: when the text does not parse; the message carries the
            offending line number and leaves a literal ``{0}`` placeholder
            in place.
    """
    def create_add_block(tokens):
        # Blocks without an explicit colour default to white.
        return Block.Block(tokens.title,
                           tokens.color if tokens.color else 'white')

    left_bracket, right_bracket = map(Suppress, '[]')
    # Either "#rrggbb" or a colour name.  A name starts with a letter and
    # may continue with letters or digits; the arguments were previously
    # transposed, which rejected names containing digits and allowed a
    # leading digit.
    color = (Word('#', hexnums, exact=7) | Word(alphas, alphanums))('color')
    empty_block = (
        left_bracket + right_bracket)('empty_block').setParseAction(
            lambda: EmptyBlock)
    new_lines = Word('/')('new_lines').setParseAction(
        lambda tokens: len(tokens.new_lines))
    title = CharsNotIn('[]/\n')('title').setParseAction(
        lambda tokens: tokens.title.strip())
    block_data = Optional(color + Suppress(':')) + Optional(title)
    block_data.addParseAction(create_add_block)

    # Blocks nest recursively, hence the forward declaration.
    blocks = Forward()
    block = left_bracket + block_data + blocks + right_bracket
    blocks << Group(
        ZeroOrMore(Optional(new_lines) + OneOrMore(empty_block | block)))

    stack = [Block.create_root_block()]
    try:
        result = blocks.parseString(text, parseAll=True)
        assert len(result) == 1
        blocks_list = result.asList()[0]
        populate_children(blocks_list, stack)
    except (ParseSyntaxException, ParseException) as parse_err:
        raise ValueError('Error {{0}}: {0}'.format(parse_err.lineno))
    return stack[0]
def PIs():
    """Return the grammar for XML processing instructions.

    PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'

    The target is available as ``pitarget`` and the remaining text, if any,
    as ``picontents``.

    >>> tests=(
    ... '''<?foo?>''',
    ... '''<?foo ?>''',
    ... '''<?foo abc def ghi ?>''',
    ... '''<?xml-stylesheet foo="bla" bar="x" ?>''',
    ... '''<?xml-stylesheet
    ...  foo="bla"
    ...  foo="x"
    ...  ?>''',
    ... )
    >>> p=PIs()
    >>> for t in tests:
    ...     result=p.parseString(t)
    ...     print(result)
    ['foo']
    ['foo', ' ']
    ['foo', ' abc def ghi ']
    ['xml-stylesheet', ' foo="bla" bar="x" ']
    ['xml-stylesheet', '\\n foo="bla"\\n foo="x"\\n ']
    """
    opener = Suppress('<?')
    target = Word(alphas, alphanums+'-_')('pitarget')
    # Contents run up to the first '?' or '>' character.
    contents = ZeroOrMore(CharsNotIn('?>'))('picontents')
    closer = Suppress('?>')
    return opener + target + contents + closer
def _parser_piece_text():
    """
    Return PyParsing element to the text of a markdown link.

    The element matches a balanced ``[...]`` group, stores it under the
    results name ``text`` and yields the text without the outer brackets.
    """
    # A markdown link text may not contain a blank line: two line breaks
    # separated only by optional spaces/tabs.
    paragraph_break = (Word("\n\r", exact=1)
                       + Optional(Word(" \t"))
                       + Word("\n\r", exact=1))

    # Escaped square brackets do not count when balancing brackets.
    escaped_bracket = Literal("\\[") | Literal("\\]")

    # Content allowed inside the brackets: one character at a time, never a
    # bracket, and never at the start of a paragraph break.
    #
    # Normally nestedExpr applies the content parser to each
    # whitespace-separated string inside the nested expression.  Because
    # whitespaceChars is set to '', the content parser is applied to
    # characters one at a time instead.  If that ever changes, content
    # would have to become something like
    # Combine(OneOrMore(~ignore + content_character)).
    bracket_free_char = ~FollowedBy(paragraph_break) + CharsNotIn(
        "[]", exact=1)

    balanced = nestedExpr(
        opener="[",
        closer="]",
        content=bracket_free_char,
        ignoreExpr=escaped_bracket,
    )
    text = originalTextFor(balanced).setResultsName("text")

    def strip_outer_brackets(s, l, toks):
        return toks[0][1:-1]

    text.addParseAction(strip_outer_brackets)
    return text
def pyparsing_parse(text):
    """Parse block-layout text and return the populated root block.

    Raises ValueError, naming the offending line, when the text is not
    syntactically valid.

    >>> import os
    >>> dirname = os.path.join(os.path.dirname(__file__), "data")
    >>> filename = os.path.join(dirname, "error1.blk")
    >>> pyparsing_parse(open(filename, encoding="utf8").read())
    Traceback (most recent call last):
    ...
    ValueError: Error {0}: syntax error, line 8
    >>> filename = os.path.join(dirname, "error2.blk")
    >>> pyparsing_parse(open(filename, encoding="utf8").read())
    Traceback (most recent call last):
    ...
    ValueError: Error {0}: syntax error, line 1
    >>> filename = os.path.join(dirname, "error3.blk")
    >>> pyparsing_parse(open(filename, encoding="utf8").read())
    Traceback (most recent call last):
    ...
    ValueError: Error {0}: syntax error, line 4
    >>> expected = "[white: ]\\n[lightblue: Director]\\n/\\n/\\n[white: ]\\n[lightgreen: Secretary]\\n/\\n/\\n[white: Minion #1]\\n[white: ]\\n[white: Minion #2]"
    >>> filename = os.path.join(dirname, "hierarchy.blk")
    >>> blocks = pyparsing_parse(open(filename, encoding="utf8").read())
    >>> str(blocks).strip() == expected
    True
    >>> expected = "[#00CCDE: MessageBox Window\\n[lightgray: Frame\\n[white: ]\\n[white: Message text]\\n/\\n/\\n[goldenrod: OK Button]\\n[white: ]\\n[#ff0505: Cancel Button]\\n/\\n[white: ]\\n]\\n]"
    >>> filename = os.path.join(dirname, "messagebox.blk")
    >>> blocks = pyparsing_parse(open(filename, encoding="utf8").read())
    >>> str(blocks).strip() == expected
    True
    """
    def add_block(tokens):
        # A node without an explicit colour is white.
        if tokens.color:
            chosen = tokens.color
        else:
            chosen = "white"
        return Block.Block(tokens.name, chosen)

    def count_rows(tokens):
        return len(tokens.new_rows)

    def stripped_name(tokens):
        return tokens.name.strip()

    left_bracket, right_bracket = map(Suppress, "[]")
    new_rows = Word("/")("new_rows").setParseAction(count_rows)
    name = CharsNotIn("[]/\n")("name").setParseAction(stripped_name)
    color = (Word("#", hexnums, exact=7) |
             Word(alphas, alphanums))("color")
    empty_node = (left_bracket + right_bracket).setParseAction(
        lambda: EmptyBlock)

    # Nodes contain nodes, so the collection is declared up front.
    nodes = Forward()
    node_data = Optional(color + Suppress(":")) + Optional(name)
    node_data.setParseAction(add_block)
    node = left_bracket - node_data + nodes + right_bracket
    nodes << Group(
        ZeroOrMore(Optional(new_rows) + OneOrMore(node | empty_node)))

    stack = [Block.get_root_block()]
    try:
        results = nodes.parseString(text, parseAll=True)
        assert len(results) == 1
        items = results.asList()[0]
        populate_children(items, stack)
    except (ParseException, ParseSyntaxException) as err:
        message = "Error {{0}}: syntax error, line {0}".format(err.lineno)
        raise ValueError(message)
    return stack[0]
def fromString(inputText):
    """Parse IDSL source text and return the pyparsing result tree.

    ``/* ... */`` blocks are stripped from the text first; comments matched
    by ``cppStyleComment`` are ignored by the grammar.  The result exposes
    ``imports`` and ``module`` (with ``name`` and ``contents``).

    Args:
        inputText: the IDSL source as a string.

    Returns:
        The ``ParseResults`` produced by the top-level ``IDSL`` expression.
    """
    text = nestedExpr("/*", "*/").suppress().transformString(inputText)

    # Punctuation.  Word(c) accepts a run of the same character, which is
    # kept here so that e.g. a doubled ';' stays tolerated.
    semicolon = Suppress(Word(";"))
    quote = Suppress(Word("\""))
    op = Suppress(Word("{"))
    cl = Suppress(Word("}"))
    opp = Suppress(Word("("))
    clp = Suppress(Word(")"))
    lt = Suppress(Word("<"))
    gt = Suppress(Word(">"))
    eq = Suppress(Word("="))

    identifier = Word(alphas+"_", alphanums+"_")
    typeIdentifier = Word(alphas+"_", alphanums+"_:")

    # Keywords are matched with Literal.  Word("struct") would match any
    # run of the characters s, t, r, u, c rather than the keyword itself.
    structIdentifer = Group(
        typeIdentifier.setResultsName('type')
        + identifier.setResultsName('identifier')
        + Optional(eq)
        + Optional(CharsNotIn(";").setResultsName('defaultValue'))
        + semicolon)
    structIdentifers = Group(OneOrMore(structIdentifer))

    ## Imports
    idslImport = (Suppress(Literal("import")) + quote
                  + CharsNotIn("\";").setResultsName('path')
                  + quote + semicolon)
    idslImports = ZeroOrMore(idslImport)

    structDef = (Literal("struct").setResultsName('type')
                 + identifier.setResultsName('name') + op
                 + structIdentifers.setResultsName("structIdentifiers")
                 + cl + semicolon)
    dictionaryDef = (Literal("dictionary").setResultsName('type') + lt
                     + CharsNotIn("<>").setResultsName('content') + gt
                     + identifier.setResultsName('name') + semicolon)
    sequenceDef = (Literal("sequence").setResultsName('type') + lt
                   + typeIdentifier.setResultsName('typeSequence') + gt
                   + identifier.setResultsName('name') + semicolon)
    enumDef = (Literal("enum").setResultsName('type')
               + identifier.setResultsName('name') + op
               + CharsNotIn("{}").setResultsName('content')
               + cl + semicolon)
    exceptionDef = (Literal("exception").setResultsName('type')
                    + identifier.setResultsName('name') + op
                    + CharsNotIn("{}").setResultsName('content')
                    + cl + semicolon)

    raiseDef = (Suppress(Literal("throws")) + typeIdentifier
                + ZeroOrMore(Literal(',') + typeIdentifier))
    decoratorDef = Literal('idempotent') | Literal('out')
    retValDef = typeIdentifier.setResultsName('ret')

    firstParam = Group(
        Optional(decoratorDef.setResultsName('decorator'))
        + typeIdentifier.setResultsName('type')
        + identifier.setResultsName('name'))
    nextParam = Suppress(Word(',')) + firstParam
    params = firstParam + ZeroOrMore(nextParam)

    remoteMethodDef = Group(
        Optional(decoratorDef.setResultsName('decorator'))
        + retValDef.setResultsName('ret')
        + typeIdentifier.setResultsName('name')
        + opp + Optional(params).setResultsName('params') + clp
        + Optional(raiseDef.setResultsName('raise'))
        + semicolon)
    interfaceDef = (Literal('interface').setResultsName('type')
                    + typeIdentifier.setResultsName('name') + op
                    + Group(ZeroOrMore(remoteMethodDef)).setResultsName(
                        'methods')
                    + cl + semicolon)

    moduleContent = Group(structDef | enumDef | exceptionDef
                          | dictionaryDef | sequenceDef | interfaceDef)
    module = (Suppress(Literal("module"))
              + identifier.setResultsName("name") + op
              + ZeroOrMore(moduleContent).setResultsName("contents")
              + cl + semicolon)

    IDSL = (idslImports.setResultsName("imports")
            + module.setResultsName("module"))
    IDSL.ignore(cppStyleComment)
    tree = IDSL.parseString(text)
    return tree
class Include(object):
    """An ``#include <header>`` directive and the grammar rule that builds it."""

    def __init__(self, header, parent=''):
        # header: the text captured between the angle brackets.
        self.header = header
        # parent: defaults to an empty string; not set by the grammar rule.
        self.parent = parent

    def __repr__(self):
        return "#include <{}>".format(self.header)

    # Grammar: INCLUDE, an opening bracket, everything up to '>', and the
    # closing bracket.  A successful match is turned into an Include
    # instance; the class name is resolved when the action runs.
    rule = (INCLUDE + LOPBRACK + CharsNotIn('>')("header") + ROPBRACK)
    rule.setParseAction(lambda t: Include(t.header))
class NginxParser(object):
    """
    Parse nginx configuration text with a pyparsing grammar.

    The grammar is built once at class level; an instance only holds the
    source text to parse.
    """

    # Terminals
    left_bracket = Literal("{").suppress()
    right_bracket = Literal("}").suppress()
    semicolon = Literal(";").suppress()
    space = White().suppress()
    key = Word(alphanums + "_/")
    value = CharsNotIn("{};,")
    location = CharsNotIn("{};," + string.whitespace)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # Rules
    # "key [value];"
    assignment = (key + Optional(space + value) + semicolon)
    # "key [modifier] [location] { assignments and nested blocks }"
    block = Forward()
    block << Group(
        Group(key + Optional(space + modifier) + Optional(space + location))
        + left_bracket
        + Group(ZeroOrMore(Group(assignment) | block))
        + right_bracket)
    # A script is a sequence of assignments and blocks; '#' comments are
    # skipped.
    script = OneOrMore(Group(assignment) | block).ignore(pythonStyleComment)

    def __init__(self, source):
        self.source = source

    def parse(self):
        """
        Return the pyparsing result tree for ``self.source``.
        """
        grammar = self.script
        return grammar.parseString(self.source)

    def as_list(self):
        """
        Return the parsed tree converted to nested lists.
        """
        tree = self.parse()
        return tree.asList()
def grammar(self, value):
    """Build the token table for a column-spec grammar into ``self.tokens``.

    Args:
        value: object providing ``list_sep``, ``range_sep`` and
            ``step_sep``; the distinct separators are used both as the
            ``sep`` alternatives and as the characters excluded from
            ``name``.
    """
    seps = list({value.list_sep, value.range_sep, value.step_sep})
    # Work on a copy: attaching the parse action to pp.quotedString itself
    # would modify the shared pyparsing object for every other user.
    quotedstr = pp.quotedString.copy().setParseAction(pp.removeQuotes)
    # Built from pairs so the token order is the written order on every
    # Python version (a dict literal is unordered before 3.7).
    self.tokens = OrderedDict([
        ('regex', Literal('r').suppress() + quotedstr),
        ('quoted', quotedstr),
        ('colnum', Grammar.integer),
        ('sep', Or(seps)),
        ('name', CharsNotIn(seps)),
    ])
class RawNginxParser(object):
    # pylint: disable=expression-not-assigned
    """A class that parses nginx configuration with pyparsing.

    The grammar is assembled at class level.  Whitespace is matched
    explicitly through ``space`` and several expressions call
    ``leaveWhitespace()``, so the order of the statements below matters.
    """

    # constants
    space = Optional(White())
    nonspace = Regex(r"\S+")
    left_bracket = Literal("{").suppress()
    # NOTE(review): leaveWhitespace() is called on ``space`` itself, not on
    # a copy; presumably this also changes every later use of ``space`` --
    # confirm against the pyparsing documentation before reordering.
    right_bracket = space.leaveWhitespace() + Literal("}").suppress()
    semicolon = Literal(";").suppress()
    key = Word(alphanums + "_/+-.")
    # "$" immediately followed by a run of non-whitespace, as one token.
    dollar_var = Combine(Literal('$') + nonspace)
    # A parenthesised condition, matched as a single regex token.
    condition = Regex(r"\(.+\)")
    # Matches anything that is not a special character AND any chars in single
    # or double quotes
    value = Regex(r"((\".*\")?(\'.*\')?[^\{\};,]?)+")
    location = CharsNotIn("{};," + string.whitespace)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    # "#" followed by the rest of the line.
    comment = space + Literal('#') + restOfLine()

    # "key [value];" -- a missing value is reported as None.
    assignment = space + key + Optional(space + value, default=None) + semicolon
    location_statement = space + Optional(modifier) + Optional(space + location + space)
    if_statement = space + Literal("if") + space + condition + space
    map_statement = space + Literal(
        "map") + space + nonspace + space + dollar_var + space
    # Blocks nest, hence the forward declaration.
    block = Forward()
    block << Group(
        # key could for instance be "server" or "http", or "location" (in which case
        # location_statement needs to have a non-empty location)
        (Group(space + key + location_statement) ^ Group(if_statement) ^
         Group(map_statement)).leaveWhitespace() + left_bracket +
        Group(ZeroOrMore(Group(comment | assignment) | block) +
              space).leaveWhitespace() + right_bracket)

    # A whole file: one or more statements or blocks, optional trailing
    # whitespace, then the end of the input.
    script = OneOrMore(Group(comment | assignment) ^ block) + space + stringEnd
    # Called for its effect on ``script``; the return value is discarded
    # (hence the pylint disable above).
    script.parseWithTabs()

    def __init__(self, source):
        # source: the configuration text handed to parseString().
        self.source = source

    def parse(self):
        """Returns the parsed tree."""
        return self.script.parseString(self.source)

    def as_list(self):
        """Returns the parsed tree as a list."""
        return self.parse().asList()
def initGrammar(self):
    """Assemble the wiki-markup grammar and store it on ``self.N_S``.

    Recognised elements: HTML comments, plain text up to the next markup
    delimiter, ``<ref>`` tags, ``'''``-quoted text, ``[[links]]``,
    ``=headers=``, ``{{templates}}`` and ``{labels}``.  Links and templates
    are post-processed by ``self.genLink`` / ``self.genTemplate``; the other
    tagged elements are wrapped in a one-entry dict keyed by their kind.
    """
    def tagged(label):
        # Parse action that wraps the matched tokens as {label: tokens}.
        return lambda s, l, t: {label: t}

    equals_run = Word("=")
    comment = htmlComment()
    name = CharsNotIn("{}|[]")
    plain_text = SkipTo(
        oneOf(["{{", "|", "[[", "]]", "}}", "'''", "<ref"]))

    # Declared early because template values may contain any element.
    elements = Forward()

    bold = QuotedString("'''").setParseAction(tagged('APOSTROFS'))

    link_body = name + Optional(
        "|" + delimitedList(CharsNotIn("[]"), delim="|"))
    link = nestedExpr(
        opener="[[", closer="]]",
        content=link_body).setParseAction(self.genLink)

    header = Group(equals_run + SkipTo("=") + equals_run).setParseAction(
        tagged('HEADER'))

    template = Forward()
    key = CharsNotIn("{}|=")
    key_values = "|" + delimitedList(
        Group(Optional(key) + Optional("=" + elements)), delim="|")

    label_content = (template
                     | ("{{" + OneOrMore("!") + "}}")
                     | CharsNotIn("{}|"))
    label = nestedExpr(opener="{", closer="}", content=label_content)

    template << nestedExpr(
        opener="{{", closer="}}",
        content=name + Optional(key_values)).setParseAction(
            self.genTemplate)

    ref_start, ref_end = makeHTMLTags("ref")
    named_ref = ref_start + SkipTo(ref_end) + ref_end
    named_ref.setParseAction(tagged('REF'))

    element = (comment | plain_text | named_ref | bold
               | link | header | template | label)
    elements << ZeroOrMore(element)
    self.N_S = elements
def initGrammar(self):
    """Assemble the wiki-markup grammar and store it on ``self.N_S``.

    An element is an HTML comment, a ``[[link]]``, an ``=header=`` or a
    ``{{template}}``.  Template values (``|key=value``) may themselves
    contain elements, which is why ``N_element`` is a forward declaration.
    Debug tracing is switched on for several sub-expressions.
    """
    N_comment = htmlComment().setParseAction(self.genComment)
    N_name = CharsNotIn("{}|[]")
    N_link = nestedExpr(
        opener="[[", closer="]]",
        content=N_name + Optional(
            "|" + delimitedList(CharsNotIn("[]"), delim="|"))
    ).setParseAction(self.genLink).setDebug(True)

    L_Equals = Word("=")
    N_header = Group(L_Equals + SkipTo("=") + L_Equals).setParseAction(
        self.genHeader)

    N_element = Forward()
    N_template = Forward().setDebug(True)

    N_key = CharsNotIn("{}|=")
    # Plain text inside a template value: runs up to the next delimiter.
    N_internalText = CharsNotIn("{}|=<[") + SkipTo(
        Literal("{{") | Literal("[[") | Literal("<!--") | Literal("<ref")
        | Literal("|") | Literal("}}"))
    N_insideElements = OneOrMore(N_element | N_internalText).setDebug(True)
    N_keyValue = Group(
        Optional(N_key)
        + Optional(Literal("=") + N_insideElements)).setDebug(True)
    N_keyValues = "|" + delimitedList(N_keyValue, delim="|")
    N_keyValues.setDebug(True)

    N_template << nestedExpr(
        opener="{{", closer="}}",
        content=N_name + Optional(N_keyValues)).setParseAction(
            self.genTemplate)

    # Define the forward declaration with "<<".  Plain assignment would
    # rebind the local name and leave the Forward already embedded in
    # N_insideElements without a definition.  The parentheses are required
    # because "<<" binds tighter than "|".
    N_element << (N_comment | N_link | N_header | N_template)
    N_element.setDebug(True)
    self.N_S = N_element
def get_parser():
    """Return a parser that normalises a dotted/bracketed path.

    The grammar accepts a leading segment followed by any number of
    ``PERIOD segment`` or ``LBRACK segment RBRACK`` accessors; the matched
    segments are joined with ``PERIOD``.  Packrat caching is enabled on
    ``ParserElement`` as a side effect.
    """
    from pyparsing import CharsNotIn, ParserElement, Suppress, ZeroOrMore

    ParserElement.enablePackrat()

    # A segment is any run of characters other than the three delimiters.
    segment = CharsNotIn(f"{PERIOD}{LBRACK}{RBRACK}")
    bracket_access = Suppress(LBRACK) + segment + Suppress(RBRACK)
    dot_access = Suppress(PERIOD) + segment

    path = segment + ZeroOrMore(dot_access ^ bracket_access)
    path.setParseAction(PERIOD.join)
    return path
def build_parser():
    """Return a pyparsing grammar for a simple INI-style file.

    The result exposes ``sections``; each section groups a ``section``
    name (the text between ``[`` and ``]``) with ``properties``, a group of
    ``key`` / ``value`` pairs separated by ``=``.  Values are the rest of
    the line with surrounding whitespace stripped.  ``#`` comments are
    ignored.
    """
    key = Word(alphanums).setResultsName('key')
    # Copy restOfLine before attaching the parse action: setParseAction
    # modifies the expression in place, and restOfLine is a shared
    # pyparsing object used by other grammars.
    value = restOfLine.copy().setParseAction(
        lambda string, location, tokens: tokens[0].strip()
    ).setResultsName('value')
    property_ = Group(key + Suppress(Literal('=')) + value)
    properties = Group(OneOrMore(property_)).setResultsName('properties')
    section_name = (Suppress('[') + OneOrMore(CharsNotIn(']')) +
                    Suppress(']')).setResultsName('section')
    section = Group(section_name + properties)
    ini_file = ZeroOrMore(section).setResultsName('sections')
    ini_file.ignore(pythonStyleComment)
    return ini_file
def _getControls(self, index=1):
    """Split every line of ``self.doc`` after the first into comma-separated fields.

    Fields are either double-quoted strings or runs of non-comma
    characters; each returned field is stripped of surrounding whitespace.

    Args:
        index: position of the field to return for each line.  A falsy
            value (``0``, ``None``) returns all fields of every line
            instead.

    Returns:
        A list of strings (one per line) when ``index`` is truthy,
        otherwise a list of lists of strings.
    """
    field = QuotedString('"') | CharsNotIn(',')
    row = delimitedList(field)
    lines = self.doc[1:]

    if not index:
        return [[cell.strip() for cell in row.parseString(line)]
                for line in lines]
    return [row.parseString(line)[index].strip() for line in lines]
def query_from_string(cls, filter_string):
    """Parse a ``key=value`` filter expression combined with and/or.

    The string is wrapped in parentheses and parsed as an infix expression
    whose operands are ``key=value`` pairs (either side may be
    double-quoted; the value may be empty).  Operands are built with
    ``cls.BoolOperand`` and combined with ``cls.BoolAnd`` / ``cls.BoolOr``.

    Returns:
        The ``result`` attribute of the first parsed token.

    Raises:
        RuntimeError: when the string does not parse or yields no tokens.

    TODO:
     * handle values with " via: a.b.c.d="hello\\"world"
     * handle keys with " via: a.\\"b.c="yeah"
     * handle key with __ in it
    """
    original = filter_string
    filter_string = str(filter_string)

    # Whatever whitespace characters occur in the input delimit atoms,
    # together with the structural characters.
    spaces = list({str(ch) for ch in filter_string if ch.isspace()})
    delimiters = spaces + [u'(', u')', u'=', u'"']

    bare = CharsNotIn(delimiters)
    quoted = Literal('"') + Optional(CharsNotIn(u'"')) + Literal('"')
    equals = Literal('=')

    operand = (quoted | bare) + equals + Optional((quoted | bare))
    operand.setParseAction(cls.BoolOperand)

    expression = infixNotation(
        operand,
        [
            ("and", 2, opAssoc.LEFT, cls.BoolAnd),
            ("or", 2, opAssoc.LEFT, cls.BoolOr),
        ],
    )

    try:
        res = expression.parseString('(' + filter_string + ')')
    except ParseException:
        raise RuntimeError(u"Invalid query %s" % original)

    if len(res) == 0:
        raise RuntimeError(
            "Parsing the filter_string %s went terribly wrong"
            % filter_string)
    return res[0].result
def __init__(self):
    """Create the label expressions and the parsers built from them.

    Two families are set up: per-quantity parsers stored as public
    attributes (``alpha``, ``lnl``, ``frq``, ``names``, ``rates``,
    ``model``), each of which skips ahead to its label, and a combined
    parser stored as ``_dash_f_e_parser``.
    """
    def skip_past(label):
        # Discard everything up to and including ``label``.
        return Suppress(SkipTo(label)) + Suppress(label)

    # Labels used by the per-quantity parsers.
    self.ALPHA_LABEL = Regex(r'alpha\[\d+\]:')
    self.LNL_LABEL = Literal('Final GAMMA-based Score of best tree')
    self.FRQ_LABEL = Regex(r'Base frequencies: (?=\d+)') ^ Regex(
        r'ML estimate base freqs\[\d+\]:')
    self.NAMES_LABEL = Regex(r'Partition: \d+ with name:\s+')
    self.RATES_LABEL = Regex(r'rates\[\d+\].+?:')
    self.MODEL_LABEL = Literal('Substitution Matrix:')

    # Per-quantity parsers.
    self.alpha = OneOrMore(skip_past(self.ALPHA_LABEL) + FLOAT)
    self.lnl = skip_past(self.LNL_LABEL) + FLOAT
    self.frq = OneOrMore(
        Group(skip_past(self.FRQ_LABEL) + OneOrMore(FLOAT)))
    self.names = OneOrMore(
        skip_past(self.NAMES_LABEL) + CharsNotIn('\n')
        + Suppress(LineEnd()))
    self.rates = OneOrMore(
        Group(skip_past(self.RATES_LABEL) + OneOrMore(FLOAT)))
    self.model = skip_past(self.MODEL_LABEL) + WORD

    # Labels used only by the combined parser.
    model_label = Literal('Substitution Matrix:')
    score_label = Literal('Final GAMMA likelihood:')
    desc_label = Literal('Model Parameters of Partition')
    name_leadin = Literal(', Name:')
    datatype_leadin = Literal(', Type of Data:')
    alpha_leadin = Literal('alpha:')
    treelength_leadin = Literal('Tree-Length:')
    rates_label = Regex(r'rate \w <-> \w:')
    freqs_label = Regex(r'freq pi\(\w\):')

    model = skip_past(model_label) + WORD
    likelihood = skip_past(score_label) + FLOAT
    description = (skip_past(desc_label) + INT
                   + Suppress(name_leadin) + SPACEDWORD
                   + Suppress(datatype_leadin) + WORD)
    alpha = Suppress(alpha_leadin) + FLOAT
    rates = Suppress(rates_label) + FLOAT
    freqs = Suppress(freqs_label) + FLOAT

    # One group per partition: description, alpha, (discarded) tree
    # length, then the grouped rates and frequencies.
    partition = Group(
        description + alpha
        + Suppress(treelength_leadin) + Suppress(FLOAT)
        + Group(OneOrMore(rates))
        + Group(OneOrMore(freqs)))
    self._dash_f_e_parser = (Group(OneOrMore(model)) + likelihood
                             + Group(OneOrMore(partition)))
class Include:
    """
    Rule to parse #include directives.

    ``rule`` matches INCLUDE, an opening bracket, the header text (every
    character up to ``>``) and the closing bracket, and turns the match
    into an ``Include`` instance.
    """
    rule = (INCLUDE + LOPBRACK + CharsNotIn('>')("header") +
            ROPBRACK).setParseAction(lambda t: Include(t.header))

    # ``header`` is the matched text, not the CharsNotIn expression that
    # matched it, so it is annotated as ``str``.
    def __init__(self, header: str, parent: str = '') -> None:
        self.header = header
        self.parent = parent

    def __repr__(self) -> str:
        return "#include <{}>".format(self.header)
def pattern():
    """Return the pyparsing pattern of the command.

    Three spellings are accepted: ``f `` followed by an optional path, a
    bare path of at least two characters, or a path starting with ``/``.
    Each may be followed by whitespace and a line number (``line``).  The
    match is handed to ``CommandOpen.create``.
    """
    # delayed import, performance optimization
    from pyparsing import CharsNotIn, Combine, Literal, Optional, White, Word, nums

    def attachLocation(s, loc, tocs):
        """pyparsing callback. Saves path position in the original string
        """
        return [(loc, tocs[0])]

    path = CharsNotIn(" \t")("path").setParseAction(attachLocation)
    longPath = CharsNotIn(" \t", min=2)("path").setParseAction(
        attachLocation)
    slashPath = Combine(
        Literal('/') + Optional(CharsNotIn(" \t")))("path")
    slashPath.setParseAction(attachLocation)

    explicit = Literal('f ') + Optional(White()) + Optional(path)
    lineNumber = Optional(White() + Word(nums)("line"))

    pat = (explicit ^ longPath ^ slashPath) + lineNumber
    pat.leaveWhitespace()
    pat.setParseAction(CommandOpen.create)
    return pat
def grammar():
    """Return the grammar for a SQL dump: one or more statements.

    Recognised statements, tried in this order: ``--`` comments,
    ``CREATE TABLE``, ``ALTER TABLE ONLY ... ADD CONSTRAINT ... FOREIGN
    KEY`` and, as a fallback, anything terminated by ``;``.  Each statement
    kind reports to its ``*_act`` callback.
    """
    # Balanced parentheses, possibly nested.
    parenthesis = Forward()
    parenthesis <<= "(" + ZeroOrMore(CharsNotIn("()") | parenthesis) + ")"

    # A field definition is a run of words and parenthesised groups.
    field_def = OneOrMore(Word(alphanums + "_\"'`:-") | parenthesis)
    field_def.setParseAction(field_act)

    tablename_def = (Word(alphas + "`_") | QuotedString("\""))

    field_list_def = field_def + ZeroOrMore(Suppress(",") + field_def)
    field_list_def.setParseAction(field_list_act)

    create_table_def = (
        Literal("CREATE") + "TABLE"
        + tablename_def.setResultsName("tableName")
        + "(" + field_list_def.setResultsName("fields") + ")" + ";")
    create_table_def.setParseAction(create_table_act)

    add_fkey_def = (
        Literal("ALTER") + "TABLE" + "ONLY"
        + tablename_def.setResultsName("tableName")
        + "ADD" + "CONSTRAINT" + Word(alphanums + "_")
        + "FOREIGN" + "KEY"
        + "(" + Word(alphanums + "_").setResultsName("keyName") + ")"
        + "REFERENCES" + Word(alphanums + "_").setResultsName("fkTable")
        + "(" + Word(alphanums + "_").setResultsName("fkCol") + ")"
        + Optional(Literal("DEFERRABLE")) + ";")
    add_fkey_def.setParseAction(add_fkey_act)

    other_statement_def = OneOrMore(CharsNotIn(";")) + ";"
    other_statement_def.setParseAction(other_statement_act)

    comment_def = "--" + ZeroOrMore(CharsNotIn("\n"))
    comment_def.setParseAction(other_statement_act)

    statement = (comment_def | create_table_def | add_fkey_def
                 | other_statement_def)
    return OneOrMore(statement)
def getSignatures(signatureFile):
    """Read a signature file and return one ``Signature`` per statement.

    A statement has the shape
    ``[#tag ...] return_type class_name:method_name(parameters){body}``;
    ``//`` comments are ignored.

    Args:
        signatureFile: path of the file to read.

    Returns:
        A list of ``Signature`` objects, one per parsed statement.
    """
    # The context manager closes the handle even when read() raises.
    # NOTE(review): the file is read in binary mode and handed to
    # parseString() as-is; on Python 3 that is bytes -- confirm the
    # intended interpreter before changing the mode.
    with open(signatureFile, "rb") as fp:
        content = fp.read()

    litteral = Word(alphas + nums + "_")
    regex_pattern = CharsNotIn("(")
    tags = (OneOrMore(Group('#' + litteral))).setResultsName("tags")
    hierarchy_modifier = oneOf("<= =")
    java_type = Group(
        Optional(hierarchy_modifier) +
        Word(alphas + nums + "_" + "." + "[" + "]" + "$"))
    return_type = (java_type | "*").setResultsName("return_type")
    method_name = (litteral | "<init>"
                   | regex_pattern).setResultsName("method_name")
    parameter = Group((java_type + litteral))
    parameter_list = (delimitedList(parameter)
                      | "*").setResultsName("parameters")
    body_instruction = (CharsNotIn("{;}"))
    signature_body = (delimitedList(
        body_instruction, ";")).setResultsName("signature_body")
    class_name = (java_type
                  | Group(Optional(hierarchy_modifier)
                          + CharsNotIn(":"))).setResultsName("class_name")
    signature_stmt = Group(
        (Optional(tags) + return_type + class_name + ":" + method_name
         + "(" + Optional(parameter_list) + ")"
         + "{" + Optional(signature_body) + "}"))

    grammar = OneOrMore(signature_stmt)
    grammar.ignore(dblSlashComment)
    result = grammar.parseString(content)
    signatures = [Signature(sig) for sig in result]
    return signatures
def __init__(self, debug=False):
    """Build the OFC document parser and store it on ``self.parser``.

    An aggregate is an opening tag, one or more nested aggregates or
    ``tag value`` content items, and a closing tag; the whole document is
    exposed under the results name ``document``.

    Args:
        debug: when truthy, install the ofxtools debug actions on the
            parser.
    """
    # Aggregates nest, so the expression is declared before it is defined.
    aggregate = Forward().setResultsName("OFC")

    open_tag, close_tag = self._tag()
    leaf_tag = self._tag(closed=False)

    # A content item: an unclosed tag followed by text up to the next tag
    # or line break.
    content = Group(leaf_tag + CharsNotIn("<\r\n"))
    children = Dict(OneOrMore(aggregate | content))
    aggregate << Group(open_tag + children + close_tag)

    self.parser = Group(aggregate).setResultsName("document")

    if debug:
        self.parser.setDebugActions(
            ofxtools._ofxtoolsStartDebugAction,
            ofxtools._ofxtoolsSuccessDebugAction,
            ofxtools._ofxtoolsExceptionDebugAction)
def __init__(self, debug=False):
    """Build the OFX parser (headers followed by body) on ``self.parser``.

    Headers are ``NAME:value`` lines collected under ``header``.  The body
    is a tree of aggregates (opening tag, zero or more nested aggregates or
    ``tag value`` content items, closing tag) exposed under ``body``.

    Args:
        debug: when truthy, install the module's debug actions on the
            parser.
    """
    # Headers: a word, a colon, then optionally the rest of the line.
    header_line = Group(
        Word(alphas) + Literal(":").suppress()
        + Optional(CharsNotIn("\r\n")))
    headers = Dict(OneOrMore(header_line)).setResultsName("header")

    # Body: aggregates nest, so the expression is declared first.
    aggregate = Forward().setResultsName("OFX")
    open_tag, close_tag = self._tag()
    leaf_tag = self._tag(closed=False)
    content = Group(leaf_tag + CharsNotIn("<\r\n"))
    children = Dict(ZeroOrMore(aggregate | content))
    aggregate << Group(open_tag + children + close_tag)
    body = Group(aggregate).setResultsName("body")

    # The parser as a whole
    self.parser = headers + body

    if debug:
        self.parser.setDebugActions(_ofxStartDebugAction,
                                    _ofxSuccessDebugAction,
                                    _ofxExceptionDebugAction)
def Syntax():
    """Return the grammar for a comma-separated list of ``name = value`` assignments.

    A value is ``true``, ``false``, a number-like word, a double-quoted
    string, or a ``{...}`` group.  A group holds nested assignments, a
    ``"string", number`` pair, or a list of further groups.  Lists accept
    an optional trailing comma.
    """
    def comma_list(item):
        # item (, item)* with an optional trailing comma
        return (item + ZeroOrMore(Suppress(",") + item)
                + Optional(Suppress(",")))

    dbl_quoted = Suppress('"') + Optional(CharsNotIn('"')) + Suppress('"')
    lelem = Word(alphanums + "-_")
    relem = (Literal("true") | Literal("false") | Word(nums + ".")
             | dbl_quoted)

    # Groups nest, hence the forward declaration.
    dict_ = Forward()
    assignment = lelem + Suppress("=") + (relem | (dict_))

    group_body = Group(
        Dict(comma_list(Group(assignment)))
        | (dbl_quoted + Suppress(",") + Word(nums + "."))
        | comma_list(dict_))
    dict_ << Suppress("{") + group_body + Suppress("}")

    return Dict(comma_list(Group(assignment)))
def makeNewickParser():
    """Return a callable that parses a Newick tree string.

    Each subtree is a group holding either a parenthesised, comma-separated
    list of subtrees (``subtree``) or a leaf name (``name``), optionally
    followed by raw trailing text (``data``) that runs up to the next
    ``,``, ``)`` or ``;``.  Branch lengths and support values are therefore
    returned as part of ``data`` and are not converted to numbers.

    Returns:
        The bound ``parseString`` method of the top-level rule.
    """
    # pyparsing
    from pyparsing import CharsNotIn, Forward, Group, Literal, Optional, \
        Word, alphanums

    # Literals.  The previous version also defined locals that were never
    # used by the returned grammar, among them a "semicolon" that was
    # actually Literal(":"); they have been removed.
    lparen = Literal("(").suppress()
    rparen = Literal(")").suppress()
    comma = Literal(",").suppress()

    # terminal rules
    name = Word(alphanums + "_" + "-" + "." + "+")

    # recursive rules
    subtree = Forward()
    subtreelist = Forward()

    subtree << \
        Group(
            (
                (lparen + subtreelist + rparen).setResultsName("subtree") |
                name.setResultsName("name")
            ) +
            Optional(
                CharsNotIn(",);").setResultsName("data")
            )
        )
    subtreelist << subtree + Optional(comma + subtreelist)

    # top level rule
    tree = subtree + Word(";").suppress()

    return tree.parseString
def pattern():
    """Return the pyparsing pattern of the command.

    The command is ``s `` followed by optional whitespace and an optional
    path (results name ``path``, reported together with its position in
    the input).  The match is handed to ``CommandSaveAs.create``.
    """
    # delayed import, performance optimization
    from pyparsing import CharsNotIn, Literal, Optional, White

    def attachLocation(s, loc, tocs):
        # Keep the position of the path in the original string.
        return [(loc, tocs[0])]

    path = CharsNotIn(" \t")("path").setParseAction(attachLocation)

    pat = (Literal('s ') + Optional(White()) + Optional(path))
    pat.leaveWhitespace()
    pat.setParseAction(CommandSaveAs.create)
    return pat
def parse_variadic_templates(txt):
    """Parse source text, recognising variadic template declarations.

    A declaration is an optional non-variadic ``template<...>`` header, a
    ``template<...>`` header containing one ``type ... name`` parameter,
    the text up to the first ``{`` and a brace-balanced block with an
    optional trailing ``;``.  Declarations are returned as their original
    text; everything else is tokenised as whitespace or printable words.

    Args:
        txt: the source text.

    Returns:
        The pyparsing ``ParseResults`` for the whole text.
    """
    type_name = Word(alphas)
    ellipsis = Literal('...')
    param_name = Word(alphas)

    variadic_param = Group(type_name + ellipsis + param_name)
    plain_param = Group(type_name + param_name)

    # Exactly one variadic parameter, with plain ones before and/or after.
    variadic_params = (Optional(OneOrMore(plain_param + ','))
                       + variadic_param
                       + Optional(OneOrMore(',' + plain_param)))
    plain_params = (plain_param
                    + Optional(OneOrMore(',' + plain_param)))

    outer_header = ("template" + Literal("<") + plain_params
                    + Literal(">"))
    template_decl = (Optional(outer_header)
                     + "template" + Literal("<") + variadic_params
                     + Literal(">"))

    # Brace-balanced body; blocks may nest.
    block_content = Forward()
    block = (nestedExpr('{', '}', content=block_content)
             + Literal(';') * (0, 1))
    block_content << (CharsNotIn('{}') | block)

    decl = originalTextFor(template_decl + CharsNotIn('{') + block)

    template_file = Forward()
    code_block = decl | White() | Word(printables)
    template_file << (Optional(OneOrMore(code_block)) | template_file)

    parsed = template_file.parseString(txt)
    return parsed
def process_task_lists(content: str) -> str:
    """Rewrite task lists found in ``content`` and return the new text.

    A task list starts at the beginning of the text or after a blank line
    and consists of one or more items of the form
    ``<indent><marker> [ ]|[x] <text>``, where the marker is ``+``, ``-``,
    ``*`` or a number followed by ``.``.  Each match is passed to
    ``replace_list``; text outside matches is left untouched.
    """
    # Item text: the rest of the line, including its terminator.
    line_text = Group(
        CharsNotIn('\n') + (StringEnd() | '\n')).leaveWhitespace()
    checkbox = oneOf(['[ ]', '[x]'])
    bullet = Suppress(oneOf(['+', '-', '*']) | Word(nums) + '.')

    # Leading indentation is kept, one token per space or tab.
    indent_char = oneOf([' ', '\t']).leaveWhitespace()
    indentation = Group(ZeroOrMore(indent_char))

    entry = Group(indentation + bullet + checkbox + line_text)

    list_start = Suppress(
        StringStart() | Literal('\n\n')).leaveWhitespace()
    task_list = list_start + OneOrMore(entry)
    task_list.setParseAction(replace_list)

    return task_list.transformString(content)