import string

from pyparsing import LineStart, Suppress, Word


def get_interpretation_markers(text):
    roman_dec = Word("ivxlcdm")
    upper_dec = Word(string.ascii_uppercase)
    marker_parser = LineStart() + (
        Word(string.digits) | roman_dec | upper_dec) + Suppress(".")
    for citation, start, end in marker_parser.scanString(text):
        return citation[0]
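# Usage sketch (hypothetical input): scanString walks the text for
# line-leading markers, and the function returns the first one found.
print(get_interpretation_markers("3. Coverage.\nii. Some later marker."))  # "3"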
def parsed_title(text, appendix_letter):
    digit_str_parser = (Marker(appendix_letter)
                        + Suppress('-')
                        + grammar.a1.copy().leaveWhitespace()
                        + Optional(grammar.markerless_upper)
                        + Optional(grammar.paren_upper | grammar.paren_lower)
                        + Optional(grammar.paren_digit))
    part_roman_parser = Marker("part") + grammar.aI
    parser = LineStart() + (digit_str_parser | part_roman_parser)
    for match, _, _ in parser.scanString(text):
        return match
def _applicable_parser(section):
    """Return a parser for lines which indicate where this interpretation
    is applicable."""
    paragraph = (str(section)
                 + lower_alpha_sub.setResultsName("paragraph1")
                 + Optional(digit_sub.setResultsName("paragraph2")
                            + Optional(roman_sub.setResultsName("paragraph3")
                                       + Optional(upper_alpha_sub.setResultsName(
                                           "paragraph4")))))
    whole_par = LineStart() + ("Paragraph" + paragraph)
    keyterm = (LineStart() + paragraph
               + SkipTo("\n").setResultsName("term") + LineEnd())
    return whole_par.setResultsName("whole") | keyterm.setResultsName("keyterm")
def detect_token(jade):
    doctype = LineStart() + oneOf('!!! doctype') + Optional(oneOf(
        '5 html xml default transitional strict frameset 1.1 basic mobile',
        True))
    doctype.setParseAction(parse_doctype)

    element_id = Suppress('#') + Word(alphanums + '_' + '-')
    element_class = Suppress('.') + Word(alphanums + '_' + '-')

    selectors = ((element_id.setResultsName('element_id')
                  + ZeroOrMore(element_class).setResultsName('element_class'))
                 | (OneOrMore(element_class).setResultsName('element_class')
                    + Optional(element_id).setResultsName('element_id')))
    selectors.setParseAction(parse_selectors)

    element = (selectors.setResultsName('selectors')
               | (Word(alphas).setResultsName('element_name')
                  + Optional(selectors).setResultsName('selectors')))
    element.setParseAction(parse_element)

    attribute = CharsNotIn('(' + ')')
    attributes = nestedExpr(content=attribute)

    tag = (element.setResultsName('element')
           + Optional(attributes).setResultsName('attributes'))
    tag.setParseAction(parse_tag)

    # TODO: block-comment and conditional-comment
    unbuffered_comment = Suppress(Suppress('//-') + restOfLine)
    buffered_comment = Suppress('//') + restOfLine
    buffered_comment.setParseAction(parse_buffered_comment)

    # Order matters here, as buffered will pick up
    # unbuffered comments if set first
    comment = unbuffered_comment | buffered_comment

    source = doctype | tag | comment
    parsed = source.parseString(jade)
    return ' '.join(parsed)
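# Illustrative Jade inputs for the tokenizer above (hypothetical; the
# parse_* actions are assumed to be defined elsewhere in the module):
detect_token('!!! 5')          # doctype
detect_token('div#main.wide')  # tag with element name, id and class selectors
detect_token('//- private')    # unbuffered comment, suppressed entirely
detect_token('// visible')     # buffered comment, kept in the output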
def uniqueLabels(names):
    """Return a list of unique, valid labels for a list of layer names,
    where "valid" means conforming to the syntax for rasters in the
    language."""
    from lispy import rasterSyntax
    from pyparsing import LineStart, LineEnd, ParseException

    validRaster = LineStart() + rasterSyntax() + LineEnd()
    i = 1
    labels = []
    for name in names:
        name1 = name
        try:
            validRaster.parseString(name1)
        except ParseException:
            name1 = "layer"
        name2 = name1
        while name1 in labels:
            name1 = name2 + "-%s" % i
            i = i + 1
        labels.append(name1)
    return labels
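# Behavior sketch (hypothetical inputs; assumes rasterSyntax() accepts plain
# identifiers but rejects names starting with a digit): "dem" parses, so the
# duplicate gets a "-1" suffix, while "2slope" fails validation and falls
# back to "layer".
uniqueLabels(["dem", "dem", "2slope"])  # -> ["dem", "dem-1", "layer"]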
def uniqueLabels(names):
    from rastercalcengine import rasterName
    from pyparsing import LineStart, LineEnd, ParseException

    validRaster = LineStart() + rasterName() + LineEnd()
    i = 1
    labels = []
    for name in names:
        name1 = "[" + name + "]"
        try:
            validRaster.parseString(name1)
        except ParseException:
            name1 = "layer"
        name2 = "[" + name
        while name1 in labels:
            name1 = name2 + "_%s]" % i
            i = i + 1
        labels.append(name1)
    return labels
def _define_grammar(self):
    '''define the grammar to be used, and add actions'''
    self._define_actions()
    eol = LineEnd().suppress()
    white = Optional(White()).suppress()
    begin = Keyword('begin').suppress()
    end = Keyword('end').suppress()
    comment = (Literal('#') + restOfLine).suppress()
    data_value = Combine(OneOrMore(CharsNotIn('#\n\r')))
    data_line = (LineStart() + white + Optional(data_value)
                 + Optional(comment) + eol)
    block_name = Word(alphas, alphanums + '_')
    begin_block = (LineStart() + begin + block_name
                   + Optional(comment) + eol)
    end_block = LineStart() + end + block_name + Optional(comment) + eol
    junk = ZeroOrMore(LineStart() + white + NotAny(begin)
                      + restOfLine + eol).suppress()
    data = Group(ZeroOrMore(NotAny(end) + data_line))
    block_def = begin_block + data + end_block
    block_defs = junk + OneOrMore(block_def + junk)
    self._grammar = block_defs
    begin_block.addParseAction(self._begin_block_action)
    end_block.addParseAction(self._end_block_action)
    data_value.addParseAction(self._data_value_action)
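# A sketch of input this grammar accepts (hypothetical block name and
# values): free-form preamble is consumed by `junk`, then each begin/end
# pair brackets data lines with optional '#' comments.
sample = """\
preamble text that the junk rule discards
begin calibration  # block header comment
1.25
3.50  # inline comment
end calibration
"""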
procid = Optional(procid)
procid = procid.setResultsName('PROCID')

app_name = Or([nilvalue, CharsNotIn('= ]"', 1, 48)])
if SUPPORT_MISSING_VALUES:
    app_name = Optional(app_name)
app_name = app_name.setResultsName('APP_NAME')

hostname = Or([nilvalue, CharsNotIn('= ]"', 1, 255)])
if SUPPORT_MISSING_VALUES:
    hostname = Optional(hostname)
hostname = hostname.setResultsName('HOSTNAME')

version = Regex('[1-9][0-9]{0,2}').setResultsName('VERSION')
prival = Regex("[0-9]{1,3}").setResultsName('PRIVAL')
pri = "<" + prival + ">"
header = (pri + version + sp + timestamp + sp + hostname + sp
          + app_name + sp + procid + sp + msgid)
syslog_msg = (LineStart() + header + structured_data
              + Optional(sp + msg) + LineEnd())

# Default Prival for new SyslogEntry instances
from constants import LOG_INFO, LOG_USER
DEFAULT_PRIVAL = LOG_INFO | LOG_USER


class Params(object):
    def __init__(self, d):
        for k, v in d.items():
            setattr(self, k, v)


class SDElement(object):
    """An SD-ELEMENT consists of a name and parameter name-value pairs."""
import pyparsing
from pyparsing import (ParserElement, OneOrMore, ZeroOrMore, Word, alphas,
                       alphanums, delimitedList, Group, restOfLine,
                       LineStart, LineEnd, StringEnd)

__all__ = ['load_rules']

ws = ' \t'
ParserElement.setDefaultWhitespaceChars(ws)

EOL = LineEnd().suppress()
SOL = LineStart().leaveWhitespace()
blankline = SOL + LineEnd()
noIndentation = SOL + ~Word(ws).leaveWhitespace().suppress()
indentation = SOL + Word(ws).leaveWhitespace().suppress()

# Single statements
keyword = Word(alphanums)
value = restOfLine
value.setParseAction(lambda tokens: tokens[0].strip())
oneLineStatement = keyword("keyword") + value("value") + EOL

# If statements
nonIndentedLine = noIndentation + restOfLine() + EOL
indentedLine = indentation + Group(oneLineStatement)
indentedBody = OneOrMore(indentedLine)
ifConditions = (restOfLine() + EOL + ZeroOrMore(nonIndentedLine))
ifConditions.setParseAction(lambda tokens: [t for t in tokens if t])
    sys.exit(77)

try:
    from evdev.ecodes import ecodes
except ImportError:
    ecodes = None
    print('WARNING: evdev is not available')

try:
    from functools import lru_cache
except ImportError:
    # don't do caching on old python
    lru_cache = lambda: (lambda f: f)

EOL = LineEnd().suppress()
EMPTYLINE = LineStart() + LineEnd()
COMMENTLINE = pythonStyleComment + EOL
INTEGER = Word(nums)
REAL = Combine((INTEGER + Optional('.' + Optional(INTEGER))) ^ ('.' + INTEGER))
UDEV_TAG = Word(string.ascii_uppercase, alphanums + '_')

TYPES = {'mouse': ('usb', 'bluetooth', 'ps2', '*'),
         'evdev': ('name', 'atkbd', 'input'),
         'touchpad': ('i8042', 'rmi', 'bluetooth', 'usb'),
         'keyboard': ('name', ),
         }


@lru_cache()
def hwdb_grammar():
    ParserElement.setDefaultWhitespaceChars('')
for m in match:
    f.write(u"{} {}\n".format(m[0], m[1]).encode("utf8"))

from pyparsing import Word, Literal, printables, LineStart, SkipTo, Combine

name = Word(printables).setResultsName("name")
seq_start = Literal("5'").suppress()
seq_stop = Literal("3'").suppress()
sequence = Combine(seq_start + SkipTo(seq_stop)).setResultsName("seq")
mwg_primer = LineStart() + name + SkipTo(LineStart()) + sequence

result = mwg_primer.scanString(raw_string)
seqlist = [data for data, dataStart, dataEnd in result]
number += len(seqlist)

fasta_string = ''
for data in seqlist:
    number -= 1
    s = data.seq.strip("-").replace("\n", "").replace(" ", "")
    fasta_string += ">{number}_{name} ({length}-mer)\n{seq}\n\n".format(
        number=number, name=data.name, length=len(s), seq=s)
fasta_string
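# For reference, a hypothetical raw_string in the MWG layout the grammar
# above targets: the primer name on its own line, with the sequence
# bracketed by 5' ... 3' markers below it.
raw_string = """\
primer_fwd
5'-ATG CGT ACC GGA-3'
"""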
from regparser.grammar import atomic, unified
from regparser.grammar.utils import (DocLiteral, Marker, QuickSearchable,
                                     keep_pos)

smart_quotes = QuickSearchable(
    Suppress(DocLiteral(u'“', "left-smart-quote"))
    + keep_pos(SkipTo(
        DocLiteral(u'”', "right-smart-quote"))).setResultsName("term"))

e_tag = (
    Suppress(Regex(r"<E[^>]*>"))
    + keep_pos(OneOrMore(Word(srange("[a-zA-Z-]")))).setResultsName("term")
    + Suppress(Literal("</E>")))

xml_term_parser = QuickSearchable(
    LineStart()
    + Optional(Suppress(unified.any_depth_p))
    + e_tag.setResultsName("head")
    + ZeroOrMore((atomic.conj_phrases + e_tag).setResultsName(
        "tail", listAllMatches=True))
    + Suppress(ZeroOrMore(Regex(r",[a-zA-Z ]+,")))
    + Suppress(ZeroOrMore((Marker("this") | Marker("the")) + Marker("term")))
    + ((Marker("mean") | Marker("means"))
       | (Marker("refers") + ZeroOrMore(Marker("only")) + Marker("to"))
       | ((Marker("has") | Marker("have"))
          + Marker("the") + Marker("same") + Marker("meaning")
          + Marker("as"))))

key_term_parser = QuickSearchable(
    LineStart()
    + Optional(Suppress(unified.any_depth_p))
    + Suppress(Regex(r"<E[^>]*>"))
    + keep_pos(OneOrMore(Word(srange("[a-zA-Z-,]")))).setResultsName("term")
    + Optional(Suppress("."))
    + Suppress(Literal("</E>")))
def line(contents):
    return LineStart() + ZeroOrMore(Group(contents)) + LineEnd().suppress()
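# Minimal usage sketch for the helper above: wrapping a token in `line`
# collects every occurrence on one line as its own group.
from pyparsing import Group, LineEnd, LineStart, Word, ZeroOrMore, alphas

words = line(Word(alphas))
print(words.parseString("foo bar baz").asList())  # [['foo'], ['bar'], ['baz']]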
import string

from pyparsing import LineEnd, LineStart, SkipTo

from regparser.grammar import atomic, unified

section = (atomic.section_marker.copy().leaveWhitespace()
           + unified.part_section + SkipTo(LineEnd()))

par = (atomic.section.copy().leaveWhitespace()
       + unified.depth1_p + SkipTo(LineEnd()))

marker_par = (atomic.paragraph_marker.copy().leaveWhitespace()
              + atomic.section + unified.depth1_p)

appendix = (atomic.appendix_marker.copy().leaveWhitespace()
            + (unified.appendix_with_section | atomic.appendix)
            + SkipTo(LineEnd()))

parser = LineStart() + (section | marker_par | par | appendix)
from pyparsing import LineEnd, LineStart, SkipTo

from regparser.grammar import atomic, unified, utils

section = (atomic.section_marker.copy().leaveWhitespace()
           + unified.part_section + SkipTo(LineEnd()))

par = (atomic.section.copy().leaveWhitespace()
       + unified.depth1_p + SkipTo(LineEnd()))

marker_par = (atomic.paragraph_marker.copy().leaveWhitespace()
              + atomic.section + unified.depth1_p)

appendix = (atomic.appendix_marker.copy().leaveWhitespace()
            + atomic.appendix + SkipTo(LineEnd()))

parser = utils.QuickSearchable(
    LineStart() + (section | marker_par | par | appendix))
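# Hypothetical usage: both variants are driven the same way, scanning
# amendment text for line-leading citations such as "Section 1005.6 ..."
# (assumes the regparser grammars resolve).
for citation, start, end in parser.scanString("Section 1005.6 Liability"):
    print(citation)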
class NginxConfigParser(object):
    """
    Nginx config parser based on https://github.com/fatiherikli/nginxparser
    Parses single file into json structure
    """
    max_size = 20 * 1024 * 1024  # 20 mb

    # line starts/ends
    line_start = LineStart().suppress()
    line_end = LineEnd().suppress()

    # constants
    left_brace = Literal("{").suppress()
    left_parentheses = Literal("(").suppress()
    right_brace = Literal("}").suppress()
    right_parentheses = Literal(")").suppress()
    semicolon = Literal(";").suppress()
    space = White().suppress()
    singleQuote = Literal("'").suppress()
    doubleQuote = Literal('"').suppress()

    # keys
    if_key = Keyword("if").setParseAction(set_line_number)
    set_key = Keyword("set").setParseAction(set_line_number)
    rewrite_key = Keyword("rewrite").setParseAction(set_line_number)
    perl_set_key = Keyword("perl_set").setParseAction(set_line_number)
    log_format_key = Keyword("log_format").setParseAction(set_line_number)
    alias_key = Keyword("alias").setParseAction(set_line_number)
    return_key = Keyword("return").setParseAction(set_line_number)
    error_page_key = Keyword("error_page").setParseAction(set_line_number)
    map_key = Keyword("map").setParseAction(set_line_number)
    server_name_key = Keyword("server_name").setParseAction(set_line_number)
    sub_filter_key = Keyword("sub_filter").setParseAction(set_line_number)

    # lua keys
    start_with_lua_key = Regex(r'lua_\S+').setParseAction(set_line_number)
    contains_by_lua_key = Regex(r'\S+_by_lua\S*').setParseAction(set_line_number)

    key = (
        ~map_key & ~alias_key & ~perl_set_key &
        ~if_key & ~set_key & ~rewrite_key &
        ~server_name_key & ~sub_filter_key
    ) + Word(alphanums + '$_:%?"~<>\/-+.,*()[]"' + "'").setParseAction(set_line_number)

    # values
    value_one = Regex(r'[^{};]*"[^\";]+"[^{};]*')
    value_two = Regex(r'[^{};]*\'[^\';]+\'')
    value_three = Regex(r'[^{};]+((\${[\d|\w]+(?=})})|[^{};])+')
    value_four = Regex(r'[^{};]+(?!${.+})')
    value = (value_one | value_two | value_three | value_four).setParseAction(set_line_number)
    quotedValue = Regex(r'"[^;]+"|\'[^;]+\'').setParseAction(set_line_number)
    rewrite_value = CharsNotIn(";").setParseAction(set_line_number)
    any_value = CharsNotIn(";").setParseAction(set_line_number)
    non_space_value = Regex(r'[^\'\";\s]+').setParseAction(set_line_number)
    if_value = Regex(r'\(.*\)').setParseAction(set_line_number)
    language_include_value = CharsNotIn("'").setParseAction(set_line_number)
    strict_value = CharsNotIn("{};").setParseAction(set_line_number)
    sub_filter_value = (non_space_value | Regex(r"\'(.|\n)+?\'")).setParseAction(set_line_number)

    # map values
    map_value_one = Regex(r'\'([^\']|\s)*\'').setParseAction(set_line_number)
    map_value_two = Regex(r'"([^"]|\s)*\"').setParseAction(set_line_number)
    map_value_three = Regex(r'((\\\s|[^{};\s])*)').setParseAction(set_line_number)
    map_value = (map_value_one | map_value_two | map_value_three)

    # modifier for location uri [ = | ~ | ~* | ^~ ]
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    assignment = (
        key + Optional(space) + Optional(value) +
        Optional(space) + Optional(value) + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    set = (
        set_key + Optional(space) + any_value + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    rewrite = (
        rewrite_key + Optional(space) + rewrite_value + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    perl_set = (
        perl_set_key + Optional(space) + key + Optional(space) +
        singleQuote + language_include_value + singleQuote +
        Optional(space) + semicolon
    ).setParseAction(set_line_number)

    lua_content = (
        (start_with_lua_key | contains_by_lua_key) + Optional(space) +
        singleQuote + language_include_value + singleQuote +
        Optional(space) + semicolon
    ).setParseAction(set_line_number)

    alias = (
        alias_key + space + any_value + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    return_ = (
        (return_key | error_page_key) + space + value +
        Optional(space) + Optional(any_value) + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    log_format = (
        log_format_key + Optional(space) + strict_value +
        Optional(space) + any_value + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    server_name = (
        server_name_key + space + any_value + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    sub_filter = (
        sub_filter_key + space + sub_filter_value + space +
        sub_filter_value + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    # script
    map_block = Forward()
    map_block << Group(
        Group(
            map_key + space + map_value + space + map_value + Optional(space)
        ).setParseAction(set_line_number) +
        left_brace +
        Group(
            ZeroOrMore(
                Group(map_value + Optional(space) +
                      Optional(map_value) + Optional(space) + semicolon)
            ).setParseAction(set_line_number)
        ) +
        right_brace
    )

    block = Forward()
    block << Group(
        (
            Group(
                key + Optional(space + modifier) + Optional(space) +
                Optional(value) + Optional(space) +
                Optional(value) + Optional(space)
            ) |
            Group(if_key + space + if_value + Optional(space))
        ).setParseAction(set_line_number) +
        left_brace +
        Group(
            ZeroOrMore(
                Group(log_format) | Group(lua_content) | Group(perl_set) |
                Group(set) | Group(rewrite) | Group(alias) | Group(return_) |
                Group(assignment) | Group(server_name) | Group(sub_filter) |
                map_block | block
            ).setParseAction(set_line_number)
        ).setParseAction(set_line_number) +
        right_brace
    )

    script = OneOrMore(
        Group(log_format) | Group(perl_set) | Group(lua_content) |
        Group(alias) | Group(return_) | Group(assignment) |
        Group(set) | Group(rewrite) | Group(sub_filter) |
        map_block | block
    ).ignore(pythonStyleComment)

    INCLUDE_RE = re.compile(r'[^#]*include\s+(?P<include_file>.*);')
    SSL_CERTIFICATE_RE = re.compile(r'[^#]*ssl_certificate\s+(?P<cert_file>.*);')

    def __init__(self, filename='/etc/nginx/nginx.conf'):
        global tokens_cache
        tokens_cache = {}

        self.filename = filename
        self.folder = '/'.join(self.filename.split('/')[:-1])  # path to folder with main config
        self.files = {}  # to prevent cycle files and line indexing
        self.parsed_cache = {}  # to cache multiple includes
        self.broken_files = set()  # to prevent reloading broken files
        self.index = []  # index for all sections (points to file number and line number)
        self.ssl_certificates = []
        self.errors = []
        self.tree = {}

    def parse(self):
        self.tree = self.__logic_parse(self.__pyparse(self.filename))
        # drop cached
        self.parsed_cache = None

    @staticmethod
    def get_file_info(filename):
        """
        Returns file size, mtime and permissions

        :param filename: str filename
        :return: int, int, str - size, mtime, permissions
        """
        size, mtime, permissions = 0, 0, '0000'
        try:
            size = os.path.getsize(filename)
            mtime = int(os.path.getmtime(filename))
            permissions = oct(os.stat(filename).st_mode & 0o777)
        except Exception as e:
            exception_name = e.__class__.__name__
            message = 'failed to stat %s due to: %s' % (filename, exception_name)
            context.log.debug(message, exc_info=True)
        return size, mtime, permissions
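# Usage sketch, per the methods defined above: parse the main config and
# read the resulting structure (the path is illustrative).
config = NginxConfigParser('/etc/nginx/nginx.conf')
config.parse()
print(config.tree)    # nested structure built by __logic_parse
print(config.errors)  # parse errors collected along the way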
string.setParseAction(lambda toks: ["'%s'" % toks[0]])

columnName = delimitedList(ident, ".", combine=True)
tableName = delimitedList(ident, ".", combine=True)

dataType = (Word(alphas)
            + Combine(Optional(Literal("(")
                               + (Word(nums) ^ delimitedList(string, combine=True))
                               + Literal(")")))
            + ZeroOrMore(nnToken
                         ^ autoincrementToken
                         ^ (defaultToken + (string ^ nullToken))
                         ^ unsignedToken.suppress()))
dataType.setParseAction(convert_datatypes)

columnDescription = Group(ident + dataType)
keyDescription = (Optional(primaryToken ^ uniqueToken) + keyToken
                  + Optional(ident) + Literal("(")
                  + delimitedList(ident + Optional(Literal("(") + Word(nums)
                                                   + Literal(")")))
                  + Literal(")"))

createTableStmt = (Group(createToken + tableToken + ifneToken + ident
                         + Literal("("))
                   + delimitedList(columnDescription ^ keyDescription.suppress())
                   + Group(Literal(")"))
                   + Optional(autoincrementToken + Literal("=")
                              + Word(nums)).suppress())
createTableStmt.setParseAction(rebuild_createtable)

createDataBaseStmt = Group(createToken + databaseToken + ident
                           + dcsToken + Word(alphanums) + collateToken + ident)
useStmt = Group(useToken + ident)

comment = LineStart() + CharsNotIn("\n") + LineEnd()

value = Group(Literal("(") + delimitedList(Word(nums) ^ string) + Literal(")"))
insertPrefix = Group(insertToken + intoToken + ident + Literal("(")
                     + delimitedList(ident) + Literal(")") + valuesToken)
insertStmt = insertPrefix + delimitedList(value)
insertStmt.setParseAction(rebuild_insert)

statement = (((createTableStmt
               ^ createDataBaseStmt.suppress()
               ^ useStmt.suppress()
               ^ insertStmt)
              + Literal(";").setParseAction(lambda: [";\n"]))
             ^ comment.suppress()
             ^ White().suppress())

sql = ZeroOrMore(statement)
class TypeDocGrammar:
    """
    EOL ::= ["\r"] "\n"
    SOL ::= LINE_START
    line ::= [^EOL]+ EOL
    word ::= alphanums + "_"
    indented_block ::= INDENT (line_indented | any_line)
    line_indented ::= any_line indented_block
    type_definition ::= ":type" [^:]+ ":" [^EOL]+
    rtype_definition ::= ":rtype:" [^EOL]+
    returns_definition ::= (":returns:" | ":return:") [^EOL]+
    param_definition ::= ":param" [^:]+ ":" [^EOL]+ EOL [indented_block]
    response_structure ::= "**Response Structure**" line [indented_block]
    typed_dict_key_line ::= "-" "**" word "**" "*(" word ")" "--*" [^EOL]+ EOL
    type_line ::= "-" "*(" word ")" "--*" [^EOL]+ EOL
    any_line ::= typed_dict_key_line | type_line | line
    """

    indent_stack = [1]
    SOL = LineStart().suppress()
    EOL = LineEnd().suppress()
    word = Word(alphanums + "_")
    line = SkipTo(LineEnd()) + EOL
    line_indented = Forward()
    any_line = Forward()
    indented_block = indentedBlock(
        line_indented | any_line, indentStack=indent_stack
    ).setResultsName("indented")
    line_indented <<= any_line + indented_block

    type_definition = (SOL
                       + Literal(":type")
                       + SkipTo(":").setResultsName("name")
                       + Literal(":")
                       + SkipTo(EOL).setResultsName("type_name"))

    rtype_definition = (SOL
                        + Literal(":rtype:")
                        + SkipTo(EOL).setResultsName("type_name"))

    returns_definition = (SOL
                          + (Literal(":returns:") | Literal(":return:"))
                          + SkipTo(EOL).setResultsName("description"))

    param_definition = (SOL
                        + Literal(":param")
                        + SkipTo(":").setResultsName("name")
                        + Literal(":")
                        + SkipTo(EOL).setResultsName("description")
                        + EOL
                        + Optional(indented_block))

    response_structure = Literal("**Response Structure**") + line_indented

    typed_dict_key_line = (Literal("-")
                           + White(ws=" \t")
                           + Literal("**")
                           + word.setResultsName("name")
                           + Literal("**")
                           + White(ws=" \t")
                           + Literal("*(")
                           + word.setResultsName("type_name")
                           + Literal(")")
                           + White(ws=" \t")
                           + Literal("--*")
                           + SkipTo(EOL).setResultsName("description")
                           + EOL)

    type_line = (Literal("-")
                 + White(ws=" \t")
                 + Literal("*(")
                 + word.setResultsName("type_name")
                 + Literal(")")
                 + White(ws=" \t")
                 + Literal("--*")
                 + SkipTo(EOL).setResultsName("description")
                 + EOL)

    any_line <<= (typed_dict_key_line | type_line | line).setResultsName("line")

    @classmethod
    def fail_action(cls, _input_string: str, _chr_index: int, _source: str,
                    error: BaseException) -> None:
        if "found end of text" not in str(error):
            raise error

    @classmethod
    def reset(cls) -> None:
        cls.disable_packrat()
        cls.indented_block.setFailAction(cls.fail_action)
        cls.indent_stack.clear()
        cls.indent_stack.append(1)

    @staticmethod
    def enable_packrat() -> None:
        ParserElement.enablePackrat(cache_size_limit=128)

    @staticmethod
    def disable_packrat() -> None:
        ParserElement.enablePackrat(cache_size_limit=None)
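# A quick sketch of driving one sub-grammar in isolation (hypothetical
# docstring line; result names as defined above):
result = TypeDocGrammar.type_definition.parseString(":type port: int\n")
print(result["name"], result["type_name"])  # port int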
scopedIdent.setParseAction(lambda t: "_".join(t))

print("(replace namespace-scoped names with C-compatible names)")
print(scopedIdent.transformString(testData))


# or a crude pre-processor (use parse actions to replace matching text)
def substituteMacro(s, l, t):
    if t[0] in macros:
        return macros[t[0]]
ident.setParseAction(substituteMacro)
ident.ignore(macroDef)
print("(simulate #define pre-processor)")
print(ident.transformString(testData))

#################
print("Example of a stripper")
print("----------------------")

from pyparsing import dblQuotedString, LineStart

# remove all string macro definitions (after extracting to a string
# resource table?)
stringMacroDef = Literal("#define") + ident + "=" + dblQuotedString + LineStart()
stringMacroDef.setParseAction(replaceWith(""))
print(stringMacroDef.transformString(testData))
# floating point
fp = Combine(Word(nums + "+-") + Literal(".") + Word(nums))

# fortran real
exp = oneOf("E e D d")
real = Combine(fp("base") + exp.setParseAction(lambda x: "e") + integer("exponent"))

# C type
char = Word(printables)

# Decks of data
# ------------------------------------------------------------------------
# prelim
data_type = oneOf("R I C")
name_of_deck = LineStart() + OneOrMore(
    Word(printables), stopOn=White(min=3) + data_type
).setParseAction(" ".join)

# single value decks
ival_deck = name_of_deck("key") + Literal("I")("type") + integer("value")
rval_deck = name_of_deck("key") + Literal("R")("type") + real("value")
cval_deck = name_of_deck("key") + Literal("C")("type") + char("value")

# we have to parse this one differently
char_arr_deck = (
    name_of_deck("key")
    + Literal("C")("type")
    + Literal("N=").suppress()
    + integer("size")
    + LineEnd().suppress()
)
    raise ParseFatalException(parsed_str, loc=location,
                              msg=message.format(block_name, token[0]))


def handle_data(token):
    global current_block
    current_block.add_data(token[0])


eol = LineEnd().suppress()
begin = Keyword("begin").suppress()
end = Keyword("end").suppress()
comment = (Literal("#") + restOfLine).suppress()
data_value = Combine(OneOrMore(CharsNotIn("#\n\r")))
data = LineStart() + Optional(data_value) + Optional(comment) + eol
block_name = Word(alphas, alphanums + "_")
begin_block = LineStart() + begin + block_name + Optional(comment) + eol
end_block = LineStart() + end + block_name + Optional(comment) + eol
junk = ZeroOrMore(LineStart() + NotAny(begin) + restOfLine + eol).suppress()
block_def = begin_block + Group(ZeroOrMore(NotAny(end) + data)) + end_block
block_defs = junk + OneOrMore(block_def + junk)

begin_block.addParseAction(create_block)
end_block.addParseAction(finish_block)
data_value.addParseAction(handle_data)

test_str = """
bla bla
begin block_1  # bla bla
0.17 # suspicious value
# comment line
-7.34
# vim: set encoding=utf-8
from pyparsing import (LineStart, Literal, OneOrMore, Optional, Regex,
                       SkipTo, srange, Suppress, Word, ZeroOrMore)

from regparser.grammar import atomic, unified
from regparser.grammar.utils import DocLiteral, keep_pos, Marker

smart_quotes = (
    Suppress(DocLiteral(u'“', "left-smart-quote"))
    + SkipTo(DocLiteral(u'”', "right-smart-quote")).setParseAction(
        keep_pos).setResultsName("term"))

e_tag = (
    Suppress(Regex(r"<E[^>]*>"))
    + OneOrMore(Word(srange("[a-zA-Z-]"))).setParseAction(
        keep_pos).setResultsName("term")
    + Suppress(Literal("</E>")))

xml_term_parser = (
    LineStart()
    + Suppress(unified.any_depth_p)
    + e_tag.setResultsName("head")
    + ZeroOrMore((atomic.conj_phrases + e_tag).setResultsName(
        "tail", listAllMatches=True))
    + ((Marker("mean") | Marker("means"))
       | ((Marker("has") | Marker("have"))
          + Marker("the") + Marker("same") + Marker("meaning")
          + Marker("as"))))

scope_term_type_parser = (
    Marker("purposes") + Marker("of") + Optional(Marker("this"))
    + SkipTo(",").setResultsName("scope")
    + Literal(",")
    + Optional(Marker("the") + Marker("term"))
    + SkipTo(Marker("means")
             | (Marker("refers") + Marker("to"))).setParseAction(
        keep_pos).setResultsName("term"))
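# A sketch of the sentence shape scope_term_type_parser targets
# (hypothetical regulation text; assumes the regparser grammars resolve):
text = "For purposes of this subpart, the term account means a demand deposit."
for match, _, _ in scope_term_type_parser.scanString(text):
    print(match.scope, "|", match.term)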