def asn1_loads(asn1_str):
    """
    Parse an ASN.1 file

    This is currently Pseudo-ASN; modify to become actual ASN.1
    """
    # ASN.1 grammar
    identifier = pp.Word(pp.alphas + "_")
    assign = pp.Literal("::=")
    # typedef = identifier.setName("typeref") + assign + identifier.setName("basetype")
    comment1 = pp.Literal("#") + pp.originalTextFor(pp.SkipTo(pp.LineEnd()))
    # typelist = pp.OneOrMore(typedef)
    meta1 = (pp.LineStart() + identifier + pp.Literal(":") +
             pp.SkipTo(pp.LineEnd()).setDebug())
    meta2 = pp.LineStart() + pp.White() + pp.SkipTo(pp.LineEnd()).setDebug()
    metaval = meta1 + pp.ZeroOrMore(meta2)
    # metalist = pp.ZeroOrMore(comment1) + pp.Literal("/*") + pp.OneOrMore(metaval) + pp.Literal("*/")
    metalist = (pp.SkipTo(pp.Literal("/*")).setDebug() +
                pp.Literal("/*") +
                pp.OneOrMore(metaval).setDebug() +
                pp.Literal("*/"))
    asn1 = metalist.parseString(asn1_str, parseAll=False)
    print(asn1)
    jaen = {"meta": {}, "types": []}
    return jaen
def _build_rawlen_parser():
    """Grammar for a whitespace-separated header row followed by numeric rows."""
    number_chars = pp.nums + '.+-e_'
    cell_sep = pp.White(ws='\t ').suppress()
    heading = pp.Group(
        pp.LineStart().suppress() + pp.Word(pp.printables) +
        pp.ZeroOrMore(cell_sep + pp.Word(pp.printables)))
    value_row = (pp.LineStart().suppress() + pp.Word(number_chars) +
                 pp.ZeroOrMore(cell_sep + pp.Word(number_chars)))
    return (heading.setResultsName('header') +
            pp.Group(pp.OneOrMore(pp.Group(value_row)))
            .setResultsName('values'))
def inner_flatten(text):
    """Recursively expand ``include "path"`` directives in *text*.

    Repeats whole-text passes until one pass performs no substitution, so
    includes pulled in by other includes get expanded too.  Paths already
    seen (in the enclosing ``past_includes`` set) are refused to prevent
    infinite include loops.
    """
    while True:  # got to allow for recursive includes
        changed = False

        def substitute_include(string, loc, tokens):
            # BUG FIX: without `nonlocal`, the assignment below created a
            # local variable, the outer `changed` stayed False, and the
            # rescan loop always stopped after a single pass.
            nonlocal changed
            path = tokens[0][0][1:-1]
            if path in past_includes:
                warn("Denied attempt to re-include " + path)
                # Avoid including something already included (no infinite loops)
                return ""
            else:
                past_includes.add(path)
                changed = True
                print("* expanding", path)
                directory, file = os.path.split(path)
                with Context(directory):
                    include_text = load_include(file)
                return inner_flatten(include_text)

        pp_include = (pp.LineStart() +
                      pp.Literal("include").suppress() +
                      pp.nestedExpr(content=pp.QuotedString('"'))
                      .addParseAction(substitute_include))
        text = pp_include.transformString(text)
        if not changed:
            return text
def _funcParser():
    """Grammar for `!name::module(...)` func-attribute lines."""
    # TODO add debug names
    # TODO add a conditional debug flag
    bracedString = p.Combine(
        p.Regex(r"{(?:[^{\n\r\\]|(?:{})|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") +
        "}").setName("string enclosed in braces")
    funcIndicator = p.Literal('!').setName('indicator')
    funcName = p.Word(p.alphanums).setName('name')
    funcSeparator = p.Suppress(p.Literal('::')).setName('separator')
    funcModule = p.Word(p.printables, excludeChars='(').setName('module')
    funcDemarcStart = p.Literal("(").setName('demarcstart')
    funcDemarcEnd = p.Literal(")").setName('demarcend')
    # (p.sglQuotedString() | bracedString()) # | p.dblQuotedString())
    funcMiddle = p.nestedExpr().setName('middle')
    funcPattern = (p.LineStart() + p.Suppress(funcIndicator) + funcName +
                   p.Suppress(funcSeparator) + funcModule + funcMiddle +
                   p.Suppress(p.Optional(p.LineEnd())))
    # funcModule + p.Suppress(funcDemarcStart) + p.Optional(funcMiddle) + p.Suppress(funcDemarcEnd) + \
    return funcPattern
def parse_rest_case_data(rest_case_data):
    """Split a raw case blob into address, listing and key/value info fields."""
    data = {}
    listing = (p.Suppress(p.Literal("(")) +
               p.Word(p.alphanums).setResultsName("listing") +
               p.Suppress(p.Literal("Listing)")))
    # "key: value" pairs, terminated by a 2+-space gap or end of string.
    info_pair = p.And([
        p.SkipTo(p.Literal(": ")),
        p.Suppress(p.Literal(": ")),
        p.SkipTo(p.White(min=2) | p.StringEnd()),
    ])
    detail = p.And([
        p.SkipTo(listing).setResultsName("address"),
        listing,
        p.SkipTo(p.LineStart() + p.Word(p.nums))
        .setResultsName("additional_info"),
        p.SkipTo(p.StringEnd()).setResultsName("rest_case_data"),
    ])
    for key, value in detail.parseString(rest_case_data).asDict().items():
        if key == "address":
            data['address'] = value[0].strip()
        elif key == "additional_info":
            pairs = p.ZeroOrMore(p.Group(info_pair)).parseString(value[0])
            data.update(dict(pairs.asList()))
        else:
            data[key] = value.strip()
    return data
def _build_csv_parser():
    """Grammar for indented `key: value` metadata blocks followed by a CSV table."""
    separator = pp.Suppress(':')
    key = pp.Word(pp.printables, excludeChars=':')
    value = pp.Regex(r'[^\n\r]*') + pp.LineEnd().suppress()
    block_name = key + separator + pp.LineEnd().suppress()
    key_value = key + separator + value
    header = (pp.LineStart().suppress() + pp.Word(pp.nums) +
              pp.ZeroOrMore(pp.White().suppress() + pp.Word(pp.nums)) +
              pp.LineEnd().suppress())
    csv_header = (pp.delimitedList(pp.Word(pp.printables, excludeChars=',')) +
                  pp.LineEnd().suppress())
    csv_row = (pp.delimitedList(pp.Word(pp.nums + ';.+-e_') |
                                pp.Literal('custom')) +
               pp.LineEnd().suppress())
    # Recursive, indentation-delimited metadata blocks.
    indent_stack = [1]
    block = pp.Forward()
    block_body = (block | key_value)
    indented_block = pp.Dict(
        pp.ungroup(pp.indentedBlock(block_body, indent_stack)))
    block << (block_name + indented_block | key_value)
    return (pp.Optional(header) +
            pp.ZeroOrMore(pp.Dict(pp.Group(block))).setResultsName('meta') +
            csv_header.setResultsName('csvHeader') +
            pp.Group(pp.OneOrMore(pp.Group(csv_row)))
            .setResultsName('csvValues'))
def parseLoadstring(loadstring):
    """Parse `@name:` attribute lines into a dict of name -> list of values.

    Lines that do not start with `@name:` are appended to the most recent
    attribute's last value (continuation lines).

    Raises:
        ValueError: if a continuation line appears before any attribute
            (previously this crashed with an opaque UnboundLocalError).
        Exception: if an `@name:` marker is found but not at column 0.
    """
    attributeStartMarker = p.LineStart() + p.Literal('@')
    attributeStopMarker = p.Literal(':')
    exp = (attributeStartMarker.suppress() +
           p.Word(p.alphanums + '_') +
           attributeStopMarker.suppress())
    ret = defaultdict(list)
    attribname = None  # BUG FIX: was unbound until the first matching line
    for index, line in enumerate(loadstring.splitlines()):
        # TODO switch from scanString to full parsing
        result = list(exp.scanString(line))
        if len(result) > 0:
            # TODO this is kludgy
            attribname = result[0][0][0]
            matchstart = result[0][1]
            matchend = result[0][2] + 1  # skip the separator after ':'
            if matchstart == 0:
                ret[attribname].append(line[matchend:])
            else:
                raise Exception('attrib not at the start of the line')
        else:
            if attribname is None:
                raise ValueError(
                    'continuation on line %d precedes any attribute' % index)
            # Fold the continuation into the last stored value.
            ret[attribname][-1:] = [
                ret[attribname][-1:][0] + "" + line.strip()
            ]
    return ret
def get_macros(top_scoped_text):
    """Return names of `macro` definitions, each prefixed with '@'."""
    macro_expr = (pp.LineStart() +
                  pp.Literal("macro").suppress() +
                  pp_identifier)
    names = _matched_only(macro_expr.scanString(top_scoped_text))
    return ["@" + name for name in names]
class PortWithProfile(Node):
    """
    Variant of :class:`Port` that is used by "card" records inside the
    "Ports" property. It differs from the normal port syntax by having
    different entries inside the last section. Availability is not listed
    here, only priority. Priority does not have a colon before the actual
    number. This port is followed by profile assignment.
    """

    __fragments__ = {
        'name': 'port-name',
        'label': 'port-label',
        'priority': 'port-priority',
        'latency_offset': 'port-latency-offset',
        'availability': 'port-availability',
        'properties': lambda t: t['port-properties'].asList(),
        'profile_list': lambda t: t['port-profile-list'].asList(),
    }

    # FIX: the two regex patterns below are now raw strings; '\(' was an
    # invalid escape sequence in a plain string literal (DeprecationWarning,
    # eventually a SyntaxError).  The pattern bytes are unchanged.
    __syntax__ = (
        p.Word(p.alphanums + "-;").setResultsName('port-name')
        + p.Suppress(':')
        # This part was very tricky to write. The label is basically arbitrary
        # localized Unicode text. We want to grab all of it in one go but
        # without consuming the upcoming and latest '(' character or the space
        # that comes immediately before.
        #
        # The syntax here combines a sequence of words, as defined by anything
        # other than a space and '(', delimited by a single whitespace.
        + p.Combine(
            p.OneOrMore(
                ~p.FollowedBy(p.Regex(r'\(.+?\)') + p.LineEnd())
                + p.Regex(r'[^ \n]+')
                + p.White().suppress()),
            ' ').setResultsName('port-label')
        + p.Suppress('(')
        + p.Keyword('priority').suppress()
        + p.Optional(p.Suppress(':'))
        + p.Word(p.nums).setParseAction(
            lambda t: int(t[0])).setResultsName('port-priority')
        + p.Optional(
            p.MatchFirst([
                p.Suppress(',') + p.Keyword('latency offset:').suppress()
                + p.Word(p.nums).setParseAction(lambda t: int(t[0]))
                + p.Literal("usec").suppress(),
                p.Empty().setParseAction(lambda t: '')
            ]).setResultsName('port-latency-offset'))
        + p.Optional(
            p.MatchFirst([
                p.Suppress(',') + p.Literal('not available'),
                p.Suppress(',') + p.Literal('available'),
                p.Empty().setParseAction(lambda t: '')
            ]).setResultsName('port-availability'))
        + p.Suppress(')')
        + p.LineEnd().suppress()
        + p.Optional(
            p.MatchFirst([
                p.LineStart().suppress()
                + p.NotAny(p.White(' '))
                + p.White('\t').suppress()
                + p.Keyword('Properties:').suppress()
                + p.LineEnd().suppress()
                + PropertyAttributeValue,
                p.Empty().setParseAction(lambda t: [])
            ]).setResultsName('port-properties'))
        + p.White('\t', max=3).suppress()
        + p.Literal("Part of profile(s)").suppress()
        + p.Suppress(":")
        + p.delimitedList(
            p.Word(p.alphanums + "+-:"), ", "
        ).setResultsName("port-profile-list")
    ).setResultsName("port")
def lexical_analysis(self, src):
    """Parse *src* into graph/view/process/transition state-machine tokens.

    FIX: the character-class regex is now a raw string; '\\-' and '\\.'
    were invalid escape sequences in a plain string literal
    (DeprecationWarning, eventually a SyntaxError).  The pattern bytes
    are unchanged.
    """
    string = pp.Regex(
        r'[a-zA-Z0-9_{}"=+\-*/\.:;&%@$#<>? a-zA-Zぁ-ゔゞァ-・ヽヾ゛゜ー一-龯]+')
    blank = pp.LineStart() + pp.LineEnd()
    start = '['
    end = ']' + pp.LineEnd()
    graph_tag = pp.LineStart() + '@'
    graph = graph_tag + start + string + end
    view_tag = pp.LineStart() + '#'
    view = view_tag + start + string + end
    server_process_tag = pp.LineStart() + '$'
    server_process = server_process_tag + start + string + end
    client_process_tag = pp.LineStart() + '%'
    client_process = client_process_tag + start + string + end
    view_transition_identifier = pp.LineStart() + '-->'
    view_transition = view_transition_identifier + string
    process_transition_identifier = pp.LineStart() + '==>'
    process_transition = process_transition_identifier + string
    state_machine = pp.OneOrMore(graph | view | server_process |
                                 client_process | view_transition |
                                 process_transition | string | blank)
    return state_machine.parseString(src)
class VhdlUseStatement(VhdlConstruct):
    """A VHDL `use library.element.object` clause at the start of a line."""

    PP_DEFINITION = (
        pp.LineStart().leaveWhitespace()
        + pp.Literal("use")
        + PP_IDENTIFIER("library")
        + pp.Literal(".")
        + PP_IDENTIFIER("element")
        + pp.Literal(".")
        + PP_IDENTIFIER("object"))

    def _Parse(self, parts: pp.ParseResults):
        # Copy the three dotted components out of the parse results.
        self.library = parts.get("library")
        self.element = parts.get("element")
        self.object = parts.get("object")
def get_vars(top_scoped_text):
    """Return identifiers that start a line at top scope (candidate variables)."""
    # TODO match var ' a'
    # TODO match var `b` in `a,b=2,3`
    # TODO match var `b` in `a=2;b=2`
    # TODO match var `(a,b)`
    var_expr = pp.LineStart() + pp_identifier
    return _matched_only(var_expr.scanString(top_scoped_text))
def get_functions_and_types(top_scoped_text):
    """Return names introduced by function/type keywords at top scope.

    BUG FIX: "typealias" is now tried before "type".  Previously
    pp.Literal("type") matched the prefix of "typealias", so a line like
    `typealias Foo ...` yielded "alias" instead of "Foo".
    """
    pp_functions = (pp.LineStart() +
                    (pp.Literal("function") |
                     pp.Literal("typealias") |  # must precede its prefix "type"
                     pp.Literal("type") |
                     pp.Literal("immutable") |
                     pp.Literal("abstract")).suppress() +
                    pp_identifier)
    parsed_functions = pp_functions.scanString(top_scoped_text)
    functions = _matched_only(parsed_functions)
    return functions
class GenericListAttribute(Node):
    """An attribute whose `Name:` header is followed by a list value."""

    __fragments__ = {
        'name': 'attribute-name',
        'value': lambda t: t['attribute-value'].asList(),
    }

    __syntax__ = (
        p.LineStart().suppress()
        + p.NotAny(p.White(' '))
        + p.Optional(p.White('\t')).suppress()
        + AttributeName
        + p.Literal(':').suppress()
        + p.LineEnd().suppress()
        + GenericListAttributeValue
    ).setResultsName("attribute")
def _metaParser():
    """Grammar for `* name = value` meta lines."""
    # TODO force case insensitivity in attributeMode keyword match
    # TODO add debug names
    # TODO add a conditional debug flag
    indicator = p.LineStart() + p.Suppress(p.Literal('*'))
    name = p.Word(p.printables)
    separator = p.Suppress(p.Literal('='))
    value = p.Combine(p.restOfLine() + p.Suppress(p.LineEnd()))
    return indicator + name + separator + value
def generate_expr():
    """Build the single-line validator-expression grammar.

    FIX: the local previously named `tuple` shadowed the builtin; renamed
    to `tuple_expr` (behavior unchanged).
    """
    number = pp.Literal("number").setParseAction(lambda: {"type": "number"})
    string = pp.Literal("string").setParseAction(lambda: {"type": "string"})
    count = pp.Literal("count") \
        .setParseAction(lambda: {"type": "integer", "minimum": 0})
    expr = pp.Forward()
    tuple_expr = (pp.Suppress(pp.Literal("(")) +
                  pp.delimitedList(expr) +
                  pp.Suppress(pp.Literal(")"))).setParseAction(tuple_validator)
    array = (length + pp.Suppress("*") + expr).setParseAction(array_validator)
    expr << (tuple_expr | number | string | count | array)
    # throw error on extra stuff
    return pp.LineStart() + expr + pp.LineEnd()
def metaParser():
    """Grammar for `* name = value` meta lines, collected into a Dict."""
    # TODO force case insensitivity in attributeMode keyword match
    # TODO add debug names
    # TODO add a conditional debug flag
    indicator = p.LineStart() + p.Suppress(p.Literal('*'))
    name = p.Word(p.alphanums).setResultsName('metaname')
    separator = p.Suppress(p.Literal('='))
    value = p.Combine(
        p.restOfLine() + p.Suppress(p.LineEnd())).setResultsName('metavalue')
    return p.Dict(p.Group(indicator + name + separator + value))
class Record(Node):
    """
    Single standalone entry of `pactl list`.

    The record is composed of a name and a list of attributes. Pulseaudio
    exposes objects such as cards, sinks and sources as separate records.

    Each attribute may be of a different type. Some attributes are simple
    values while others have finer structure, including lists and even
    additional recursive attributes.
    """

    __fragments__ = {
        'name': 'record-name',
        'attribute_list': lambda t: t['record-attributes'].asList(),
        'attribute_map': lambda t: OrderedDict(
            (attr.name, attr)
            for attr in t['record-attributes'].asList()),
    }

    __syntax__ = (
        p.LineStart()
        + p.NotAny(p.White(' \t'))
        + p.Regex("[A-Z][a-zA-Z ]+ #[0-9]+").setResultsName("record-name")
        + p.LineEnd().suppress()
        + p.OneOrMore(
            p.Or([
                GenericListAttribute.Syntax,
                GenericSimpleAttribute.Syntax,
            ])
        ).setResultsName("record-attributes")
    ).setResultsName("record")

    def as_json(self):
        """JSON-friendly view (intentionally omits attribute_map)."""
        return {
            'name': self.name,
            'attribute_list': self.attribute_list,
        }

    def __repr__(self):
        # Custom __repr__ that skips attribute_map
        shown = ", ".join(
            "{}={!r}".format(field, getattr(self, field))
            for field in ['name', 'attribute_list'])
        return "{}({})".format(type(self).__name__, shown)
def _attributeParser():
    """Grammar for `@name::mode:TYPE(posargs, kw=value, ...)` lines."""
    # TODO force case insensitivity in attributeMode keyword match
    # TODO add debug names
    # TODO add a conditional debug flag
    attributeIndicator = p.LineStart() + p.Suppress(p.Literal('@'))
    attributeName = p.Word(p.alphanums).setResultsName('attributename')
    attributeSeparator = p.Suppress(p.Literal('::'))
    attributeMode = (p.Word(MODE_KEYWORD_SINGLE) |
                     p.Word(MODE_KEYWORD_MULTIPLE)).setResultsName(
                         'attributemode') + p.Literal(':').suppress()
    attributeType = (p.Word(p.alphanums)
                     .setResultsName('attributetype')).setParseAction(caps)
    # A positional arg is a bare word or a bracketed span (brackets kept).
    bracketed = p.Combine(p.Literal('[') + p.SkipTo(']') + p.Literal(']'))
    attributePosargs = p.ZeroOrMore(
        (p.Word(p.alphanums) | bracketed) +
        ~p.FollowedBy(p.Literal('=')) +
        p.Optional(p.Literal(',').suppress())).setResultsName('posargs')
    # Keyword-arg values may be any printable except the arg delimiters.
    kwargprintables = p.printables.translate(str.maketrans('', '', '=,[]()'))
    attributeKwargs = p.ZeroOrMore(
        p.Group(
            p.Word(p.alphanums).setResultsName('keyword') +
            p.Literal('=').suppress() +
            (p.Word(kwargprintables) |
             p.Combine(p.Literal('[').suppress() + p.SkipTo(']') +
                       p.Literal(']').suppress())).setResultsName('value') +
            p.Optional(p.Literal(',').suppress())
            # TODO figure out how to make quotes work as enclosers instead of []
        )).setResultsName('kwargs')
    attributeArgs = (
        p.Literal('(').suppress() + attributePosargs + attributeKwargs +
        p.Literal(')').suppress()).setResultsName('attributeargs')
    return (attributeIndicator + attributeName + attributeSeparator +
            attributeMode + attributeType + p.Optional(attributeArgs))
def __init__(self):
    """Initialize parse state and build the `pragma protect` line grammar."""
    self.session_keys = {}
    self.encrypted_data = None
    self.info = {}
    self.base64_buf = []
    self.key_block = False
    self.data_block = False
    self.p1735 = False

    protect_kw = pp.Keyword('`pragma protect').suppress()
    identifier = pp.Word(pp.alphas, pp.alphanums + "_")
    number = pp.Word(pp.nums).setParseAction(lambda t: int(t[0]))
    string = pp.dblQuotedString().setParseAction(pp.removeQuotes)
    equals = pp.Suppress("=")
    lbrace = pp.Suppress('(')
    rbrace = pp.Suppress(')')
    simpleAssignment = (identifier + equals +
                        (number | string)).setParseAction(
                            self.assignment_action)
    multiAssignment = simpleAssignment + pp.ZeroOrMore(',' + simpleAssignment)
    tupleAssignment = identifier + equals + lbrace + multiAssignment + rbrace
    assignment = protect_kw + (multiAssignment | tupleAssignment)
    PSTART = (protect_kw +
              pp.CaselessLiteral('begin_protected')).setParseAction(self.begin)
    PFINISH = (protect_kw +
               pp.CaselessLiteral('end_protected')).setParseAction(self.finish)
    key_block = (protect_kw +
                 pp.CaselessLiteral('key_block')).setParseAction(
                     self.begin_key_block)
    data_block = (protect_kw +
                  pp.CaselessLiteral('data_block')).setParseAction(
                      self.begin_data_block)
    base64_string = pp.Word(pp.alphanums + "+-/=").setParseAction(
        self.base64_action)
    emptyLine = (pp.LineStart() + pp.LineEnd()).suppress()
    self.parser = (PSTART | assignment | key_block | data_block |
                   base64_string | emptyLine | PFINISH)
def message_parser(text):
    """Extract [message, shift, days, hours, minutes] from a reminder text.

    Returns [] when the time portion does not match the time grammar.

    FIXES: the bare `except:` (which also swallowed KeyboardInterrupt,
    SystemExit and programming errors) is narrowed to pp.ParseException,
    and the duplicated parse_1(text) call is hoisted.
    """
    parsed = parse_1(text)
    message = parsed['message']
    time = parsed['time']
    spaces = pp.White('\t \n\r')
    hours = pp.Word(pp.nums)('hours')
    minutes = pp.Word(pp.nums)('minutes')
    l0 = list(shift_selector.keys())
    pretime = (pp.Optional(pp.Word(pp.nums))('days') +
               pp.oneOf(l0)('shift'))('pretime')
    # time0 = (hours + pp.oneOf(list(':.чЧдД')) + minutes + pp.Optional(pp.oneOf(list('мМ'))) |
    #          hours + pp.Optional(pp.oneOf(list(':.чЧдД'))) |
    #          minutes + pp.Optional(pp.oneOf(list('мМ'))))('time')
    time0 = ((hours + pp.oneOf(list(':.чЧ')) + minutes +
              pp.Optional(pp.oneOf(list('мМ')))) |
             minutes + pp.oneOf(list('мМ')) |
             (hours + pp.Optional(pp.oneOf(list(':.чЧ')))))('time')
    time_parser = (pp.LineStart() + pp.Optional(pretime) + time0 +
                   pp.Optional(spaces) + pp.LineEnd())
    try:
        tim = time_parser.parseString(time)
    except pp.ParseException:
        return []
    else:
        out_days = assign(tim, 'days')
        out_shift = assign(tim, 'shift')
        out_hours = assign(tim, 'hours')
        out_minutes = assign(tim, 'minutes')
        print('days = ' + out_days)
        print('shft = ' + out_shift)
        print('hors = ' + out_hours)
        print('mins = ' + out_minutes)
        out_days = int(out_days) if out_days else 0
        out_hours = int(out_hours) if out_hours else 0
        out_minutes = int(out_minutes) if out_minutes else 0
        return [message, out_shift, out_days, out_hours, out_minutes]
def attributeParser():
    """Grammar for `@name::mode:TYPE(args...)` lines, returned as a Group."""
    # TODO force case insensitivity in attributeMode keyword match
    # TODO add debug names
    # TODO add a conditional debug flag
    attributeIndicator = p.LineStart() + p.Suppress(p.Literal('@'))
    attributeName = p.Word(p.alphanums).setResultsName('attributename')
    attributeSeparator = p.Suppress(p.Literal('::'))
    attributeMode = (p.Word(MODE_KEYWORD_SINGLE) |
                     p.Word(MODE_KEYWORD_MULTIPLE)).setResultsName(
                         'attributemode') + p.Literal(':').suppress()
    attributeType = (p.Word(p.alphanums)
                     .setResultsName('attributetype')).setParseAction(caps)
    # Positional args: bare words or bracketed spans (brackets preserved).
    bracketed = p.Combine(p.Literal('[') + p.SkipTo(']') + p.Literal(']'))
    attributePosargs = p.ZeroOrMore(
        (p.Word(p.alphanums) | bracketed) +
        ~p.FollowedBy(p.Literal('=')) +
        p.Optional(p.Literal(',').suppress())).setResultsName('posargs')
    attributeKwargs = p.ZeroOrMore(
        p.Group(
            p.Word(p.alphanums).setResultsName('keyword') +
            p.Literal('=').suppress() +
            (p.Word(p.alphanums) |
             p.Combine(p.Literal('[') + p.SkipTo(']') +
                       p.Literal(']'))).setResultsName('value') +
            p.Optional(p.Literal(',').suppress()))).setResultsName('kwargs')
    attributeArgs = (
        p.Literal('(').suppress() + attributePosargs + attributeKwargs +
        p.Literal(')').suppress()).setResultsName('attributeargs')
    return p.Group(attributeIndicator + attributeName + attributeSeparator +
                   attributeMode + attributeType + p.Optional(attributeArgs))
def _build_txt_parser():
    """Grammar for `%key=value` metadata followed by delimited numeric rows.

    BUG FIXES (both were NameErrors at call time):
      * the row expression is named `txt_row` in this function but the
        return expression referenced `csv_row`;
      * `block` was never defined here (it belongs to the csv parser's
        indented-block grammar); the flat `key_value` pair is the meta
        unit in the txt format.
    """
    separator = pp.Suppress('=')
    key = pp.Literal('%') + pp.Word(pp.printables, excludeChars='=')
    value = pp.Regex(r'[^\n\r]*') + pp.LineEnd().suppress()
    key_value = key + separator + value
    header = (pp.LineStart().suppress() + pp.Word(pp.nums) +
              pp.ZeroOrMore(pp.White().suppress() + pp.Word(pp.nums)) +
              pp.LineEnd().suppress())
    column_heading = (pp.Literal('%') +
                      pp.Word(pp.printables, excludeChars='=') +
                      separator + value)
    txt_row = (pp.delimitedList(pp.Word(pp.nums + '.+-e_') |
                                pp.Literal('custom')) +
               pp.LineEnd().suppress())
    return pp.Optional(header) + \
        pp.ZeroOrMore(pp.Dict(pp.Group(key_value))).setResultsName('meta') + \
        column_heading.setResultsName('columnHeading') + \
        pp.Group(pp.OneOrMore(pp.Group(txt_row))).setResultsName('textValues')
def _build_sample_parser():
    """Grammar for `%key=value` sample metadata followed by numeric rows."""
    separator = pp.Suppress('=')
    key = (pp.LineStart() + pp.Literal('%').suppress() +
           pp.Word(pp.printables, excludeChars='='))
    # NOTE(review): `|` binds looser than `+`, so this parses as
    # Regex | (Empty + LineEnd().suppress()) — preserved as-is; confirm
    # against the intended grammar.
    value = pp.Regex(r'[^\r%]*') | pp.Empty() + pp.LineEnd().suppress()
    element = pp.Word(pp.alphas)
    elements = pp.Group(
        pp.LineStart().suppress() + pp.Literal('%').suppress() +
        pp.Literal('elements') + separator + element +
        pp.ZeroOrMore(pp.White(ws='\t ').suppress() + element) +
        pp.LineEnd().suppress())
    compositions = pp.Group(
        pp.LineStart().suppress() + pp.Literal('%').suppress() +
        pp.Literal('compositions') + separator + pyparsing_common.number +
        pp.ZeroOrMore(pp.White(ws='\t ').suppress() +
                      pyparsing_common.number) +
        pp.LineEnd().suppress())
    epoch = pp.Group(
        pp.LineStart().suppress() + pp.Literal('%').suppress() +
        pp.Literal('Epoch') + separator + pyparsing_common.number +
        pp.LineEnd().suppress())
    sample = pp.Group(
        pp.LineStart().suppress() + pp.Literal('%').suppress() +
        pp.Literal('Sample') + separator + pyparsing_common.number +
        pp.LineEnd().suppress())
    key_value = (sample | epoch | elements | compositions |
                 pp.Group(key + separator + value))
    row_separator = pp.White(ws='\t ').suppress()
    row = (pp.LineStart().suppress() + pyparsing_common.number +
           pp.ZeroOrMore(row_separator + pyparsing_common.number) +
           pp.LineEnd().suppress())
    return pp.OneOrMore(pp.Dict(key_value)).setResultsName('meta') + \
        pp.Group(pp.ZeroOrMore(pp.Group(row))).setResultsName('values')
# Grammar fragments for MuJoCo (mjAPI) C header declarations.
VOID = pp.Group(pp.Keyword("void") + ~PTR).suppress()

# One function argument: [const] type [*] name [array-size].
ARG = pp.Group(
    pp.Optional(CONST("is_const")) +
    (NATIVE_TYPENAME | NAME)("typename") +
    pp.Optional(PTR("ptr")) +
    NAME("name") +
    pp.Optional(ARRAY_DIM("size")))

# Return type: [const] type [*].
RET = pp.Group(
    pp.Optional(CONST("is_const")) +
    (NATIVE_TYPENAME | NAME)("typename") +
    pp.Optional(PTR("ptr")))

FUNCTION_DECL = ((VOID | RET("return_value")) + NAME("name") + LPAREN +
                 (VOID | pp.delimitedList(ARG, delim=COMMA)("arguments")) +
                 RPAREN + SEMI)

MJAPI_FUNCTION_DECL = pp.Group(
    pp.Optional(MULTILINE_COMMENT("comment")) +
    pp.LineStart() + MJAPI + FUNCTION_DECL)

# e.g.
# // predicate function: set enable/disable based on item category
# typedef int (*mjfItemEnable)(int category, void* data);
FUNCTION_PTR_TYPE_DECL = pp.Group(
    pp.Optional(MULTILINE_COMMENT("comment")) +
    TYPEDEF + RET("return_type") + LPAREN + PTR + NAME("typename") + RPAREN +
    LPAREN + (VOID | pp.delimitedList(ARG, delim=COMMA)("arguments")) +
    RPAREN + SEMI)

# Global variables.
# ------------------------------------------------------------------------------
MJAPI_STRING_ARRAY = (MJAPI + EXTERN + CONST + pp.Keyword("char") + PTR +
                      NAME("name") + pp.OneOrMore(ARRAY_DIM)("dims") + SEMI)
class sparc_syntax:
    """SPARC assembler syntax: grammar plus parse actions building env/IR objects."""

    divide = False
    noprefix = False

    comment = pp.Regex(r'\#.*')
    symbol = pp.Regex(r'[A-Za-z_.$][A-Za-z0-9_.$]*').setParseAction(
        lambda r: env.ext(r[0], size=32))
    mnemo = pp.LineStart() + symbol + pp.Optional(pp.Literal(',a'))
    mnemo.setParseAction(lambda r: r[0].ref.lower() + ''.join(r[1:]))
    integer = pp.Regex(r'[1-9][0-9]*').setParseAction(lambda r: int(r[0], 10))
    hexa = pp.Regex(r'0[xX][0-9a-fA-F]+').setParseAction(
        lambda r: int(r[0], 16))
    octa = pp.Regex(r'0[0-7]*').setParseAction(lambda r: int(r[0], 8))
    bina = pp.Regex(r'0[bB][01]+').setParseAction(lambda r: int(r[0], 2))
    char = pp.Regex(r"('.)|('\\\\)").setParseAction(lambda r: ord(r[0]))
    number = integer | hexa | octa | bina | char
    number.setParseAction(lambda r: env.cst(r[0], 32))
    term = symbol | number
    exp = pp.Forward()
    op_one = pp.oneOf("- ~")
    op_sig = pp.oneOf("+ -")
    op_mul = pp.oneOf("* /")
    op_cmp = pp.oneOf("== != <= >= < > <>")
    op_bit = pp.oneOf("^ && || & |")
    operators = [
        (op_one, 1, pp.opAssoc.RIGHT),
        (op_sig, 2, pp.opAssoc.LEFT),
        (op_mul, 2, pp.opAssoc.LEFT),
        (op_cmp, 2, pp.opAssoc.LEFT),
        (op_bit, 2, pp.opAssoc.LEFT),
    ]
    reg = pp.Suppress('%') + pp.NotAny(pp.oneOf('hi lo')) + symbol
    hilo = pp.oneOf('%hi %lo') + pp.Suppress('(') + exp + pp.Suppress(')')
    exp << pp.operatorPrecedence(term | reg | hilo, operators)
    adr = pp.Suppress('[') + exp + pp.Suppress(']')
    mem = adr  # +pp.Optional(symbol|imm)
    mem.setParseAction(lambda r: env.mem(r[0]))
    opd = exp | mem | reg
    opds = pp.Group(pp.delimitedList(opd))
    instr = mnemo + pp.Optional(opds) + pp.Optional(comment)

    def action_reg(toks):
        # Map a register symbol to the env register object.
        rname = toks[0]
        if rname.ref.startswith('asr'):
            return env.reg(rname.ref)
        return env.__dict__[rname.ref]

    def action_hilo(toks):
        # %hi(x) / %lo(x) operators.
        v = toks[1]
        return env.hi(v) if toks[0] == '%hi' else env.lo(v).zeroextend(32)

    def action_exp(toks):
        # Fold an operatorPrecedence result into env expression objects.
        tok = toks[0]
        if isinstance(tok, env.exp):
            return tok
        if len(tok) == 2:
            op = tok[0]
            r = tok[1]
            if isinstance(r, list):
                r = action_exp(r)
            return env.oper(op, r)
        elif len(tok) == 3:
            op = tok[1]
            l = tok[0]
            r = tok[2]
            if isinstance(l, list):
                l = action_exp(l)
            if isinstance(r, list):
                r = action_exp(r)
            return env.oper(op, l, r)
        else:
            return tok

    def action_instr(toks):
        # Build the instruction object from mnemonic + operand group.
        i = instruction('')
        i.mnemonic = toks[0]
        if len(toks) > 1:
            i.operands = toks[1][0:]
        return asmhelper(i)

    # actions:
    reg.setParseAction(action_reg)
    hilo.setParseAction(action_hilo)
    exp.setParseAction(action_exp)
    instr.setParseAction(action_instr)
class LogicalLineExtractor:
    """Extract logical (backslash-continuation-joined) lines from a Dockerfile."""

    DEFAULT_WHITESPACE = ' \t'
    BACKSLASH = '\\'
    HASH_MARK = '#'
    UNICODE_PRINTABLES = pp.printables  # pp.pyparsing_unicode.printables
    # STANDARD_CHARS = UNICODE_PRINTABLES.replace(BACKSLASH, '').replace(HASH_MARK, '')
    EOL = pp.LineEnd().suppress()
    SOL = pp.LineStart().leaveWhitespace()
    COMMENT = (HASH_MARK + pp.restOfLine).suppress()
    CONTINUATION = (pp.Literal(BACKSLASH).leaveWhitespace() + EOL).suppress()
    BLANKLINE = SOL + pp.LineEnd() | SOL + COMMENT + pp.LineEnd()

    @classmethod
    def parse_dockerfile(cls, context):
        """
        Parses the logical lines of a Dockerfile with pyparse.
        Returns tuples of tokens and physical line numbers.
        """
        parser = cls._parser()
        return [line for line in parser.parseString(context) if line]

    @classmethod
    def _parser(cls):
        # Exclude newlines from the default whitespace characters
        # We need to deal with them manually
        pp.ParserElement.setDefaultWhitespaceChars(cls.DEFAULT_WHITESPACE)
        parser = pp.OneOrMore(cls._line()) + pp.StringEnd()
        parser.ignore(cls.BLANKLINE)
        parser.ignore(cls.COMMENT)
        return parser

    @classmethod
    def _line(cls):
        text = cls._free_form_text()
        physical_line = text + cls.EOL
        physical_line.setParseAction(
            lambda origString, loc, tokens:
                (tokens[0][0].rstrip(), pp.lineno(loc, origString)))
        logical_line = pp.OneOrMore(text + cls.CONTINUATION) + physical_line
        logical_line.setParseAction(
            lambda origString, loc, tokens:
                (''.join([x[0].lstrip() for x in tokens]),
                 pp.lineno(loc, origString)))
        return physical_line | logical_line | cls.EOL

    @classmethod
    def _free_form_text(cls):
        # Free-form text includes internal whitespace, but not leading or trailing
        text = pp.OneOrMore(
            pp.White(cls.DEFAULT_WHITESPACE) |
            pp.QuotedString("'", multiline=False, escChar="\\",
                            unquoteResults=False) |
            pp.QuotedString('"', multiline=False, escChar="\\",
                            unquoteResults=False) |
            pp.Word(cls.UNICODE_PRINTABLES,
                    excludeChars=cls.HASH_MARK + cls.BACKSLASH) |
            cls._escape_codes())
        text.setParseAction(
            lambda origString, loc, tokens:
                (''.join(tokens), pp.lineno(loc, origString)))
        return text

    @classmethod
    def _escape_codes(cls):
        escaped_hash = pp.Literal(cls.BACKSLASH + cls.HASH_MARK)
        escaped_backslash = pp.Literal(cls.BACKSLASH + cls.BACKSLASH)
        return (cls.BACKSLASH + pp.Word(cls.UNICODE_PRINTABLES, exact=1)
                ) | escaped_hash | escaped_backslash
RET = pp.Group( pp.Optional(CONST("is_const")) + (NATIVE_TYPENAME | NAME)("typename") + pp.Optional(PTR("ptr"))) FUNCTION_DECL = ( (VOID | RET("return_value")) + NAME("name") + LPAREN + (VOID | pp.delimitedList(ARG, delim=COMMA)("arguments")) + RPAREN + SEMI) MJAPI_FUNCTION_DECL = pp.Group( pp.Optional(MULTILINE_COMMENT("comment")) + pp.LineStart() + MJAPI + FUNCTION_DECL) # e.g. # // predicate function: set enable/disable based on item category # typedef int (*mjfItemEnable)(int category, void* data); FUNCTION_PTR_TYPE_DECL = pp.Group( pp.Optional(MULTILINE_COMMENT("comment")) + TYPEDEF + RET("return_type") + LPAREN + PTR + NAME("typename") + RPAREN + LPAREN +
import pyparsing as pp

# --- [Metadata]/[Tags] key:value sections ---
key = pp.LineStart().suppress() + pp.Word(pp.alphanums) + pp.Suppress(':')
value = pp.restOfLine + pp.LineEnd().suppress()
kvParser = pp.dictOf(key, value)

###
# --- named [session] sections holding free-form lines ---
session_name = pp.OneOrMore(pp.Word(pp.alphanums))
session = (pp.LineStart().suppress() + pp.Suppress('[') + session_name +
           pp.Suppress(']') + pp.LineEnd().suppress())
zeroOrMoreEmptyLines = pp.ZeroOrMore(pp.LineEnd().suppress())
line = (pp.LineStart() +
        pp.SkipTo(pp.LineEnd(), failOn=session) +
        pp.LineEnd().suppress())
lines = pp.Group(pp.ZeroOrMore(line.leaveWhitespace()))
contentParser = pp.dictOf(session, lines)

Parser = (pp.Keyword('[Metadata]').suppress() +
          pp.LineEnd().suppress() +
          kvParser.setResultsName('Metadata') +
          pp.Keyword('[Tags]').suppress() +
          pp.LineEnd().suppress() +
          kvParser.setResultsName('Tags') +
          pp.Group(contentParser).setResultsName('Content'))
def chordpro2html(song):
    """Convert a ChordPro song text to HTML; returns (html, title, artist)."""
    title = "Unknown Title"
    artist = "Unknown Artist"
    output = ''
    SongState = Enum('SongState', 'NONE VERSE CHORUS TAB BRIDGE')
    songState = SongState.NONE
    textBuf = ''

    # --- pyparsing parse-action handlers ---

    def handleEmptyLine(t):
        # switch-bak songState
        nonlocal songState
        if songState == SongState.VERSE:
            # reset from default state
            songState = SongState.NONE
            return "</div>\n<br>"
        else:
            return "<br>"

    def handleSongLine(t):
        # postponed handling of total line
        nonlocal textBuf, songState
        lineHasText = len(textBuf.strip()) > 0
        textBuf = ''  # not needed any longer
        line = ''  # prepare output-line
        if songState == SongState.NONE:
            # default state!
            songState = SongState.VERSE
            line += '<div class="verse">'
        line += '<div class="songline">'
        # NOTE(review): the second argument of the .replace(' ', ' ') calls
        # below appears to be a non-breaking space in the original source —
        # verify the codepoint survived re-encoding.
        for item in t:
            line += '<div class="chordbox">'
            # chord+text box ---------------------------------------
            if len(item) == 2:
                if args.wrap_chords:
                    line += '<div class="chord">' + item.chord + '</div>'
                else:
                    line += '<div class="chord">' + item.chord[1:-1] + '</div>'
                if lineHasText:
                    line += ('<div class="text">' +
                             item.text.replace(' ', ' ') + '</div>')
            # single chord box ---------------------------------------
            elif len(item) == 1 and len(item.chord) > 0:
                if args.wrap_chords:
                    line += '<div class="chord">' + item.chord + '</div>'
                else:
                    line += '<div class="chord">' + item.chord[1:-1] + '</div>'
                if lineHasText:
                    line += '<div class="text"> </div>'
            # single text box ---------------------------------------
            elif len(item) == 1 and len(item.text) > 0:
                line += ('<div class="text">' +
                         item.text.replace(' ', ' ') + '</div>')
            # unhandled...
            else:
                logging.info(item.dump())
            line += '</div>'  # ...chordbox
        line += '</div>'  # ...songLine
        return line

    def handleText(t):
        # store text in shadow buffer for later analysis
        nonlocal textBuf
        textBuf += t[0]
        return t

    def handleEnvDirective(t):
        # switch songState
        nonlocal songState
        token = t[0].strip().lower()
        strReturn = ''
        if songState != SongState.NONE:
            # force switching!
            songState = SongState.NONE
            strReturn += '</div>'
        if token in ['start_of_chorus', 'soc']:
            songState = SongState.CHORUS
            strReturn += '<div class="chorus">'
        elif token in ['start_of_tab', 'sot']:
            songState = SongState.TAB
            strReturn += '<div class="tab">'
        return strReturn

    def handleFormDirective(t):
        # only comments so far....
        token = t[0].strip().lower()
        arg = t[1]
        strReturn = ''
        if token in ['comment', 'c']:
            arg = arg.replace('\n', '<br>')
            strReturn += '<div class="comment">' + arg + '</div>'
        elif token in ['comment_box', 'cb']:
            arg = arg.replace('\n', '<br>')
            strReturn += '<div class="commentbox">' + arg + '</div>'
        else:
            # unhandled...
            logging.info(t.dump())
        return strReturn

    def handleMetaDirective(t):
        nonlocal title, artist
        token = t[0].strip().lower()
        arg = t[1]
        strReturn = ''
        if token in ['title', 't']:
            title = arg
            strReturn += '<div class="title">' + arg + '</div>'
        elif token in ['artist', 'a']:
            artist = arg
            strReturn += '<div class="artist">' + arg + '</div>'
        else:
            # unhandled...
            logging.info(t.dump())
        return strReturn

    # --- pyparsing grammar definition: directives ---
    pp.ParserElement.setDefaultWhitespaceChars('')
    #lyricCharSet = pp.alphanums+pp.alphas8bit+",-_:;.!?#+*^°§$%&/|()='`´\\\"\t "  # everything but "{}[]"
    lyricCharSet = pp.pyparsing_unicode.Latin1.printables + "\t "
    chordCharSet = pp.alphanums + " -#(%)/='`´."

    cmd = pp.oneOf("title t artist a")
    arg = pp.SkipTo('}')
    metaDirective = pp.Suppress('{') + cmd + pp.Suppress(':') + arg
    metaDirective.setParseAction(handleMetaDirective)

    cmd = pp.oneOf("comment c comment_box cb")
    arg = pp.SkipTo('}')
    formDirective = pp.Suppress('{') + cmd + pp.Suppress(':') + arg
    formDirective.setParseAction(handleFormDirective)

    cmd = pp.oneOf(
        "start_of_chorus soc end_of_chorus eoc start_of_tab sot end_of_tab eot"
    )
    envDirective = pp.Suppress('{') + cmd + pp.Suppress('}')
    envDirective.setParseAction(handleEnvDirective)

    directives = metaDirective | formDirective | envDirective

    # --- pyparsing grammar definition: chordlines ---
    whiteSpaces = pp.Word(" \t")
    emptyLine = (pp.LineStart() + pp.Optional(whiteSpaces) +
                 pp.LineEnd())  # incl. whiteSpaces
    emptyLine.setParseAction(handleEmptyLine)
    lineStart = pp.LineStart()
    # needs Unix type line-endings (at the moment...)
    lineEnd = pp.Suppress(pp.LineEnd())
    # leave square brackets there....
    chord = pp.Combine('[' + pp.Word(chordCharSet) + ']')
    text = pp.Word(lyricCharSet, excludeChars="[]{}")
    text.setParseAction(handleText)
    chordbox = pp.Group(
        # whiteSpaces after chord seperates the chord from further text
        (chord("chord") + whiteSpaces("text")) |
        # standard chordbox with chord AND text
        (chord("chord") + text("text")) |
        chord("chord") |   # single chord w/o text
        text("text"))      # single text w/o chord
    songLine = lineStart + pp.OneOrMore(chordbox) + lineEnd
    songLine.setParseAction(handleSongLine)

    # >emptyLine< MUST be bofore >songLine< to catch emptyLine-action
    markup = emptyLine | songLine | directives

    for result in markup.searchString(song):
        output += result[0] + '\n'
    #logging.info(output)
    return output, title, artist