def _build_ana_rcp_parser():
    """Build a pyparsing parser for ANA/RCP files.

    The format is a sequence of ``key: value`` lines, where a bare
    ``name:`` followed by an indented region introduces a nested block
    (handled via ``pp.indentedBlock``). A few keys have structured
    values and are special-cased below.
    """
    separator = pp.Suppress(':')
    # A key is any run of printables up to the ':' separator.
    key = pp.Word(pp.printables, excludeChars=':')
    # A value is the remainder of the line; the line ending is consumed.
    value = pp.Regex(r'[^\n\r]*') + pp.LineEnd().suppress()
    # A block introducer is "name:" immediately followed by a newline.
    block_name = key + separator + pp.LineEnd().suppress()
    # Special-cased keys with structured (comma-delimited) values:
    platemap_keylist = pp.Literal(
        'platemap_comp4plot_keylist') + separator + pp.delimitedList(
            pp.Word(pp.alphas))
    run_ids = pp.Literal('run_ids') + separator + pp.delimitedList(
        pyparsing_common.integer)
    # Accept both the plural and the singular spelling of the key.
    plate_id = (pp.Literal('plate_ids')
                | pp.Literal('plate_id')) + separator + pyparsing_common.integer
    key_value = (platemap_keylist | run_ids | plate_id
                 | key + separator + value)
    # indentedBlock mutates this stack to track the current indent level.
    indent_stack = [1]
    block = pp.Forward()
    block_body = (block | key_value)
    indented_block = pp.Dict(
        pp.ungroup(pp.indentedBlock(block_body, indent_stack)))
    # A block is either a named, indented sub-block or a flat key/value.
    block << (block_name + indented_block | key_value)
    return pp.OneOrMore(pp.Dict(pp.Group(block)))
def lexical_analysis(self, src):
    """Tokenize *src* into declaration and transition constructs.

    Grammar, one construct per line:
      ``@[name]`` graph, ``#[name]`` view, ``$[name]`` server process,
      ``%[name]`` client process, ``--> text`` view transition,
      ``==> text`` process transition. Returns the raw pyparsing
    ParseResults for the whole input.
    """
    # Free text: ASCII word/punctuation characters plus Japanese
    # hiragana, katakana and kanji ranges.
    string = pp.Regex('[a-zA-Z0-9_{}"=+\-*/\.:;&%@$#<>? a-zA-Zぁ-ゔゞァ-・ヽヾ゛゜ー一-龯]+')
    # A blank line: start of line immediately followed by end of line.
    blank = pp.LineStart() + pp.LineEnd()
    start = '['
    end = ']' + pp.LineEnd()
    graph_tag = pp.LineStart() + '@'
    graph = graph_tag + start + string + end
    view_tag = pp.LineStart() + '#'
    view = view_tag + start + string + end
    server_process_tag = pp.LineStart() + '$'
    server_process = server_process_tag + start + string + end
    client_process_tag = pp.LineStart() + '%'
    client_process = client_process_tag + start + string + end
    view_transition_identifier = pp.LineStart() + '-->'
    view_transition = view_transition_identifier + string
    process_transition_identifier = pp.LineStart() + '==>'
    process_transition = process_transition_identifier + string
    state_machine = pp.OneOrMore(graph | view | server_process
                                 | client_process | view_transition
                                 | process_transition | string | blank)
    return state_machine.parseString(src)
def parse_diearea(self):
    """Build the grammar for the DEF ``DIEAREA`` statement.

    Matches ``DIEAREA ( x y ) ( x y ) ... ;`` — one or more coordinate
    points, where a coordinate component may be the ``*`` wildcard —
    and returns a Group with results name ``'DIEAREA'``.

    Blocks until ``self.events[0]`` is set (parse_dbuPerMicron has
    priority over this parser).
    """
    # Statement terminator: ';' followed by end of line.
    linebreak = pp.Suppress(";" + pp.LineEnd())
    number = pp.pyparsing_common.number
    LPAR = pp.Suppress('(')
    RPAR = pp.Suppress(')')
    # A point: '(' then numbers or the '*' wildcard, then ')'.
    pt = LPAR + pp.OneOrMore(number | pp.Keyword('*')) + RPAR  # pair of x,y
    # Unused boilerplate locals (EOL, identifier, word, ORIENT) that this
    # method inherited from its siblings have been removed; the identifier
    # pattern also carried an invalid '\]' escape sequence.
    self.events[0].wait()  # event[0] (parse_dbuPerMicron) has priority
    diearea_id = pp.Keyword('DIEAREA')
    diearea = pp.Group(
        pp.Suppress(diearea_id) + pp.OneOrMore(pt) +
        linebreak).setResultsName('DIEAREA')
    return diearea
def parse_netlist(netlist='', types=None):
    """Return graph and controlled-sources dictionary of *netlist*.

    Each netlist line is ``<element> <node> <node> [extra...]``; comment
    lines start with '*' and command lines with '.'. The graph maps each
    branch name to its two nodes. For controlled sources (types F/H and
    E/G) the controlling branch is recorded in ``ctrl_src``; a controlled
    voltage source given by a node pair gets a synthetic open-circuit
    branch ``'O' + name`` inserted into the graph.

    Raises:
        ValueError: if a branch name appears twice in the netlist.
    """
    # BUG FIX: 'types={}' was a mutable default argument, shared across
    # calls; use None and create a fresh dict per call.
    if types is None:
        types = {}
    COMMENT = "*" + parse.Optional(parse.restOfLine)
    CMD = "." + parse.Optional(parse.restOfLine)
    NAME = parse.Word(parse.alphanums + "_")
    TYPE = parse.oneOf('R L C V I E F G H O N U', caseless=True)
    ELEMENT = parse.Combine(TYPE + parse.Optional(NAME))
    # A line: element, two nodes, then up to three optional extra tokens
    # (same line only — the ~LineEnd() guard stops at the newline).
    LINE = ELEMENT + NAME + NAME
    LINE += parse.Optional(~parse.LineEnd() + NAME)
    LINE += parse.Optional(~parse.LineEnd() + NAME)
    LINE += parse.Optional(~parse.LineEnd() + NAME)
    NETLIST = parse.ZeroOrMore(parse.Group(LINE))
    NETLIST.ignore(COMMENT)
    NETLIST.ignore(CMD)
    graph = {}
    ctrl_src = {}
    for item in NETLIST.parseString(netlist).asList():
        brn = item[0]
        vals = item[1:]
        if brn in graph:
            # ValueError is a subclass of Exception, so existing callers
            # catching Exception still work.
            raise ValueError(
                'Branch {} is already in graph: {}'.format(brn, graph[brn]))
        graph[brn] = vals[:2]
        if btype(brn, types) in 'FH':
            ctrl_src[brn] = vals[2]
        if btype(brn, types) in 'EG':
            if len(vals) == 3:
                ctrl_src[brn] = vals[2]
            elif len(vals) > 3:
                # insert open circuit for controlled voltage
                graph['O' + brn] = vals[2:4]
                ctrl_src[brn] = 'O' + brn
    return Graph(graph), ctrl_src
def asn1_loads(asn1_str): """ Parse an ASN.1 file This is currently Pseudo-ASN; modify to become actual ASN.1 """ # ASN.1 grammar identifier = pp.Word(pp.alphas + "_") assign = pp.Literal("::=") # typedef = identifier.setName("typeref") + assign + identifier.setName("basetype") comment1 = pp.Literal("#") + pp.originalTextFor(pp.SkipTo(pp.LineEnd())) # typelist = pp.OneOrMore(typedef) meta1 = pp.LineStart() + identifier + pp.Literal(":") + pp.SkipTo( pp.LineEnd()).setDebug() meta2 = pp.LineStart() + pp.White() + pp.SkipTo(pp.LineEnd()).setDebug() metaval = meta1 + pp.ZeroOrMore(meta2) # metalist = pp.ZeroOrMore(comment1) + pp.Literal("/*") + pp.OneOrMore(metaval) + pp.Literal("*/") metalist = pp.SkipTo(pp.Literal("/*")).setDebug() + pp.Literal( "/*") + pp.OneOrMore(metaval).setDebug() + pp.Literal("*/") asn1 = metalist.parseString(asn1_str, parseAll=False) print(asn1) jaen = {"meta": {}, "types": []} return jaen
def _build_csv_parser():
    """Build a pyparsing parser for CSV exports with a metadata preamble.

    Layout: an optional numeric header line, indentation-nested
    ``key: value`` metadata blocks (results name 'meta'), a
    comma-delimited column-name line ('csvHeader'), then the data rows
    ('csvValues').
    """
    separator = pp.Suppress(':')
    key = pp.Word(pp.printables, excludeChars=':')
    # Value: remainder of the line; line ending consumed.
    value = pp.Regex(r'[^\n\r]*') + pp.LineEnd().suppress()
    # A bare "name:" line introduces an indented sub-block.
    block_name = key + separator + pp.LineEnd().suppress()
    key_value = key + separator + value
    # Header: a line of whitespace-separated integers.
    header = (pp.LineStart().suppress() + pp.Word(pp.nums) + pp.ZeroOrMore(
        pp.White().suppress() + pp.Word(pp.nums)) + pp.LineEnd().suppress())
    csv_header = pp.delimitedList(
        pp.Word(pp.printables, excludeChars=',')) + pp.LineEnd().suppress()
    # Data cells are numeric-ish tokens or the literal 'custom'.
    csv_row = pp.delimitedList(
        pp.Word(pp.nums + ';.+-e_')
        | pp.Literal('custom')) + pp.LineEnd().suppress()
    # indentedBlock mutates this stack to track indent levels.
    indent_stack = [1]
    block = pp.Forward()
    block_body = (block | key_value)
    indented_block = pp.Dict(
        pp.ungroup(pp.indentedBlock(block_body, indent_stack)))
    block << (block_name + indented_block | key_value)
    return pp.Optional(header) + \
        pp.ZeroOrMore(pp.Dict(pp.Group(block))).setResultsName('meta') + \
        csv_header.setResultsName('csvHeader') + \
        pp.Group(pp.OneOrMore(pp.Group(csv_row))).setResultsName('csvValues')
def parse_dbuPerMicron(self):
    """Build the grammar for the DEF ``UNITS DISTANCE MICRONS`` statement.

    Matches ``UNITS DISTANCE MICRONS <n> ;`` and exposes the number
    under the results name ``'dbuPerMicron'``.

    Blocks until ``self.events[0]`` is set; per the original comment,
    event[0] gives this parser priority.
    """
    # Statement terminator: ';' followed by end of line.
    linebreak = pp.Suppress(";" + pp.LineEnd())
    number = pp.pyparsing_common.number
    # Unused boilerplate locals (EOL, identifier, word, LPAR, RPAR,
    # ORIENT, pt) shared with sibling parse_* methods have been removed;
    # the identifier pattern also carried an invalid '\]' escape.
    self.events[0].wait()  # event[0] (parse_dbuPerMicron) has priority
    dbuPerMicron_id = pp.Keyword('UNITS DISTANCE MICRONS')
    dbuPerMicron = dbuPerMicron_id + number('dbuPerMicron') + linebreak
    return dbuPerMicron
class PortWithProfile(Node):
    """
    Variant of :class:`Port` that is used by "card" records inside
    the "Ports" property. It differs from the normal port syntax by having
    different entries inside the last section. Availability is not listed
    here, only priority. Priority does not have a colon before the actual
    number. This port is followed by profile assignment.
    """

    # Maps node attribute names to pyparsing results names (or extractor
    # callables) used to populate instances from parse results.
    __fragments__ = {
        'name': 'port-name',
        'label': 'port-label',
        'priority': 'port-priority',
        'latency_offset': 'port-latency-offset',
        'availability': 'port-availability',
        'properties': lambda t: t['port-properties'].asList(),
        'profile_list': lambda t: t['port-profile-list'].asList(),
    }

    __syntax__ = (
        p.Word(p.alphanums + "-;").setResultsName('port-name')
        + p.Suppress(':')
        # This part was very tricky to write. The label is basically
        # arbitrary localized Unicode text. We want to grab all of it in
        # one go but without consuming the upcoming and latest '('
        # character or the space that comes immediately before.
        #
        # The syntax here combines a sequence of words, as defined by
        # anything other than a space and '(', delimited by a single
        # whitespace.
        + p.Combine(
            p.OneOrMore(
                # BUG FIX: the lookahead regex was a non-raw string with
                # an invalid '\(' escape sequence; now a raw string.
                ~p.FollowedBy(p.Regex(r'\(.+?\)') + p.LineEnd())
                + p.Regex('[^ \n]+')
                + p.White().suppress()),
            ' ').setResultsName('port-label')
        + p.Suppress('(')
        + p.Keyword('priority').suppress()
        + p.Optional(p.Suppress(':'))
        + p.Word(p.nums).setParseAction(lambda t: int(t[0])).setResultsName(
            'port-priority')
        # Optional ", latency offset: <n> usec" clause.
        + p.Optional(
            p.MatchFirst([
                p.Suppress(',') + p.Keyword('latency offset:').suppress()
                + p.Word(p.nums).setParseAction(lambda t: int(t[0]))
                + p.Literal("usec").suppress(),
                p.Empty().setParseAction(lambda t: '')
            ]).setResultsName('port-latency-offset'))
        # Optional ", available" / ", not available" clause.
        + p.Optional(
            p.MatchFirst([
                p.Suppress(',') + p.Literal('not available'),
                p.Suppress(',') + p.Literal('available'),
                p.Empty().setParseAction(lambda t: '')
            ]).setResultsName('port-availability'))
        + p.Suppress(')')
        + p.LineEnd().suppress()
        # Optional tab-indented "Properties:" sub-section.
        + p.Optional(
            p.MatchFirst([
                p.LineStart().suppress()
                + p.NotAny(p.White(' '))
                + p.White('\t').suppress()
                + p.Keyword('Properties:').suppress()
                + p.LineEnd().suppress()
                + PropertyAttributeValue,
                p.Empty().setParseAction(lambda t: [])
            ]).setResultsName('port-properties'))
        # Trailing "Part of profile(s): a, b, c" list.
        + p.White('\t', max=3).suppress()
        + p.Literal("Part of profile(s)").suppress()
        + p.Suppress(":")
        + p.delimitedList(
            p.Word(p.alphanums + "+-:"),
            ", ").setResultsName("port-profile-list")).setResultsName("port")
def UnxParser():
    """Build the pyparsing grammar for the Unx language.

    A program is an optional include statement, then one or more
    function definitions separated by newlines/comments. Functions are
    named ``<name>`` (pure) or ``[name]`` (impure), optionally typed
    (``:: t -> t``), with a body of primitives (`, s, k, i), function
    references, variable declarations (^x) and uses ($x).
    """
    COMMENT_CHAR = '%%'
    WHITESPACE = ' \t\r'
    PRIMITIVES = '`ski'
    VARIABLES = pp.alphas
    VARIABLE_DECL = '^'
    VARIABLE_USE = '$'
    PURE_FUNCTION_START = '<'
    PURE_FUNCTION_END = '>'
    IMPURE_FUNCTION_START = '['
    IMPURE_FUNCTION_END = ']'
    FUNCTION_CHARS = pp.alphanums + '_'
    TYPE_CHARS = FUNCTION_CHARS
    TYPE_ARROW = '->'
    FUNCTION_DEFINITION_SEPARATOR = '::'
    INCLUDE_KEYWORD = 'include'
    INCLUDE_FILE_CHARS = pp.alphanums + '_./'
    # Newlines are significant: they separate definitions.
    pp.ParserElement.setDefaultWhitespaceChars(WHITESPACE)
    # Comments and blank lines
    comment = pp.Literal(COMMENT_CHAR) + pp.ZeroOrMore(
        pp.Word(pp.printables, excludeChars='\n'))
    newlines = pp.Group(
        pp.OneOrMore((comment + pp.LineEnd())
                     | pp.LineEnd())).setName('new line(s)').suppress()
    # Function definition
    pure_function = pp.Group(
        pp.Char(PURE_FUNCTION_START)
        + pp.Word(FUNCTION_CHARS).setResultsName('pure_function')
        + pp.Char(PURE_FUNCTION_END))
    impure_function = pp.Group(
        pp.Char(IMPURE_FUNCTION_START)
        + pp.Word(FUNCTION_CHARS).setResultsName('impure_function')
        + pp.Char(IMPURE_FUNCTION_END))
    function_sep = pp.Literal(FUNCTION_DEFINITION_SEPARATOR).suppress()
    variable_declaration = pp.Literal(VARIABLE_DECL).suppress() + pp.Char(VARIABLES)
    variable_use = pp.Literal(VARIABLE_USE).suppress() + pp.Group(
        pp.Char(VARIABLES).setResultsName('variable'))
    # Body: declared variables first, then a sequence of primitives,
    # function references and variable uses.
    function_body = pp.ZeroOrMore(variable_declaration).setResultsName(
        'variables') + pp.OneOrMore(
            pp.Group(pp.Char(PRIMITIVES).setResultsName('primitive'))
            | pure_function | impure_function
            | variable_use).setResultsName('body')
    type_ = pp.Word(TYPE_CHARS)
    type_arrow = pp.Literal(TYPE_ARROW).suppress()
    function_signature = (type_ + pp.ZeroOrMore(type_arrow + type_)).setResultsName('signature')
    function = pp.Group(
        (pure_function.setResultsName('name')
         | impure_function.setResultsName('name'))
        + pp.Optional(function_sep + function_signature)
        + function_sep + function_body).setResultsName('function')
    # Includes
    include_file = pp.Word(INCLUDE_FILE_CHARS)
    include_statement = (pp.Keyword(INCLUDE_KEYWORD).suppress()
                         + pp.OneOrMore(include_file)).setResultsName('includes')
    parser = pp.Optional(pp.Optional(include_statement) + newlines) + (
        pp.Group(function)
        + pp.ZeroOrMore(pp.Group(newlines + function))).setResultsName(
            'definitions') + pp.Optional(newlines)
    return parser
def parse_spectre(netlist_string):
    """Parse a Spectre netlist string into subcircuit/topcircuit structures.

    Handlers (handle_parameters/instance/subcircuit/topcircuit) are
    attached as parse actions, so the returned ParseResults contain
    their converted output.
    """
    # newlines are part of the grammar, thus redifine the whitespaces without it
    ws = ' \t'
    _p.ParserElement.setDefaultWhitespaceChars(ws)
    # spectre netlist grammar definition
    EOL = _p.LineEnd().suppress()  # end of line
    linebreak = _p.Suppress(
        "\\" + _p.LineEnd())  # breaking a line with backslash newline
    identifier = _p.Word(_p.alphanums + '_!<>-+')  # a name for...
    number = _p.Word(_p.nums + ".")  # a number
    net = identifier  # a net
    nets = _p.Group(_p.OneOrMore(net('net') | linebreak))  # many nets
    cktname = identifier  # name of a subcircuit
    cktname_end = _p.Keyword("ends").suppress()
    comment = _p.Suppress("//" + _p.SkipTo(_p.LineEnd()))
    expression = _p.Word(_p.alphanums + '._*+-/()')
    # Instance parameters: name=expression pairs, possibly spanning
    # backslash-continued lines.
    inst_param_key = identifier + _p.Suppress("=")
    inst_param_value = expression('expression')
    inst_parameter = _p.Group(
        inst_param_key('name')
        + inst_param_value('value')).setResultsName('key')
    parameters = _p.Group(
        _p.ZeroOrMore(inst_parameter | linebreak)).setResultsName('parameters')
    instref = identifier
    instname = identifier
    # An instance: name ( nets ) reference params <newline>
    instance = _p.Group(
        instname('name') + _p.Suppress('(') + nets('nets') + _p.Suppress(')')
        + instref('reference') + parameters + EOL).setResultsName('instance')
    subcircuit_content = _p.Group(
        _p.ZeroOrMore(instance | EOL | comment)).setResultsName('subnetlist')
    subcircuit = _p.Group(
        # matches subckt <name> <nets> <newline>
        _p.Keyword("subckt").suppress() + cktname('name') + nets('nets') + EOL
        # matches the content of the subcircuit
        + subcircuit_content
        # matches ends <name> <newline>
        + cktname_end + _p.matchPreviousExpr(cktname).suppress()
        + EOL).setResultsName('subcircuit')
    topcircuit = _p.Group(
        # matches subckt <name> <nets> <newline>
        _p.Keyword("topckt").suppress() + cktname('name') + nets('nets') + EOL
        # matches the content of the subcircuit
        + subcircuit_content
        # matches ends <name> <newline>
        + cktname_end + _p.matchPreviousExpr(cktname).suppress()
        + EOL).setResultsName('topcircuit')
    netlist_element = subcircuit | topcircuit \
        | EOL | comment('comment')
    netlist = _p.ZeroOrMore(netlist_element) + _p.StringEnd()
    # Attach conversion handlers as parse actions.
    parameters.setParseAction(handle_parameters)
    instance.setParseAction(handle_instance)
    subcircuit.setParseAction(handle_subcircuit)
    topcircuit.setParseAction(handle_topcircuit)
    return netlist.parseString(netlist_string)
def _preprocessing_artifact():
    """Matcher for C-preprocessor line markers, e.g. ``# 12 "file.h" ...``.

    The whole artifact is suppressed from the parse results.
    """
    hash_mark = pyparsing.Literal('#')
    line_number = _natural()
    file_name = pyparsing.dblQuotedString
    trailing_flags = pyparsing.SkipTo(pyparsing.LineEnd())
    artifact = hash_mark + line_number + file_name + trailing_flags
    return artifact.suppress()
def _funcParser():
    """Build the grammar for '!name::module(args...)' func attributes."""
    # --- func attribute parser ---
    # TODO add debug names
    # TODO add a conditional debug flag
    # A '{...}'-enclosed string; the regex matches the opening brace and
    # contents (allowing '{}' pairs and backslash escapes), '+ "}"' the close.
    bracedString = p.Combine(
        p.Regex(r"{(?:[^{\n\r\\]|(?:{})|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")
        + "}").setName("string enclosed in braces")
    funcIndicator = p.Literal('!')
    funcIndicator.setName('indicator')
    funcName = p.Word(p.alphanums)
    funcName.setName('name')
    funcSeparator = p.Suppress(p.Literal('::'))
    funcSeparator.setName('separator')
    funcModule = p.Word(p.printables, excludeChars='(')
    funcModule.setName('module')
    # NOTE(review): funcDemarcStart/End and bracedString are currently
    # unused — kept for the commented-out alternative pattern below.
    funcDemarcStart = p.Literal("(")
    funcDemarcStart.setName('demarcstart')
    funcDemarcEnd = p.Literal(")")
    funcDemarcEnd.setName('demarcend')
    funcMiddle = p.nestedExpr(
    )  #(p.sglQuotedString() | bracedString()) # | p.dblQuotedString())
    funcMiddle.setName('middle')
    # Full pattern: at line start, '!name::module(args...)', optional EOL.
    funcPattern = p.LineStart() + p.Suppress(funcIndicator) + funcName + p.Suppress(funcSeparator) + \
        funcModule + funcMiddle + \
        p.Suppress(p.Optional(p.LineEnd()))
    #funcModule + p.Suppress(funcDemarcStart) + p.Optional(funcMiddle) + p.Suppress(funcDemarcEnd) + \
    return funcPattern
def capnpCreateGrammar():
    '''
    Creates the pyparsing grammar for capnproto and creates actions
    to convert to RST
    '''
    # Whitespace handling: collapse to one space, force one space, or delete.
    ws2space = pp.ZeroOrMore(pp.White()).setParseAction(lambda t: ' ')
    nonopt2space = pp.OneOrMore(pp.White()).setParseAction(lambda t: ' ')
    ws2del = ppDel(pp.ZeroOrMore(pp.White()))
    # Punctuation wrapped in styled spans.
    bracket0 = ppToSpan(pp.Empty() + '{\n', 'cp_op_curly_open')
    bracket1 = ppToSpan(pp.Empty() + '}', 'cp_op_curly_close')
    semi = ppToSpan(pp.Empty() + ';', 'cp_op_semi')
    structNameRaw = pp.Word(pp.alphanums)
    structName = ppToSpan(pp.Empty() + structNameRaw, 'cp_struct_name',
                          ppToAnchor)
    fieldName = ppToSpan(pp.Word(pp.alphanums), 'cp_field_name')
    enumName = ppToSpan(pp.Word(pp.alphanums), 'cp_enum_name', ppToAnchor)
    structKeyword = ppToSpan(pp.Empty() + 'struct', 'cp_struct_keyword')
    enumKeyword = ppToSpan(pp.Empty() + 'enum', 'cp_enum_keyword')
    # The file id ('@...;') is dropped from the output.
    cpid = ppDel('@' + pp.Word(pp.alphanums) + ';')
    ordinal = ppToSpan('@' + ws2del + pp.Word(pp.nums), 'cp_ordinal')
    # A field type is either a plain name or 'List(Name)'.
    fieldType = pp.Or([
        (pp.Empty() + structNameRaw).setParseAction(fieldTypeAction),
        ('List(' + structNameRaw + ')').setParseAction(fieldTypeListAction)])
    field = ppToDiv(
        fieldName + ws2space + ordinal + ws2del + ':' + ws2space + fieldType
        + ws2del + semi, 'cp_field')
    comment = ppToDiv(
        ws2del + '#' + ws2space + pp.Word(pp.printables + ' ') + pp.LineEnd(),
        'cp_comment', lambda t: ''.join(t[:-1]))
    comment_struct = ppToDiv(
        ws2del + '#' + ws2space + pp.Word(pp.printables + ' ') + pp.LineEnd(),
        'cp_comment_struct', lambda t: ''.join(t[:-1]))
    enum_field = ppToDiv(
        ws2del + fieldName + ws2space + ordinal + ws2del + semi + ws2del,
        'cp_enum_field')
    enum_entry = ws2del + pp.Or([comment, enum_field]) + ws2del
    enum_body = ppToDiv(pp.ZeroOrMore(enum_entry), 'cp_scope')
    enum = ppToDiv(
        pp.ZeroOrMore(comment) + enumKeyword + nonopt2space + enumName
        + ws2space + bracket0 + ws2del + enum_body + ws2del + bracket1,
        'cp_enum')
    # Structs nest: a struct body may contain comments, fields, enums and
    # further structs, hence the Forward.
    struct = pp.Forward()
    struct_entry = ws2del + pp.Or([comment, field, enum, struct]) + ws2del
    struct_body = ppToDiv(pp.ZeroOrMore(struct_entry), 'cp_scope')
    struct << ppToDiv(
        pp.ZeroOrMore(comment) + structKeyword + nonopt2space + structName
        + ws2space + bracket0 + ws2del + struct_body + ws2del + bracket1,
        'cp_struct')
    # Top-level structs get their own comment and div classes.
    mainstruct = pp.Forward()
    mainstruct << ppToDiv(
        pp.ZeroOrMore(comment_struct) + structKeyword + nonopt2space
        + structName + ws2space + bracket0 + ws2del + struct_body + ws2del
        + bracket1, 'cp_mainstruct')
    # 'using import "file".Name;' lines are dropped.
    using = ppDel(pp.Empty() + 'using import "'
                  + pp.Word(pp.alphanums + "./_") + '".'
                  + pp.Word(pp.alphanums) + ';')
    capnp = ws2del + pp.ZeroOrMore(comment) + cpid + ws2del + pp.ZeroOrMore(
        pp.Or([mainstruct + ws2del, using + ws2del, enum + ws2del]))
    return capnp.leaveWhitespace()
class PyparsingConstants(object):
    """Constants for pyparsing-based parsers."""

    # Numbers.
    INTEGER = pyparsing.Word(pyparsing.nums).setParseAction(PyParseIntCast)

    IPV4_ADDRESS = pyparsing.pyparsing_common.ipv4_address
    IPV6_ADDRESS = pyparsing.pyparsing_common.ipv6_address
    IP_ADDRESS = (IPV4_ADDRESS | IPV6_ADDRESS)

    # Three-letter month abbreviation: one uppercase, two lowercase letters.
    # TODO: deprecate and remove, use THREE_LETTERS instead.
    # TODO: fix Python 3 compatibility of .uppercase and .lowercase.
    # pylint: disable=no-member
    MONTH = pyparsing.Word(
        pyparsing.string.ascii_uppercase,
        pyparsing.string.ascii_lowercase,
        exact=3)

    # Define date structures.
    HYPHEN = pyparsing.Literal('-').suppress()
    ONE_OR_TWO_DIGITS = pyparsing.Word(
        pyparsing.nums, min=1, max=2).setParseAction(PyParseIntCast)
    TWO_DIGITS = pyparsing.Word(
        pyparsing.nums, exact=2).setParseAction(PyParseIntCast)
    THREE_DIGITS = pyparsing.Word(
        pyparsing.nums, exact=3).setParseAction(PyParseIntCast)
    FOUR_DIGITS = pyparsing.Word(
        pyparsing.nums, exact=4).setParseAction(PyParseIntCast)

    THREE_LETTERS = pyparsing.Word(pyparsing.alphas, exact=3)

    # ISO-style date: YYYY-MM-DD, each piece int-cast and named.
    DATE_ELEMENTS = (
        FOUR_DIGITS.setResultsName('year') + pyparsing.Suppress('-') +
        TWO_DIGITS.setResultsName('month') + pyparsing.Suppress('-') +
        TWO_DIGITS.setResultsName('day_of_month'))
    # Time: HH:MM:SS.
    TIME_ELEMENTS = (
        TWO_DIGITS.setResultsName('hours') + pyparsing.Suppress(':') +
        TWO_DIGITS.setResultsName('minutes') + pyparsing.Suppress(':') +
        TWO_DIGITS.setResultsName('seconds'))
    # Fractional seconds may be separated by '.' or ','.
    TIME_MSEC_ELEMENTS = (
        TIME_ELEMENTS + pyparsing.Word('.,', exact=1).suppress() +
        INTEGER.setResultsName('microseconds'))

    # Date structures defined as a single group.
    DATE = pyparsing.Group(DATE_ELEMENTS)
    DATE_TIME = pyparsing.Group(DATE_ELEMENTS + TIME_ELEMENTS)
    DATE_TIME_MSEC = pyparsing.Group(DATE_ELEMENTS + TIME_MSEC_ELEMENTS)
    TIME = pyparsing.Group(TIME_ELEMENTS)

    TIME_MSEC = TIME + pyparsing.Suppress('.') + INTEGER
    # TODO: replace by
    # TIME_MSEC = pyparsing.Group(TIME_MSEC_ELEMENTS)

    COMMENT_LINE_HASH = pyparsing.Literal('#') + pyparsing.SkipTo(
        pyparsing.LineEnd())

    # Process identifier: at most five digits, int-cast.
    # TODO: Add more commonly used structs that can be used by parsers.
    PID = pyparsing.Word(
        pyparsing.nums, min=1, max=5).setParseAction(PyParseIntCast)
def validate_staples(f):
    """Validate staple-export CSV lines from *f* (an iterable of lines).

    Each line must be ``coord,coord,sequence,length,#rrggbb``.

    Returns:
        (flag, msg, error): ``flag`` is True when all lines parse;
        otherwise ``msg`` marks the offending position ('?') and
        ``error`` describes the failure.
    """
    # initiate variables
    flag, msg, error = True, 'Staples are OK', ''
    import pyparsing as pg
    # All patterns are raw strings now — the originals carried invalid
    # escape sequences like '\d' in plain string literals.
    # example coordinate: 1[23]
    coord = pg.Regex(r'\d+\[\d+\],').setName('coordinates in format XX[YY].')
    # example sequence: AGGTTAGATCG
    seq = pg.Regex(r"[ATGC]+,").setName('DNA sequence composed of ATGC.')
    # example length: 8
    length = pg.Regex(r'\d+,').setName('integer staple length.')
    # example color: #54aa8d
    # BUG FIX: '[0-9A-z]' also matched the punctuation between 'Z' and 'a'
    # ('[', '\\', ']', '^', '_', '`'); colors are six hex digits.
    color = pg.Regex(r'#[0-9A-Fa-f]{6}').setName('color in format #AABBCC.')
    # example line
    # 0[116],1[107],TGTCTCAGCTGCATCGCAAGACATCATCAAAGG,33,#170fde
    validLine = coord + coord + seq + length + color + pg.LineEnd()
    for line in f:
        try:
            validLine.parseString(line)
        except pg.ParseException as pe:
            flag, msg, error = False, pe.markInputline(
                '?'), 'Invalid staples. ' + pe.msg
            break
    return (flag, msg, error)
class ZshExtendedHistoryParser(text_parser.PyparsingMultiLineTextParser):
  """Parser for Zsh extended_history files"""

  NAME = u'zsh_extended_history'
  DESCRIPTION = u'Parser for ZSH extended history files'

  # Records look like ': <timestamp>:<elapsed seconds>;<command>'.
  _VERIFICATION_REGEX = re.compile(r'^:\s\d+:\d+;')

  _PYPARSING_COMPONENTS = {
      u'timestamp': text_parser.PyparsingConstants.INTEGER.
                    setResultsName(u'timestamp'),
      u'elapsed_seconds': text_parser.PyparsingConstants.INTEGER.
                          setResultsName(u'elapsed_seconds'),
      # The command text runs up to end of input or the start of the
      # next record (lookahead keeps the next record unconsumed).
      u'command': pyparsing.Regex(r'.+?(?=($|\n:\s\d+:\d+;))', re.DOTALL).
                  setResultsName(u'command'),
  }

  _LINE_GRAMMAR = (
      pyparsing.Literal(u':') + _PYPARSING_COMPONENTS[u'timestamp'] +
      pyparsing.Literal(u':') + _PYPARSING_COMPONENTS[u'elapsed_seconds'] +
      pyparsing.Literal(u';') + _PYPARSING_COMPONENTS[u'command'] +
      pyparsing.LineEnd())

  LINE_STRUCTURES = [(u'command', _LINE_GRAMMAR)]

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a record and produces a Zsh history event.

    Args:
      parser_mediator: a parser mediator object (instance of
                       ParserMediator).
      key: an string indicating the name of the parsed structure.
      structure: the elements parsed from the file (instance of
                 pyparsing.ParseResults).

    Raises:
      UnableToParseFile: if an unsupported key is provided.
    """
    if key != u'command':
      raise errors.UnableToParseFile(u'Unsupported key {0:s}'.format(key))
    event_object = ZshHistoryEvent(
        structure[u'timestamp'], structure[u'elapsed_seconds'],
        structure[u'command'])
    parser_mediator.ProduceEvent(event_object)

  def VerifyStructure(self, parser_mediator, lines):
    """Verifies whether content corresponds to a Zsh extended_history file.

    Args:
      parser_mediator: a parser mediator object (instance of
                       ParserMediator).
      lines: a string containing one or more lines of content from the
             file-like object being evaluated for parsing.

    Returns:
      A boolean that indicates the lines appear to contain content from a
      Zsh extended_history file.
    """
    # BUG FIX: previously returned None implicitly when the regex did not
    # match, despite documenting a boolean return; always return a bool.
    return bool(self._VERIFICATION_REGEX.match(lines))
def args(cls, player): """ Return the pyp pattern for this command's arguments. This implementation rejects any args; subclasses should override if they intend to accept any. """ # By default, accept no arguments return pyp.LineEnd()
def aggregatorMetas():
    """Grammar for a single '*key=value' aggregator meta definition.

    The entry is returned as a Dict group keyed by the '*'-prefixed name
    (the star itself is stripped); an optional trailing line ending is
    consumed.
    """
    suppressed_star = p.Literal('*').suppress()
    suppressed_equals = p.Literal('=').suppress()
    name = p.Word(p.alphanums)
    keyword = p.Combine(suppressed_star + name).setResultsName('key')
    raw_value = p.Word(p.printables + ' ')
    assigned_value = (suppressed_equals + raw_value).setResultsName('value')
    entry = p.Group(keyword + assigned_value) + p.Optional(
        p.LineEnd().suppress())
    return p.Dict(entry)
class GenericListAttribute(Node):
    """Node for an attribute whose value is a list spanning further lines."""

    # Maps node attribute names to results names / extractor callables.
    __fragments__ = {
        'name': 'attribute-name',
        'value': lambda t: t['attribute-value'].asList()
    }

    # Matches 'AttributeName:' at line start (indented by at most one tab,
    # never by spaces), the ':' and line ending suppressed, followed by the
    # list value parsed from subsequent lines.
    __syntax__ = (p.LineStart().suppress() + p.NotAny(p.White(' ')) +
                  p.Optional(p.White('\t')).suppress() + AttributeName +
                  p.Literal(':').suppress() + p.LineEnd().suppress() +
                  GenericListAttributeValue).setResultsName("attribute")
def grammer():
    """Grammar for lines of the form ``<n> ($r0=ff $cr1=0a ...)``.

    Yields the leading decimal number followed by a Dict of
    register-name → hex-value pairs; parentheses, '=' and the trailing
    newline are suppressed.
    """
    lparen = pp.Suppress("(")
    rparen = pp.Suppress(")")
    equal = pp.Suppress("=")
    eol = pp.Suppress(pp.LineEnd())
    # Register names: '$' optionally followed by 'cr', then one digit 0-7.
    register = pp.Combine(
        "$" + pp.Optional("cr") + pp.Word(pp.srange("[0-7]"), max=1))
    # Leading count: decimal digits, converted to int.
    decimal = pp.Word(pp.srange("[0-9]")).setParseAction(
        lambda s, l, t: int(t[0]))
    # Register values: hex digits, converted to int base 16.
    hex_value = pp.Word(pp.srange("[0-9a-fA-F]")).setParseAction(
        lambda s, l, t: int(t[0], 16))
    assignments = pp.Dict(pp.OneOrMore(pp.Group(register + equal + hex_value)))
    return decimal + lparen + assignments + rparen + eol
def _build_txt_parser():
    """Build a pyparsing parser for TXT exports.

    Layout: an optional numeric header line, flat '%key=value' metadata
    ('meta'), a '%'-prefixed column heading ('columnHeading'), then
    comma-delimited data rows ('textValues').
    """
    separator = pp.Suppress('=')
    key = pp.Literal('%') + pp.Word(pp.printables, excludeChars='=')
    # Value: remainder of the line; the line ending is consumed.
    value = pp.Regex(r'[^\n\r]*') + pp.LineEnd().suppress()
    key_value = key + separator + value
    # Header: a line of whitespace-separated integers.
    header = (pp.LineStart().suppress() + pp.Word(pp.nums) +
              pp.ZeroOrMore(pp.White().suppress() + pp.Word(pp.nums)) +
              pp.LineEnd().suppress())
    column_heading = pp.Literal('%') + pp.Word(
        pp.printables, excludeChars='=') + separator + value
    # Data cells are numeric-ish tokens or the literal 'custom'.
    txt_row = pp.delimitedList(
        pp.Word(pp.nums + '.+-e_')
        | pp.Literal('custom')) + pp.LineEnd().suppress()
    # BUG FIX: the original returned the undefined names 'block' and
    # 'csv_row' (left over from _build_csv_parser), raising NameError at
    # call time. TXT metadata is the flat key/value list and rows are
    # txt_row.
    return pp.Optional(header) + \
        pp.ZeroOrMore(pp.Dict(pp.Group(key_value))).setResultsName('meta') + \
        column_heading.setResultsName('columnHeading') + \
        pp.Group(pp.OneOrMore(pp.Group(txt_row))).setResultsName('textValues')
def gto_basis_parser():
    """
    Gaussian-type orbital basis parser with pyparsing
    Basis structure in CRYSTAL is as follows:
    NUM NSHELLS
    <ECP_PART>
    <SHELL_PART>
    <PRIMITIVE_PART>
    :return: basis parser
    """
    # Header: atom number and number of shells.
    header = 2 * pc.integer
    # Optional effective-core-potential section: a keyword, then an
    # optional parameter line and exponent/coefficient rows.
    ecp_part = pp.Word(pp.alphas) + pp.Optional(
        pp.Group(pc.real + 6 * pc.integer) +
        pp.Group(pp.OneOrMore(pp.Group(2 * pc.real + pc.signed_integer))))
    # Shell header: three integers and two numbers.
    bs_head = pp.Group(3 * pc.integer + 2 * pc.number)
    # Each shell: header plus primitive rows of three or two numbers,
    # one row per line.
    bs_part = pp.OneOrMore(
        pp.Group(bs_head + pp.ZeroOrMore(
            pp.Group((3 * pc.number + pp.Suppress(pp.LineEnd()))
                     ^ (2 * pc.number + pp.Suppress(pp.LineEnd()))))))
    # SkipTo(header) discards any preamble before the first header line.
    return pp.SkipTo(header) + header('header') + pp.Optional(
        ecp_part('ecp')) + bs_part('bs')
def parse_design(self):
    """Build the grammar for the DEF ``DESIGN <name> ;`` statement.

    Exposes the design name under the results name ``'DESIGN'`` and
    sets ``self.events[0]`` so parsers waiting on it (e.g.
    parse_dbuPerMicron, parse_diearea) can proceed.
    """
    # Statement terminator: ';' followed by end of line.
    linebreak = pp.Suppress(";" + pp.LineEnd())
    # Identifier characters; '\\]' was previously the invalid escape '\]'.
    # NOTE(review): '“' and '‘' look like mojibake for '"' and "'" — confirm
    # against the DEF files this is run on.
    identifier = pp.Word(
        pp.alphanums + '._“!<>/[]$#$%&‘*+,/:<=>?@[\\]^_`{|}~')  # CONFLICT with '();'
    # Unused boilerplate locals (EOL, number, word, LPAR, RPAR, ORIENT,
    # pt) shared with sibling parse_* methods have been removed.
    design_id = pp.Keyword('DESIGN')
    design = design_id + identifier('DESIGN') + linebreak
    self.events[0].set()  # event[0] (parse_dbuPerMicron) has priority
    return design
def _parse_map_tables(report_str: str) -> Dict[str, str]: """ Parse the tables from a ISE map report. Keys are the title of the table, values are the table body. """ # Capture the title from section headings like: # # Section 12 - Control Set Information # ------------------------------------ title = ( pp.lineStart() + "Section" + ppc.integer + "-" + pp.SkipTo(pp.lineEnd())("title").setParseAction(pp.tokenMap(str.strip)) + pp.lineEnd() ) sec_hline = pp.Suppress(pp.lineStart() + pp.Word("-") + pp.lineEnd() * (1,)) # Table horizontal lines like # +-------------------------------+ hline = pp.lineStart() + pp.Word("+", "+-") + pp.lineEnd() # Most tables will have the format # +-----------------------+ # | Col 1 | Col 2 | Col 3 | # +-----------------------+ # | D1 | D2 | D3 | # ... # +-----------------------+ # # However "Control Set Information" appears to use horizontal lines to # separate clocks within the data section. Therefore, just grab # everything until a horizontal line followed by a blank line rather # than something more precise. table = pp.Combine(hline + pp.SkipTo(hline + pp.LineEnd(), include=True))( "body" ) table_section = title + sec_hline + table # Make line endings significant table_section.setWhitespaceChars(" \t") result = {t.title: t.body for t in table_section.searchString(report_str)} return result
def _metaParser():
    """Grammar for one '*name=value' meta line.

    A leading '*' (suppressed) at line start, the meta name, a
    suppressed '=', then the remainder of the line as the value.
    """
    # TODO force case insensitivity in attributeMode keyword match
    # TODO add debug names
    # TODO add a conditional debug flag
    star_prefix = p.LineStart() + p.Suppress(p.Literal('*'))
    name = p.Word(p.printables)
    assignment = p.Suppress(p.Literal('='))
    remainder = p.Combine(p.restOfLine() + p.Suppress(p.LineEnd()))
    return star_prefix + name + assignment + remainder
def generate_expr():
    """Build the type-expression grammar, anchored to a whole line.

    Terminals 'number', 'string' and 'count' produce JSON-schema-style
    dicts via parse actions; '(' expr, expr, ... ')' builds a tuple
    validator and '<length> * expr' an array validator.
    """
    number = pp.Literal("number").setParseAction(lambda: {"type": "number"})
    string = pp.Literal("string").setParseAction(lambda: {"type": "string"})
    count = pp.Literal("count") \
        .setParseAction(lambda: {"type": "integer", "minimum": 0})
    expr = pp.Forward()
    # Renamed from 'tuple' — the original shadowed the builtin.
    tuple_expr = (pp.Suppress(pp.Literal("(")) + pp.delimitedList(expr)
                  + pp.Suppress(pp.Literal(")"))).setParseAction(tuple_validator)
    # NOTE(review): 'length' is not defined in this function — presumably
    # a module-level grammar element; confirm it exists at import time.
    array = (length + pp.Suppress("*") + expr).setParseAction(array_validator)
    expr << (tuple_expr | number | string | count | array)
    return pp.LineStart() + expr + pp.LineEnd()  # throw error on extra stuff
def __init__(self):
    """Build a CMake-language parser and store it in ``self._parser``.

    Parses a CMake file into command invocations ``(name, [args])``,
    handling quoted, unquoted and bracket arguments, plus line and
    bracket comments.
    """
    newline = "\n"
    space_plus = pp.Regex("[ \t]+")
    space_star = pp.Optional(space_plus)
    # Quoted arguments: '"' ... '"' with backslash escape sequences.
    quoted_element = pp.Regex(r'[^\\"]|\\[^A-Za-z0-9]|\\[trn]')
    quoted_argument = pp.Combine('"' + pp.ZeroOrMore(quoted_element) + '"')
    # Bracket arguments: '[=*[' ... ']=*]' where the number of '=' in the
    # closing marker must match the opening one, so the content matcher is
    # rebuilt dynamically when the opener is seen.
    bracket_content = pp.Forward()

    def action_bracket_open(tokens: pp.ParseResults):
        nonlocal bracket_content
        # Derive the matching closing marker from the opener's '=' count.
        marker = "]" + "=" * (len(tokens[0]) - 2) + "]"
        bracket_content <<= pp.SkipTo(marker, include=True)

    bracket_open = pp.Regex(r"\[=*\[").setParseAction(action_bracket_open)
    bracket_argument = pp.Combine(bracket_open + bracket_content)
    unquoted_element = pp.Regex(r'[^\s()#"\\]|\\[^A-Za-z0-9]|\\[trn]')
    unquoted_argument = pp.Combine(pp.OneOrMore(unquoted_element))
    argument = bracket_argument | quoted_argument | unquoted_argument
    # '#' begins a line comment unless immediately followed by a bracket
    # opener, in which case it is a bracket comment.
    line_comment = pp.Combine("#" + ~bracket_open + pp.SkipTo(pp.LineEnd()))
    bracket_comment = pp.Combine("#" + bracket_argument)
    line_ending = (
        space_star
        + pp.ZeroOrMore(bracket_comment + space_star)
        + pp.Optional(line_comment)
        + (newline | pp.lineEnd)
    )
    identifier = pp.Word(pp.alphas + "_", pp.alphanums + "_")
    # Arguments may nest inside parentheses.
    arguments = pp.Forward()
    arguments << pp.ZeroOrMore(
        argument | line_ending | space_plus | "(" + arguments + ")"
    ).leaveWhitespace()
    arguments = pp.Group(arguments)
    PAREN_L, PAREN_R = map(pp.Suppress, "()")
    # A command invocation 'name(args)' is reduced to (name, [args]).
    command_invocation = (
        identifier + space_star.suppress() + PAREN_L + arguments + PAREN_R
    ).setParseAction(lambda t: (t[0], t[1].asList()))
    file_element = (
        space_star + command_invocation + line_ending | line_ending
    ).leaveWhitespace()
    file = pp.ZeroOrMore(file_element)
    self._parser = file
def adblock_content(self, lurl): url_dom = urlparse(lurl).hostname # TODO add option stimeout self.browser.load(lurl, load_timeout=120, tries=3) # soup = self.browser.soup.encode('utf-8') # при таком парсинге кодировка слетает # РЕШЕНИЕ: через QString который превращаем в unicode html_str = unicode(self.browser.webframe.toHtml().toUtf8(), encoding="UTF-8") html_str = self.apply_css_sel(html_str, url_dom) removetext = pyparsing.replaceWith("") pyparsing.htmlComment.setParseAction(removetext) pyparsing.commonHTMLEntity.setParseAction(pyparsing.replaceHTMLEntity) text_str = (pyparsing.htmlComment | pyparsing.commonHTMLEntity).transformString(html_str) # text_str = self.apply_css_sel(text_str, url_dom) for tag in ["script", "iframe", "style", "noscript"]: text_str = self.trans_tag(text_str, tag, removetext) anytag = pyparsing.anyOpenTag anyclose = pyparsing.anyCloseTag anytag.setParseAction(removetext) anyclose.setParseAction(removetext) # заменяем теги со ccылками text_str = self.trans_tag(text_str, "a", self.change_a_tag) # теги h p text_str = self.trans_tag(text_str, "h", self.change_ph_tag) text_str = self.trans_tag(text_str, "p", self.change_ph_tag) text_str = (anytag | anyclose).transformString(text_str) repeatednewlines = pyparsing.LineEnd() + pyparsing.OneOrMore( pyparsing.LineEnd()) repeatednewlines.setParseAction(pyparsing.replaceWith("\n\n")) text_str = repeatednewlines.transformString(text_str) # print("res:", text.encode('utf-8')) return text_str
class PyparsingConstants(object): """A class that maintains constants for pyparsing.""" # Numbers. INTEGER = pyparsing.Word(pyparsing.nums).setParseAction(PyParseIntCast) IPV4_OCTET = pyparsing.Word(pyparsing.nums, min=1, max=3).setParseAction( PyParseIntCast, PyParseRangeCheck(0, 255)) IPV4_ADDRESS = (IPV4_OCTET + ('.' + IPV4_OCTET) * 3).setParseAction( PyParseJoinList) # TODO: Fix the IPv6 address specification to be more accurate (8 :, correct # size, etc). IPV6_ADDRESS = pyparsing.Word(':' + pyparsing.hexnums).setParseAction( PyParseJoinList) # Common words. MONTH = pyparsing.Word( pyparsing.string.uppercase, pyparsing.string.lowercase, exact=3) # Define date structures. HYPHEN = pyparsing.Literal('-').suppress() YEAR = pyparsing.Word(pyparsing.nums, exact=4).setParseAction( PyParseIntCast) TWO_DIGITS = pyparsing.Word(pyparsing.nums, exact=2).setParseAction( PyParseIntCast) ONE_OR_TWO_DIGITS = pyparsing.Word( pyparsing.nums, min=1, max=2).setParseAction(PyParseIntCast) DATE = pyparsing.Group( YEAR + pyparsing.Suppress('-') + TWO_DIGITS + pyparsing.Suppress('-') + TWO_DIGITS) DATE_REV = pyparsing.Group( TWO_DIGITS + pyparsing.Suppress('-') + TWO_DIGITS + pyparsing.Suppress('-') + YEAR) TIME = pyparsing.Group( TWO_DIGITS + pyparsing.Suppress(':') + TWO_DIGITS + pyparsing.Suppress(':') + TWO_DIGITS) TIME_MSEC = TIME + pyparsing.Suppress('.') + INTEGER DATE_TIME = DATE + TIME DATE_TIME_MSEC = DATE + TIME_MSEC COMMENT_LINE_HASH = pyparsing.Literal('#') + pyparsing.SkipTo( pyparsing.LineEnd()) # TODO: Add more commonly used structs that can be used by parsers. PID = pyparsing.Word( pyparsing.nums, min=1, max=5).setParseAction(PyParseIntCast)
def metaParser():
    """Grammar for a '*name=value' meta line, returned as a Dict entry.

    The meta name (results name 'metaname') keys the Dict; the value
    ('metavalue') is the remainder of the line with the line ending
    consumed.
    """
    # TODO force case insensitivity in attributeMode keyword match
    # TODO add debug names
    # TODO add a conditional debug flag
    star_prefix = p.LineStart() + p.Suppress(p.Literal('*'))
    name = p.Word(p.alphanums).setResultsName('metaname')
    assignment = p.Suppress(p.Literal('='))
    remainder = p.Combine(
        p.restOfLine() + p.Suppress(p.LineEnd())).setResultsName('metavalue')
    return p.Dict(p.Group(star_prefix + name + assignment + remainder))