    NumericLiteral, BooleanLiteral, LANGTAG)
from rdflib_elasticstore.sparql.parserutils import Comp, Param, CompValue
from rdflib import Literal as RDFLiteral
from six import binary_type

ParserElement.setDefaultWhitespaceChars(" \n")

String = STRING_LITERAL1 | STRING_LITERAL2

RDFLITERAL = Comp(
    'literal',
    Param('string', String) + Optional(
        Param('lang', LANGTAG.leaveWhitespace()) |
        Literal('^^').leaveWhitespace() +
        Param('datatype', IRIREF).leaveWhitespace()))

NONE_VALUE = object()

EMPTY = FollowedBy(LineEnd()) | FollowedBy("\t")
EMPTY.setParseAction(lambda x: NONE_VALUE)

TERM = RDFLITERAL | IRIREF | BLANK_NODE_LABEL | NumericLiteral | BooleanLiteral

ROW = (EMPTY | TERM) + ZeroOrMore(Suppress("\t") + (EMPTY | TERM))
ROW.parseWithTabs()

HEADER = Var + ZeroOrMore(Suppress("\t") + Var)
HEADER.parseWithTabs()
# define pgn grammar
#
tag = Suppress("[") + Word(alphanums) + Combine(quotedString) + Suppress("]")
comment = Suppress("{") + Word(alphanums + " ") + Suppress("}")

dot = Literal(".")
piece = oneOf("K Q B N R")
file_coord = oneOf("a b c d e f g h")
rank_coord = oneOf("1 2 3 4 5 6 7 8")
capture = oneOf("x :")
promote = Literal("=")
castle_queenside = oneOf("O-O-O 0-0-0 o-o-o")
castle_kingside = oneOf("O-O 0-0 o-o")

move_number = Optional(comment) + Word(nums) + dot
m1 = file_coord + rank_coord                          # pawn move e.g. d4
m2 = file_coord + capture + file_coord + rank_coord   # pawn capture move e.g. dxe5
m3 = file_coord + "8" + promote + piece               # pawn promotion e.g. e8=Q
m4 = piece + file_coord + rank_coord                  # piece move e.g. Be6
m5 = piece + file_coord + file_coord + rank_coord     # piece move e.g. Nbd2
m6 = piece + rank_coord + file_coord + rank_coord     # piece move e.g. R4a7
m7 = piece + capture + file_coord + rank_coord        # piece capture move e.g. Bxh7
m8 = castle_queenside | castle_kingside               # castling e.g. o-o

check = oneOf("+ ++")
mate = Literal("#")
annotation = Word("!?", max=2)
# NAG, e.g. $1; a literal with a leading space would never match, since
# pyparsing skips whitespace before matching a Literal
nag = "$" + Word(nums)
decoration = check | mate | annotation | nag
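# A few spot-checks of the individual move productions (a minimal sketch; the
# full game grammar combining these alternatives is defined elsewhere):
print(m2.parseString("dxe5"))  # -> ['d', 'x', 'e', '5']
print(m3.parseString("e8=Q"))  # -> ['e', '8', '=', 'Q']
print(m8.parseString("O-O"))   # -> ['O-O']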
def _parse_folder_spec(spec, groups, sort_key):
    """Parse the folder specification into a nested list.

    Args:
        spec (str): folder specification
        groups (dict): map of group name to list of folders in group
        sort_key (callable): map of folder name to sortable object.

    Returns:
        list: list of parsed tokens

    Raises:
        ValueError: if `spec` cannot be parsed.
    """
    group_names = list(groups.keys())

    def convert_to_slice(parse_string, loc, tokens):
        """Convert SliceSpec tokens to slice instance."""
        parts = "".join(tokens[1:-1]).split(':')
        if len(parts) == 1:
            i = int(parts[0])
            if i == -1:
                return slice(i, None, None)
            else:
                return slice(i, i + 1, None)
        else:
            parts += [''] * (3 - len(parts))  # pad to length 3
            start, stop, step = (int(v) if len(v) > 0 else None for v in parts)
            return slice(start, stop, step)

    def convert_to_callable_filter(parse_string, loc, tokens):
        """Convert ConditionSpec to a callable filter.

        The returned filter takes a single argument `folder` and returns True
        if the `folder` passes the filter.
        """
        op, arg = tokens[0], tokens[1]

        def _filter(folder, _op, _list):
            folder = parse_version(folder)
            _list = [parse_version(v) for v in _list]
            if _op == 'in':
                return folder in _list
            elif _op == 'not in':
                return folder not in _list
            elif _op == '<=':
                return all([folder <= v for v in _list])
            elif _op == '<':
                return all([folder < v for v in _list])
            elif _op == '==':
                return all([folder == v for v in _list])
            elif _op == '!=':
                return all([folder != v for v in _list])
            elif _op == '>=':
                return all([folder >= v for v in _list])
            elif _op == '>':
                return all([folder > v for v in _list])
            else:  # pragma: nocover
                raise ValueError("Unknown operator: %r" % _op)

        if isinstance(arg, str):
            _list = [arg]
        else:
            _list = _resolve_folder_spec(
                [arg.asList()], groups, sort_key=sort_key)
        return partial(_filter, _op=op, _list=_list)

    Int = Word(nums + "-", nums)
    Colon = Literal(':')
    SliceSpec = ("[" + Optional(Int) + Optional(Colon + Optional(Int)) +
                 Optional(Colon + Optional(Int)) +
                 "]").setParseAction(convert_to_slice)
    LogicalOperator = (Literal('in') | Literal('not in') | Literal('<=') |
                       Literal('<') | Literal('==') | Literal('!=') |
                       Literal('>=') | Literal('>'))
    GroupName = Group("<" + oneOf(group_names, caseless=True) + ">")
    FolderName = Word(alphanums, alphanums + ".-_+")
    ParenthesizedListSpec = Forward()
    ConditionSpec = Forward()
    ParenthesizedListSpec <<= Group(
        "(" + delimitedList(GroupName | FolderName | ParenthesizedListSpec) +
        ZeroOrMore(ConditionSpec) + ")" + Optional(SliceSpec))
    ConditionSpec <<= LogicalOperator + (
        FolderName | GroupName | ParenthesizedListSpec)
    ConditionSpec = ConditionSpec.setParseAction(convert_to_callable_filter)
    ListSpec = delimitedList(GroupName | FolderName | ParenthesizedListSpec)
    Spec = ListSpec | ParenthesizedListSpec
    if spec.strip() == '':
        return []
    try:
        return Spec.parseString(spec, parseAll=True).asList()
    except ParseException as exc:
        raise ValueError(
            "Invalid specification (marked '*'): %r" % exc.markInputline('*'))
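# A minimal usage sketch: a parenthesized list with a slice, exercising only
# the pure-pyparsing path (no group conditions, so parse_version and
# _resolve_folder_spec are never invoked):
print(_parse_folder_spec("(1.0, 1.1, 2.0)[1:]",
                         groups={"stable": ["1.0", "1.1"]}, sort_key=None))
# -> [['(', '1.0', '1.1', '2.0', ')', slice(1, None, None)]]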
def rc_statement():
    """
    Generate a RC statement parser that can be used to parse a RC file

    :rtype: pyparsing.ParserElement
    """
    one_line_comment = '//' + restOfLine
    comments = cStyleComment ^ one_line_comment
    precompiler = Word('#', alphanums) + restOfLine
    language_definition = (
        "LANGUAGE" + Word(alphas + '_').setResultsName("language") +
        Optional(',' + Word(alphas + '_').setResultsName("sublanguage")))
    block_start = (Keyword('{') | Keyword("BEGIN")).setName("block_start")
    block_end = (Keyword('}') | Keyword("END")).setName("block_end")
    reserved_words = block_start | block_end
    name_id = ~reserved_words + \
        Word(alphas, alphanums + '_').setName("name_id")
    numbers = Word(nums)
    integerconstant = numbers ^ Combine('0x' + numbers)
    constant = Combine(
        Optional(Keyword("NOT")) + (name_id | integerconstant),
        adjacent=False, joinString=' ')
    combined_constants = delimitedList(constant, '|')
    block_options = Optional(
        SkipTo(Keyword("CAPTION"), failOn=block_start)("pre_caption") +
        Keyword("CAPTION") + quotedString("caption")) + \
        SkipTo(block_start)("post_caption")
    undefined_control = Group(
        name_id.setResultsName("id_control") +
        delimitedList(quotedString ^ constant ^ numbers ^
                      Group(combined_constants)).setResultsName("values_"))
    block = block_start + \
        ZeroOrMore(undefined_control)("controls") + block_end
    dialog = name_id("block_id") + (
        Keyword("DIALOGEX") | Keyword("DIALOG"))("block_type") + \
        block_options + block
    string_table = Keyword("STRINGTABLE")("block_type") + block_options + block
    menu_item = Keyword("MENUITEM")("block_type") + (
        commaSeparatedList("values_") | Keyword("SEPARATOR"))
    popup_block = Forward()
    popup_block <<= Group(
        Keyword("POPUP")("block_type") + Optional(quotedString("caption")) +
        block_start + ZeroOrMore(Group(menu_item | popup_block))("elements") +
        block_end)("popups*")
    menu = name_id("block_id") + \
        Keyword("MENU")("block_type") + block_options + \
        block_start + ZeroOrMore(popup_block) + block_end

    statem = comments ^ precompiler ^ language_definition ^ dialog ^ \
        string_table ^ menu

    return statem
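# A minimal usage sketch: parse a STRINGTABLE block with the generated parser.
rc_text = '''
STRINGTABLE
BEGIN
    IDS_HELLO "Hello"
END
'''
parser = rc_statement()
result = parser.parseString(rc_text)
print(result.block_type)  # -> STRINGTABLE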
present.setParseAction(lambda s, l, t: pureldap.LDAPFilter_present(t[0]))

initial = copy.copy(value)
initial.setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_substrings_initial(t[0]))
initial.setName('initial')

any_value = value + Suppress(Literal("*"))
any_value.setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_substrings_any(t[0]))
any = Suppress(Literal("*")) + ZeroOrMore(any_value)
any.setName('any')

final = copy.copy(value)
final.setName('final')
final.setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_substrings_final(t[0]))

substring = attr + Suppress(
    Literal("=")) + Group(Optional(initial) + any + Optional(final))
substring.setName('substring')


def _p_substring(s, l, t):
    attrtype, substrings = t
    return pureldap.LDAPFilter_substrings(type=attrtype,
                                          substrings=substrings)


substring.setParseAction(_p_substring)

keystring = Word(string.ascii_letters,
                 string.ascii_letters + string.digits + ';-')
keystring.setName('keystring')

numericoid = delimitedList(Word(string.digits), delim='.', combine=True)
numericoid.setName('numericoid')
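# A minimal usage sketch; `attr` and `value` are defined earlier in this
# module (this is ldaptor-style LDAP filter text), so this illustrates
# intent only:
f = substring.parseString("cn=foo*bar")[0]
# f is an LDAPFilter_substrings on cn with initial "foo" and final "bar"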
from pyparsing import alphas, alphanums, Combine, delimitedList, Forward, Group, Literal, \
    Keyword, nums, oneOf, Optional, ParserElement, Suppress, White, Word

ParserElement.enablePackrat()

LPAR, RPAR = map(Suppress, '()')
const = Literal('true') | Literal('false')
AOps = oneOf('INTS_MODULUS_TOTAL * / + -').setParseAction(
    lambda s, l, t: ['%'] if t[0] == 'INTS_MODULUS_TOTAL' else t)
BOps = (Keyword('and').setParseAction(lambda s, l, t: ['&']) |
        Keyword('not').setParseAction(lambda s, l, t: ['!']) |
        Keyword('or').setParseAction(lambda s, l, t: ['|']))
ROps = oneOf('< > <= >= =')
val = Combine(Optional('-') + Word(nums))
var = Word(alphas + '_:$', alphanums + '_:$')
term = val | var

let = Forward()
pred = Forward()
stmt = Forward()
expr = Forward()

expr << (term |
         (LPAR + AOps + Group(delimitedList(expr, delim=White(' '))) + RPAR
          ).setParseAction(lambda s, l, t: [list(joinit(t[1], t[0]))] if not (
              t[0] == '-' and len(t[1]) == 1) else [['0 -', t[1][0]]]) |
         (LPAR + expr + RPAR))
aop0 = oneOf('* /')
aop1 = oneOf('+ -')
aop2 = oneOf('%').setParseAction(lambda s, l, t: ['mod'])
bop = oneOf('& |').setParseAction(
    lambda s, l, t: ['and'] if t[0] == '&' else ['or'])
NOT = Literal('!')
rop = oneOf('< > <= >= = !=').setParseAction(
    lambda s, l, t: ['distinct'] if t[0] == '!=' else t)
GET, CAT, HAS, IND, LEN, REP, SUB, EQL = map(
    Literal, '#get #cat #has #ind #len #rep #sub #eql'.split())

var = Word(alphas + '_:$', alphanums + '_:$').setParseAction(addVar)
ival = Combine(Optional('-') + Word(nums)).setParseAction(
    lambda s, l, t: ['(- %s)' % t[0][1:]] if t[0][0] == '-' else t)
ivar = (ival + var).setParseAction(lambda s, l, t: ['*', t[0], t[1]])
term = ivar | ival | var | QuotedString(quoteChar='"', unquoteResults=False)

stmt = Forward()
expr = Forward()
sexpr = Forward()

sexpr << (
    (GET + LPAR + expr + COMMA + expr + RPAR
     ).setParseAction(lambda s, l, t: CharAtAction(t)) |
    (CAT + LPAR + expr + COMMA + expr + RPAR).setParseAction(
        lambda s, l, t: [['Concat', chkString(t[1]), chkString(t[2])]])
def define_dot_parser(self):
    """Define dot grammar

    Based on the grammar http://www.graphviz.org/doc/info/lang.html
    """
    # punctuation
    colon = Literal(":")
    lbrace = Suppress("{")
    rbrace = Suppress("}")
    lbrack = Suppress("[")
    rbrack = Suppress("]")
    lparen = Literal("(")
    rparen = Literal(")")
    equals = Suppress("=")
    comma = Literal(",")
    dot = Literal(".")
    slash = Literal("/")
    bslash = Literal("\\")
    star = Literal("*")
    semi = Suppress(";")
    at = Literal("@")
    minus = Literal("-")
    pluss = Suppress("+")

    # keywords
    strict_ = CaselessLiteral("strict")
    graph_ = CaselessLiteral("graph")
    digraph_ = CaselessLiteral("digraph")
    subgraph_ = CaselessLiteral("subgraph")
    node_ = CaselessLiteral("node")
    edge_ = CaselessLiteral("edge")
    punctuation_ = "".join([c for c in string.punctuation if c not in '_'
                            ]) + string.whitespace

    # token definitions
    identifier = Word(alphanums + "_").setName("identifier")

    # double_quoted_string = QuotedString('"', multiline=True, escChar='\\',
    #     unquoteResults=True)  # dblQuotedString
    double_quoted_string = Regex(r'\"(?:\\\"|\\\\|[^"])*\"', re.MULTILINE)
    double_quoted_string.setParseAction(removeQuotes)
    quoted_string = Combine(
        double_quoted_string +
        Optional(OneOrMore(pluss + double_quoted_string)),
        adjacent=False)
    alphastring_ = OneOrMore(CharsNotIn(punctuation_))

    def parse_html(s, loc, toks):
        return '<<%s>>' % ''.join(toks[0])

    opener = '<'
    closer = '>'
    try:
        html_text = pyparsing.nestedExpr(
            opener, closer,
            ((CharsNotIn(opener + closer).setParseAction(lambda t: t[0])))
        ).setParseAction(parse_html)
    except:
        log.debug('nestedExpr not available.')
        log.warning('Old version of pyparsing detected. Version 1.4.8 or '
                    'later is recommended. Parsing of html labels may not '
                    'work properly.')
        html_text = Combine(Literal("<<") + OneOrMore(CharsNotIn(",]")))

    ID = (
        alphastring_ | html_text | quoted_string |  # .setParseAction(strip_quotes) |
        identifier).setName("ID")

    float_number = Combine(Optional(minus) +
                           OneOrMore(Word(nums + "."))).setName("float_number")

    righthand_id = (float_number | ID).setName("righthand_id")

    port_angle = (at + ID).setName("port_angle")

    port_location = ((OneOrMore(Group(colon + ID)) |
                      Group(colon + lparen + ID + comma + ID +
                            rparen))).setName("port_location")

    port = Combine(
        (Group(port_location + Optional(port_angle)) |
         Group(port_angle + Optional(port_location)))).setName("port")

    node_id = (ID + Optional(port))
    a_list = OneOrMore(ID + Optional(equals + righthand_id) +
                       Optional(comma.suppress())).setName("a_list")

    attr_list = OneOrMore(lbrack + Optional(a_list) + rbrack).setName(
        "attr_list").setResultsName('attrlist')

    attr_stmt = ((graph_ | node_ | edge_) + attr_list).setName("attr_stmt")

    edgeop = (Literal("--") | Literal("->")).setName("edgeop")

    stmt_list = Forward()
    graph_stmt = (lbrace + Optional(stmt_list) + rbrace +
                  Optional(semi)).setName("graph_stmt")

    edge_point = Forward()

    edgeRHS = OneOrMore(edgeop + edge_point)
    edge_stmt = edge_point + edgeRHS + Optional(attr_list)

    subgraph = (
        Optional(subgraph_, '') + Optional(ID, '') +
        Group(graph_stmt)).setName("subgraph").setResultsName('ssubgraph')

    edge_point << (subgraph | graph_stmt | node_id)

    node_stmt = (node_id + Optional(attr_list) +
                 Optional(semi)).setName("node_stmt")

    assignment = (ID + equals + righthand_id).setName("assignment")
    stmt = (assignment | edge_stmt | attr_stmt | subgraph | graph_stmt |
            node_stmt).setName("stmt")
    stmt_list << OneOrMore(stmt + Optional(semi))

    graphparser = ((Optional(strict_, 'notstrict') + ((graph_ | digraph_)) +
                    Optional(ID, '') + lbrace + Group(Optional(stmt_list)) +
                    rbrace).setResultsName("graph"))

    singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

    # actions
    graphparser.ignore(singleLineComment)
    graphparser.ignore(cStyleComment)

    node_id.setParseAction(self._proc_node_id)
    assignment.setParseAction(self._proc_attr_assignment)
    a_list.setParseAction(self._proc_attr_list)
    edge_stmt.setParseAction(self._proc_edge_stmt)
    node_stmt.setParseAction(self._proc_node_stmt)
    attr_stmt.setParseAction(self._proc_default_attr_stmt)
    attr_list.setParseAction(self._proc_attr_list_combine)
    subgraph.setParseAction(self._proc_subgraph_stmt)
    # graph_stmt.setParseAction(self._proc_graph_stmt)
    graphparser.setParseAction(self._main_graph_stmt)

    return graphparser
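# A minimal usage sketch; `DotParser` is a hypothetical name for the enclosing
# class whose _proc_* callbacks assemble the graph model:
dot = DotParser()
grammar = dot.define_dot_parser()
tokens = grammar.parseString('digraph G { a -> b [label="x"]; }')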
       (even binary content) inside the structure. This is done by pre-
       sizing the data with the NUMBER similar to Dan Bernstein's netstrings
       setup.

SPACE  White space is basically ignored. This is interesting because since
       Stackish is serialized consistently this means you can use \n as the
       separation character and perform reasonable diffs on two structures.
"""

from pyparsing import Suppress, Word, nums, alphas, alphanums, Combine, oneOf, \
    Optional, QuotedString, Forward, Group, ZeroOrMore, srange

MARK, UNMARK, AT, COLON, QUOTE = map(Suppress, "[]@:'")
NUMBER = Word(nums)
NUMBER.setParseAction(lambda t: int(t[0]))
FLOAT = Combine(oneOf("+ -") + Word(nums) + "." + Optional(Word(nums)))
FLOAT.setParseAction(lambda t: float(t[0]))
STRING = QuotedString('"', multiline=True)
WORD = Word(alphas, alphanums + "_:")
ATTRIBUTE = Combine(AT + WORD)

strBody = Forward()


def setBodyLength(tokens):
    strBody << Word(srange(r'[\0x00-\0xffff]'), exact=int(tokens[0]))
    return ""


BLOB = Combine(QUOTE + Word(nums).setParseAction(setBodyLength) + COLON +
               strBody + QUOTE)
    else:
        currentLogger.debug(message)


#####################################################
#
# Variants on individual files
#
#####################################################

from pyparsing import CharsNotIn, ZeroOrMore, Group, Suppress, OneOrMore, \
    ParseResults, Optional, StringEnd

phrase = CharsNotIn("[|]")
tag = CharsNotIn("[|]:,")
tags = Group(
    Optional(tag, default="base") + ZeroOrMore(Suppress(',') + tag) +
    Suppress(":"))
option = Group(tags + Optional(phrase, default=''))
optionList = Group(
    Suppress("[") + option + ZeroOrMore(Suppress('|') + option) +
    Suppress("]"))
text = OneOrMore(phrase | optionList) + Suppress(StringEnd())


def clean(unicodeString):
    # We need to clean the input a bit. For a start, until
    # we work out what to do, non breaking spaces will be ignored
    # ie 0xa0
    return unicodeString.replace('\xa0', ' ')
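# A minimal usage sketch of the option-list grammar:
result = text.parseString("Dear [formal:Sir or Madam|casual:friend], welcome")
print(result.asList())
# -> ['Dear ', [[['formal'], 'Sir or Madam'], [['casual'], 'friend']], ', welcome']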
def sql_to_spec(query):
    """
    Convert an SQL query to a mongo spec.
    This only supports select statements. For now.
    :param query: String. A SQL query.
    :return: None or a dictionary containing a mongo spec.
    """
    @debug_print
    def fix_token_list(in_list):
        """
        tokens as a list is sometimes deeply nested and hard to deal with.
        Improve parser grouping to remove this.
        """
        if isinstance(in_list, list) and len(in_list) == 1 and \
                isinstance(in_list[0], list):
            return fix_token_list(in_list[0])
        else:
            return [item for item in in_list]

    @debug_print
    def select_count_func(tokens=None):
        return full_select_func(tokens, 'count')

    @debug_print
    def select_distinct_func(tokens=None):
        return full_select_func(tokens, 'distinct')

    @debug_print
    def select_func(tokens=None):
        return full_select_func(tokens, 'select')

    def full_select_func(tokens=None, method='select'):
        """
        Take tokens and return a dictionary.
        """
        action = {'distinct': 'distinct', 'count': 'count'}.get(method, 'find')
        if tokens is None:
            return
        ret = {
            action: True,
            'fields': {item: 1 for item in fix_token_list(tokens.asList())}
        }
        if ret['fields'].get('id'):  # Use _id and not id
            # Drop _id from fields since mongo always returns _id
            del ret['fields']['id']
        else:
            ret['fields']['_id'] = 0
        if "*" in ret['fields'].keys():
            ret['fields'] = {}
        return ret

    @debug_print
    def where_func(tokens=None):
        """
        Take tokens and return a dictionary.
        """
        if tokens is None:
            return

        tokens = fix_token_list(tokens.asList()) + [None, None, None]
        cond = {
            '!=': '$ne',
            '>': '$gt',
            '>=': '$gte',
            '<': '$lt',
            '<=': '$lte',
            'like': '$regex'
        }.get(tokens[1])

        find_value = tokens[2].strip('"').strip("'")
        if cond == '$regex':
            if find_value[0] != '%':
                find_value = "^" + find_value
            if find_value[-1] != '%':
                find_value = find_value + "$"
            find_value = find_value.strip("%")

        if cond is None:
            expr = {tokens[0]: find_value}
        else:
            expr = {tokens[0]: {cond: find_value}}

        return expr

    @debug_print
    def combine(tokens=None):
        if tokens:
            tokens = fix_token_list(tokens.asList())
            if len(tokens) == 1:
                return tokens
            else:
                return {'${}'.format(tokens[1]): [tokens[0], tokens[2]]}

    # TODO: Reduce list of imported functions.
    from pyparsing import (Word, alphas, CaselessKeyword, Group, Optional,
                           ZeroOrMore, Forward, Suppress, alphanums, OneOrMore,
                           quotedString, Combine, Keyword, Literal,
                           replaceWith, oneOf, nums, removeQuotes,
                           QuotedString, Dict)

    LPAREN, RPAREN = map(Suppress, "()")
    EXPLAIN = CaselessKeyword('EXPLAIN').setParseAction(
        lambda t: {'explain': True})
    SELECT = Suppress(CaselessKeyword('SELECT'))
    WHERE = Suppress(CaselessKeyword('WHERE'))
    FROM = Suppress(CaselessKeyword('FROM'))
    CONDITIONS = oneOf("= != < > <= >= like", caseless=True)
    # CONDITIONS = (Keyword("=") | Keyword("!=") |
    #               Keyword("<") | Keyword(">") |
    #               Keyword("<=") | Keyword(">="))
    AND = CaselessKeyword('and')
    OR = CaselessKeyword('or')

    word_match = Word(alphanums + "._") | quotedString
    number = Word(nums)
    statement = Group(word_match + CONDITIONS +
                      word_match).setParseAction(where_func)

    select_fields = Group(
        SELECT + (word_match | Keyword("*")) +
        ZeroOrMore(Suppress(",") +
                   (word_match | Keyword("*")))).setParseAction(select_func)

    select_distinct = (
        SELECT + Suppress(CaselessKeyword('DISTINCT')) + LPAREN +
        (word_match | Keyword("*")) +
        ZeroOrMore(Suppress(",") + (word_match | Keyword("*"))) +
        Suppress(RPAREN)).setParseAction(select_distinct_func)

    select_count = (
        SELECT + Suppress(CaselessKeyword('COUNT')) + LPAREN +
        (word_match | Keyword("*")) +
        ZeroOrMore(Suppress(",") + (word_match | Keyword("*"))) +
        Suppress(RPAREN)).setParseAction(select_count_func)

    LIMIT = (Suppress(CaselessKeyword('LIMIT')) +
             word_match).setParseAction(lambda t: {'limit': t[0]})
    SKIP = (Suppress(CaselessKeyword('SKIP')) +
            word_match).setParseAction(lambda t: {'skip': t[0]})
    from_table = (FROM + word_match).setParseAction(
        lambda t: {'collection': t.asList()[0]})
    # word = ~(AND | OR) + word_match

    # place holder for other SQL statements. ALTER, UPDATE, INSERT
    operation_term = (select_distinct | select_count | select_fields)
    expr = Forward()
    atom = statement | (LPAREN + expr + RPAREN)
    and_term = (OneOrMore(atom) +
                ZeroOrMore(AND + atom)).setParseAction(combine)
    or_term = (and_term + ZeroOrMore(OR + and_term)).setParseAction(combine)

    where_clause = (WHERE + or_term).setParseAction(lambda t: {'spec': t[0]})

    list_term = Optional(EXPLAIN) + operation_term + from_table + \
        Optional(where_clause) + Optional(LIMIT) + Optional(SKIP)
    expr << list_term

    ret = expr.parseString(query.strip())

    query_dict = {}
    # map() is lazy on Python 3, so `_ = map(query_dict.update, ret)` would
    # never execute; merge the partial dicts with an explicit loop instead.
    for part in ret:
        query_dict.update(part)
    return query_dict
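# A minimal usage sketch (note that values stay strings; no type coercion):
print(sql_to_spec('SELECT name, age FROM users WHERE age > 21'))
# -> {'find': True, 'fields': {'name': 1, 'age': 1, '_id': 0},
#     'collection': 'users', 'spec': {'age': {'$gt': '21'}}}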
# vim: set encoding=utf-8
"""Some common combinations"""
import string

from pyparsing import (FollowedBy, LineEnd, Literal, OneOrMore, Optional,
                       Regex, Suppress, SkipTo, Word, ZeroOrMore)

from regparser.grammar import atomic
# Marker, SuffixMarker, and WordBoundaries are assumed to live alongside the
# other helpers in regparser.grammar.utils.
from regparser.grammar.utils import (keep_pos, Marker, QuickSearchable,
                                     SuffixMarker, WordBoundaries)

period_section = Suppress(".") + atomic.section
part_section = atomic.part + period_section
marker_part_section = (
    keep_pos(atomic.section_marker).setResultsName("marker") +
    part_section)

depth6_p = atomic.em_roman_p | atomic.plaintext_level6_p
depth5_p = ((atomic.em_digit_p | atomic.plaintext_level5_p) +
            Optional(depth6_p))
depth4_p = atomic.upper_p + Optional(depth5_p)
depth3_p = atomic.roman_p + Optional(depth4_p)
depth2_p = atomic.digit_p + Optional(depth3_p)
depth1_p = atomic.lower_p + ~FollowedBy(atomic.upper_p) + Optional(depth2_p)
any_depth_p = QuickSearchable(
    depth1_p | depth2_p | depth3_p | depth4_p | depth5_p | depth6_p)

depth3_c = atomic.upper_c + Optional(atomic.em_digit_c)
depth2_c = atomic.roman_c + Optional(depth3_c)
depth1_c = atomic.digit_c + Optional(depth2_c)
any_a = atomic.upper_a | atomic.digit_a

section_comment = atomic.section + depth1_c

section_paragraph = QuickSearchable(atomic.section + depth1_p)

section_marker = Suppress(Regex(u"§|Section|section"))
sections_marker = Suppress(Regex(u"§§|Sections|sections"))

# Most of these markers could be SuffixMarkers (which arise due to errors in
# the regulation text). We'll wait until we see explicit examples before
# converting them though, to limit false matches
paragraph_marker = Marker("paragraph")
paragraphs_marker = SuffixMarker("paragraphs")

part_marker = Marker("part")
parts_marker = Marker("parts")

subpart_marker = Marker("subpart")

comment_marker = ((Marker("comment") | Marker("commentary") |
                   (Marker("official") + Marker("interpretations")) |
                   (Marker("supplement") + Suppress(WordBoundaries("I")))) +
                  Optional(Marker("of") | Marker("to")))
comments_marker = Marker("comments")

appendix_marker = Marker("appendix")
appendices_marker = Marker("appendices")

conj_phrases = (
    (Suppress(",") + Optional(Marker("and") | Marker("or"))) |
    Marker("and") |
    Marker("or") |
    (Marker("except") + Marker("for")) |
    Suppress(Marker("through") | "-" | u"–"
             ).setParseAction(lambda: True).setResultsName("through"))

title = Word(string.digits).setResultsName("cfr_title")
escaped = (
    Literal("\\").suppress() +
    # chr(0x20)-chr(0x7e) + chr(0x80)-unichr(sys.maxunicode)
    Regex("[\u0020-\u007e\u0080-\uffff]", re.IGNORECASE)
)


def convertToUnicode(t):
    return chr(int(t[0], 16))


hex_unicode = (
    Literal("\\").suppress() +
    Regex("[0-9a-f]{1,6}", re.IGNORECASE) +
    Optional(White(exact=1)).suppress()
).setParseAction(convertToUnicode)

escape = hex_unicode | escaped

# any unicode literal outside the 0-127 ascii range
nonascii = Regex("[^\u0000-\u007f]")

# single character for starting an identifier.
nmstart = Regex("[A-Z]", re.IGNORECASE) | nonascii | escape

nmchar = Regex("[0-9A-Z-]", re.IGNORECASE) | nonascii | escape

identifier = Combine(nmstart + ZeroOrMore(nmchar))
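# A minimal usage sketch: "\0041" is the hex escape for "A"; a single
# whitespace character terminates the escape and is consumed.
print(identifier.parseString(r"\0041 bc"))  # -> ['Abc']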
ident = Word(alphas, alphanums + "_$").setName("identifier")
# The long-removed pyparsing Upcase class is replaced here with its
# equivalent parse action, upcaseTokens (also from pyparsing).
columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
columnNameList = Group(delimitedList(columnName))
tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
tableNameList = Group(delimitedList(tableName))

whereExpression = Forward()
and_ = Keyword("and", caseless=True)
or_ = Keyword("or", caseless=True)
in_ = Keyword("in", caseless=True)

E = CaselessLiteral("E")
binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
arithSign = Word("+-", exact=1)
realNum = Combine(
    Optional(arithSign) +
    (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) +
    Optional(E + Optional(arithSign) + Word(nums)))
intNum = Combine(
    Optional(arithSign) + Word(nums) +
    Optional(E + Optional("+") + Word(nums)))

# need to add support for alg expressions
columnRval = realNum | intNum | quotedString | columnName
whereCondition = Group(
    (columnName + binop + columnRval) |
    (columnName + in_ + "(" + delimitedList(columnRval) + ")") |
    (columnName + in_ + "(" + selectStmt + ")") |
    ("(" + whereExpression + ")"))
whereExpression << whereCondition + ZeroOrMore((and_ | or_) + whereExpression)

# define the grammar
selectStmt << (selectToken +
               ('*' | columnNameList).setResultsName("columns") +
def pushFirst(s, loc, toks):
    exprStack.append(toks[0])


def assignVar(s, loc, toks):
    varStack.append(toks[0])


# define grammar
point = Literal('.')
e = CaselessLiteral('E')
plusorminus = Literal('+') | Literal('-')
number = Word(nums)
integer = Combine(Optional(plusorminus) + number)
floatnumber = Combine(integer + Optional(point + Optional(number)) +
                      Optional(e + integer))

ident = Word(alphas, alphanums + '_')

plus = Literal("+")
minus = Literal("-")
mult = Literal("*")
div = Literal("/")
lpar = Literal("(").suppress()
rpar = Literal(")").suppress()
addop = plus | minus
multop = mult | div
expop = Literal("^")
assign = Literal("=")
def property_grammar():
    ParserElement.setDefaultWhitespaceChars(' ')

    dpi_setting = Group(Optional('*')('DEFAULT') + INTEGER('DPI') +
                        Suppress('@') + INTEGER('HZ'))('SETTINGS*')
    mount_matrix_row = SIGNED_REAL + ',' + SIGNED_REAL + ',' + SIGNED_REAL
    mount_matrix = Group(mount_matrix_row + ';' + mount_matrix_row + ';' +
                         mount_matrix_row)('MOUNT_MATRIX')
    xkb_setting = Optional(Word(alphanums + '+-/@._'))

    # Although this set doesn't cover all of characters in database entries,
    # it's enough for test targets.
    name_literal = Word(printables + ' ')

    props = (('MOUSE_DPI', Group(OneOrMore(dpi_setting))),
             ('MOUSE_WHEEL_CLICK_ANGLE', INTEGER),
             ('MOUSE_WHEEL_CLICK_ANGLE_HORIZONTAL', INTEGER),
             ('MOUSE_WHEEL_CLICK_COUNT', INTEGER),
             ('MOUSE_WHEEL_CLICK_COUNT_HORIZONTAL', INTEGER),
             ('ID_AUTOSUSPEND', Or((Literal('0'), Literal('1')))),
             ('ID_AV_PRODUCTION_CONTROLLER', Or((Literal('0'), Literal('1')))),
             ('ID_PERSIST', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_ACCELEROMETER', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_JOYSTICK', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_KEY', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_KEYBOARD', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_MOUSE', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_POINTINGSTICK', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_SWITCH', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_TABLET', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_TABLET_PAD', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_TOUCHPAD', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_TOUCHSCREEN', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_TRACKBALL', Or((Literal('0'), Literal('1')))),
             ('ID_SIGNAL_ANALYZER', Or((Literal('0'), Literal('1')))),
             ('POINTINGSTICK_SENSITIVITY', INTEGER),
             ('POINTINGSTICK_CONST_ACCEL', REAL),
             ('ID_INPUT_JOYSTICK_INTEGRATION', Or(('internal', 'external'))),
             ('ID_INPUT_TOUCHPAD_INTEGRATION', Or(('internal', 'external'))),
             ('XKB_FIXED_LAYOUT', xkb_setting),
             ('XKB_FIXED_VARIANT', xkb_setting),
             ('XKB_FIXED_MODEL', xkb_setting),
             ('KEYBOARD_LED_NUMLOCK', Literal('0')),
             ('KEYBOARD_LED_CAPSLOCK', Literal('0')),
             ('ACCEL_MOUNT_MATRIX', mount_matrix),
             ('ACCEL_LOCATION', Or(('display', 'base'))),
             ('PROXIMITY_NEAR_LEVEL', INTEGER),
             ('IEEE1394_UNIT_FUNCTION_MIDI', Or((Literal('0'), Literal('1')))),
             ('IEEE1394_UNIT_FUNCTION_AUDIO', Or((Literal('0'), Literal('1')))),
             ('IEEE1394_UNIT_FUNCTION_VIDEO', Or((Literal('0'), Literal('1')))),
             ('ID_VENDOR_FROM_DATABASE', name_literal),
             ('ID_MODEL_FROM_DATABASE', name_literal),
             ('ID_TAG_MASTER_OF_SEAT', Literal('1')),
             ('ID_INFRARED_CAMERA', Or((Literal('0'), Literal('1')))),
             ('ID_CAMERA_DIRECTION', Or(('front', 'rear'))),
             )
    fixed_props = [Literal(name)('NAME') - Suppress('=') - val('VALUE')
                   for name, val in props]
    kbd_props = [Regex(r'KEYBOARD_KEY_[0-9a-f]+')('NAME')
                 - Suppress('=') -
                 ('!' ^ (Optional('!') - Word(alphanums + '_')))('VALUE')
                 ]
    abs_props = [Regex(r'EVDEV_ABS_[0-9a-f]{2}')('NAME')
                 - Suppress('=') -
                 Word(nums + ':')('VALUE')
                 ]

    grammar = Or(fixed_props + kbd_props + abs_props) + EOL

    return grammar
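# A minimal usage sketch; INTEGER, EOL, and the other token helpers are the
# module-level definitions found elsewhere in this file:
grammar = property_grammar()
print(grammar.parseString('MOUSE_DPI=1000@125\n'))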
def _net_decl(self):
    """
    delay3 ::= # delay_value
             | # ( mintypmax_expression [ , mintypmax_expression [ , mintypmax_expression ] ] )
    delay2 ::= # delay_value
             | # ( mintypmax_expression [ , mintypmax_expression ] )
    delay_value ::= unsigned_number | real_number | identifier
    net_declaration ::=
          net_type [ signed ] [ delay3 ] list_of_net_identifiers ;
        | net_type [ drive_strength ] [ signed ] [ delay3 ] list_of_net_decl_assignments ;
        | net_type [ vectored | scalared ] [ signed ] range [ delay3 ] list_of_net_identifiers ;
        | net_type [ drive_strength ] [ vectored | scalared ] [ signed ] range [ delay3 ] list_of_net_decl_assignments ;
        | trireg [ charge_strength ] [ signed ] [ delay3 ] list_of_net_identifiers ;
        | trireg [ drive_strength ] [ signed ] [ delay3 ] list_of_net_decl_assignments ;
        | trireg [ charge_strength ] [ vectored | scalared ] [ signed ] range [ delay3 ] list_of_net_identifiers ;
        | trireg [ drive_strength ] [ vectored | scalared ] [ signed ] range [ delay3 ] list_of_net_decl_assignments ;

    Not Implemented Yet:
        [ charge_strength ]: Defined in this class but not being used here
        delay_value ::= real_number
    """
    # TODO: delay_val should use unsigned_number instead of number
    delay_val = number | identifier
    self.delay3 = Group(SHARP + (
        (LPARENTH + self.expr.mintypmax_expression +
         Optional(COMMA + self.expr.mintypmax_expression +
                  Optional(COMMA + self.expr.mintypmax_expression)) +
         RPARENTH) |
        delay_val))
    self.delay2 = Group(SHARP + (
        (LPARENTH + self.expr.mintypmax_expression +
         Optional(COMMA + self.expr.mintypmax_expression) + RPARENTH) |
        delay_val))

    vectored_kw = Keyword('vectored')
    scalared_kw = Keyword('scalared')

    # TODO: Review the following defines; also, trireg with drive_strength
    # has not been tested

    # net_type [ signed ] [ delay3 ] list_of_net_identifiers ;
    net_decl_0 = Group(net_type + Optional(self.signed_kw) +
                       Optional(self.delay3) + self.l_net_idx)
    # trireg [ charge_strength ] [ signed ] [ delay3 ] list_of_net_identifiers ;
    trireg_decl_0 = Group(trireg + Optional(self.charge_strength) +
                          Optional(self.signed_kw) + Optional(self.delay3) +
                          self.l_net_idx)
    # net_type [ drive_strength ] [ signed ] [ delay3 ] list_of_net_decl_assignments ;
    net_decl_1 = Group(net_type + Optional(self.drive_strength) +
                       Optional(self.signed_kw) + Optional(self.delay3) +
                       self.l_net_decl_assign)
    # trireg [ drive_strength ] [ signed ] [ delay3 ] list_of_net_decl_assignments ;
    trireg_decl_1 = Group(trireg + Optional(self.drive_strength) +
                          Optional(self.signed_kw) + Optional(self.delay3) +
                          self.l_net_decl_assign)
    # net_type [ vectored | scalared ] [ signed ] range [ delay3 ] list_of_net_identifiers ;
    net_decl_2 = Group(net_type + Optional(vectored_kw | scalared_kw) +
                       Optional(self.signed_kw) + self.the_range +
                       Optional(self.delay3) + self.l_net_idx)
    # trireg [ charge_strength ] [ vectored | scalared ] [ signed ] range [ delay3 ] list_of_net_identifiers ;
    trireg_decl_2 = Group(trireg + Optional(self.charge_strength) +
                          Optional(vectored_kw | scalared_kw) +
                          Optional(self.signed_kw) + self.the_range +
                          Optional(self.delay3) + self.l_net_idx)
    # net_type [ drive_strength ] [ vectored | scalared ] [ signed ] range [ delay3 ] list_of_net_decl_assignments ;
    net_decl_3 = Group(net_type + Optional(self.drive_strength) +
                       Optional(vectored_kw | scalared_kw) +
                       Optional(self.signed_kw) + self.the_range +
                       Optional(self.delay3) + self.l_net_decl_assign)
    # trireg [ drive_strength ] [ vectored | scalared ] [ signed ] range [ delay3 ] list_of_net_decl_assignments ;
    trireg_decl_3 = Group(trireg + Optional(self.drive_strength) +
                          Optional(vectored_kw | scalared_kw) +
                          Optional(self.signed_kw) + self.the_range +
                          Optional(self.delay3) + self.l_net_decl_assign)

    net_decl = (trireg_decl_3 | trireg_decl_2 | trireg_decl_1 |
                trireg_decl_0 | net_decl_3 | net_decl_2 | net_decl_1 |
                net_decl_0) + SEMI

    return net_decl
class BoolNot(object):
    def __init__(self, t):
        self.arg = t[0][1]

    def __bool__(self):
        v = bool(self.arg)
        return not v

    def __str__(self):
        return "!" + str(self.arg)

    __repr__ = __str__
    __nonzero__ = __bool__


if PYPARSING_AVAILABLE:
    PF_KEYWORD = oneOf(postfixfields)
    intnum = Word(nums).setParseAction(lambda s, l, t: [int(t[0])])
    charstring = (QuotedString(quoteChar='"') |
                  QuotedString(quoteChar="'") |
                  (QuotedString(quoteChar='/') + Optional(Word("im"))))
    AttOperand = charstring | intnum

    def makeparser(values):
        SimpleExpression = (PF_KEYWORD('pfvalue') + AttOperator('operator') +
                            AttOperand('testvalue'))
        booleanrule = infixNotation(
            SimpleExpression,
            [
                ("!", 1, opAssoc.RIGHT, BoolNot),
                ("&&", 2, opAssoc.LEFT, BoolAnd),
                ("||", 2, opAssoc.LEFT, BoolOr),
            ])

        def evalResult(loc, pos, tokens):
            modifiers = None
def _task_decl(self):
    """
    task_declaration ::=
          task [ automatic ] task_identifier ;
              { task_item_declaration }
              statement_or_null
          endtask
        | task [ automatic ] task_identifier ( [ task_port_list ] ) ;
              { block_item_declaration }
              statement_or_null
          endtask
    task_item_declaration ::=
          block_item_declaration
        | { attribute_instance } tf_input_declaration ;
        | { attribute_instance } tf_output_declaration ;
        | { attribute_instance } tf_inout_declaration ;
    task_port_list ::= task_port_item { , task_port_item }
    task_port_item ::=
          { attribute_instance } tf_input_declaration
        | { attribute_instance } tf_output_declaration
        | { attribute_instance } tf_inout_declaration
    tf_input_declaration ::=
          input [ reg ] [ signed ] [ range ] list_of_port_identifiers
        | input task_port_type list_of_port_identifiers
    tf_output_declaration ::=
          output [ reg ] [ signed ] [ range ] list_of_port_identifiers
        | output task_port_type list_of_port_identifiers
    tf_inout_declaration ::=
          inout [ reg ] [ signed ] [ range ] list_of_port_identifiers
        | inout task_port_type list_of_port_identifiers
    task_port_type ::= integer | real | realtime | time
    """
    self.auto_kw = Keyword('automatic')
    task_kw = Keyword('task')
    endtask_kw = Keyword('endtask')
    input_kw = Keyword('input')
    output_kw = Keyword('output')
    inout_kw = Keyword('inout')

    task_port_type = (self.integer_kw | self.real_kw | self.realtime_kw |
                      self.time_kw)

    self.tf_input_decl = (
        (input_kw + Optional(self.reg_kw) + Optional(self.signed_kw) +
         Optional(self.the_range) +
         delimitedList(~Keyword('input') + identifier)) |
        (input_kw + task_port_type + delimitedList(identifier)))
    self.tf_output_decl = (
        (output_kw + Optional(self.reg_kw) + Optional(self.signed_kw) +
         Optional(self.the_range) +
         delimitedList(~Keyword('input') + identifier)) |
        (output_kw + task_port_type + delimitedList(identifier)))
    self.tf_inout_decl = (
        (inout_kw + Optional(self.reg_kw) + Optional(self.signed_kw) +
         Optional(self.the_range) +
         delimitedList(~Keyword('input') + identifier)) |
        (inout_kw + task_port_type + delimitedList(identifier)))

    _task_port_item = (self.tf_input_decl | self.tf_output_decl |
                       self.tf_inout_decl)
    task_port_list = delimitedList(_task_port_item)

    task_item_decl = (self.block_item_declaration |
                      (self.tf_input_decl + SEMI) |
                      (self.tf_output_decl + SEMI) |
                      (self.tf_inout_decl + SEMI))

    task_decl = (
        (task_kw + Optional(self.auto_kw) + identifier + SEMI +
         ZeroOrMore(task_item_decl) + self.stmt.statement_or_null +
         endtask_kw) |
        (task_kw + Optional(self.auto_kw) + identifier + LPARENTH +
         Optional(task_port_list) + RPARENTH + SEMI +
         ZeroOrMore(self.block_item_declaration) +
         self.stmt.statement_or_null + endtask_kw))
    return task_decl
def __init__(self):
    """
    expop   :: '^'
    multop  :: 'x' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    """
    point = Literal(".")
    exp = CaselessLiteral("E")
    fnumber = Combine(
        Word("+-" + nums, nums) +
        Optional(point + Optional(Word(nums))) +
        Optional(exp + Word("+-" + nums, nums)))
    ident = Word(alphas, alphas + nums + "_$")
    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("x")
    div = Literal("/")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = plus | minus
    multop = mult | div
    powop = Literal("^")
    pi = CaselessLiteral("PI")
    expr = Forward()
    atom = (
        (Optional(oneOf("- +")) +
         (pi | exp | fnumber | ident + lpar + expr + rpar).setParseAction(
             self.push_first)) |
        Optional(oneOf("- +")) + Group(lpar + expr + rpar)
    ).setParseAction(self.push_unary_minus)
    # by defining exponentiation as "atom [ ^ factor ]..." instead of
    # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
    # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = Forward()
    factor << atom + ZeroOrMore(
        (powop + factor).setParseAction(self.push_first))
    term = factor + ZeroOrMore(
        (multop + factor).setParseAction(self.push_first))
    expr << term + ZeroOrMore(
        (addop + term).setParseAction(self.push_first))
    # addop_term = ( addop + term ).setParseAction( self.pushFirst )
    # general_term = term + ZeroOrMore( addop_term ) | OneOrMore( addop_term)
    # expr << general_term
    self.bnf = expr
    # map operator symbols to corresponding arithmetic operations
    epsilon = 1e-12
    self.opn = {
        "+": operator.add,
        "-": operator.sub,
        "x": operator.mul,
        "/": operator.truediv,
        "^": operator.pow
    }
    self.function = {
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "abs": abs,
        "trunc": lambda a: int(a),
        "round": round,
        # Python 3 has no cmp(); compute the sign directly, with the same
        # dead zone of +/-epsilon around zero as the original expression.
        "sgn": lambda a: (a > epsilon) - (a < -epsilon)
    }
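# A minimal usage sketch; `Calculator` is a hypothetical name for the class
# this __init__ belongs to, and evaluation of the pushed token stack (not
# shown here) follows pyparsing's classic fourFn example.
calc = Calculator()
calc.bnf.parseString("2 ^ 3 ^ 2", parseAll=True)
# exponentiation is right-associative here: 2 ^ (3 ^ 2) = 512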
def parse_step1(morph):
    """parse the field morphology of qurany corpus
    """
    # unescape the HTML-encoded angle brackets before adding the markers
    string = "$ " + str(morph).replace("POS:", "£ POS:").replace(
        "PRON:", "µ PRON:").replace("&lt;", "<").replace("&gt;", ">") + " #"

    # regular expressions
    begin = Keyword('$').suppress()
    center = Keyword('£').suppress()
    last = Keyword('µ').suppress()
    end = Keyword('#').suppress()
    skip = SkipTo(end).suppress()

    prefix = Word(alphas + "+" + ":")
    prefixes = Group(ZeroOrMore(~center + prefix))

    genderK = TagKeywords(["M", "F"])
    numberK = TagKeywords(["S", "D", "P"])
    personK = TagKeywords(["1", "2", "3"])
    genderL = TagLiterals(["M", "F"])
    numberL = TagLiterals(["S", "D", "P"])
    personL = TagLiterals(["1", "2", "3"])

    person_ = personL + Optional(genderL) + Optional(numberL)
    gender_ = genderL + numberL
    gen = person_ | gender_ | numberK | genderK

    pos = "POS:" + Word(alphas)
    lem = "LEM:" + CharsNotIn(" ")
    root = "ROOT:" + CharsNotIn(" ")
    sp = "SP:" + CharsNotIn(" ")
    mood = "MOOD:" + CharsNotIn(" ")

    aspect = TagKeywords(["PERF", "IMPF", "IMPV"])
    voice = TagKeywords(["ACT", "PASS"])
    form = TagKeywords([
        "(I)", "(II)", "(III)", "(IV)", "(V)", "(VI)", "(VII)", "(VIII)",
        "(IX)", "(X)", "(XI)", "(XII)"
    ])
    verb = aspect | voice | form

    voc = Keyword("+voc").suppress()

    deriv = TagKeywords(["ACT", "PCPL", "PASS", "VN"])

    state = TagKeywords(["DEF", "INDEF"])
    case = TagKeywords(["NOM", "ACC", "GEN"])
    nom = case | state

    tag = lem | root | sp | mood | gen | verb | deriv | nom | voc | skip
    part = Group(center + pos + ZeroOrMore(~center + ~last + ~end + tag))
    base = Group(OneOrMore(~end + ~last + part))

    pron = "PRON:" + Group(gen)
    suffixes = Group(ZeroOrMore(~end + last + pron))

    whole = begin + prefixes + base + suffixes + end

    parsed = whole.parseString(string)
    return parsed
from pyparsing import Literal, Word, alphas, Optional, OneOrMore, Forward, \
    Group, ZeroOrMore, Empty, oneOf, nums, ParserElement
from pydash import flatten_deep

ParserElement.enablePackrat()

# $ means words, % means numbers, & means punctuations
WildCards = oneOf("$ % &")
LeafWord = WildCards | Word(alphas)
# aaa+ aaa* aaa? aaa{0,3} aaa{2}
RangedQuantifiers = Literal("{") + Word(nums) + Optional(
    Literal(",") + Word(nums)) + Literal("}")
Quantifiers = oneOf("* + ?") | RangedQuantifiers
QuantifiedLeafWord = LeafWord + Quantifiers
# a sequence
ConcatenatedSequence = OneOrMore(QuantifiedLeafWord | LeafWord)
# syntax root
Rule = Forward()
# ( xxx )
GroupStatement = Forward()
QuantifiedGroup = GroupStatement + Quantifiers
# (?<label> xxx)
# TODO: We don't need quantified capture group, so no QuantifiedCaptureGroup.
# And it is not orAble, can only be in the top level of AST, so it is easier
# to process
CaptureGroupStatement = Forward()
# xxx | yyy
orAbleStatement = QuantifiedGroup | GroupStatement | ConcatenatedSequence
OrStatement = Group(
    orAbleStatement + OneOrMore(Literal("|") + Group(orAbleStatement)))
GroupStatement << Group(Literal("(") + Rule + Literal(")"))
# the capture-group body mirrors GroupStatement, with a leading ?<label>
CaptureGroupStatement << Group(
    Literal("(") + Literal("?") + Literal("<") + Word(alphas) + Literal(">") +
    Rule + Literal(")"))
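# A minimal usage sketch of the sequence level of this grammar:
print(ConcatenatedSequence.parseString("hello+ % world{1,2}"))
# -> ['hello', '+', '%', 'world', '{', '1', ',', '2', '}']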
def interval(end=0, func=lambda t: t):
    SEPARATOR = Suppress('-') | Suppress('*')
    INTERVAL = Optional(Word(nums), default=0) + SEPARATOR + Optional(
        Word(nums), default=end)
    INTERVAL.setParseAction(func)
    return INTERVAL
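# A minimal usage sketch, e.g. for cron-style hour fields; a bare "*" falls
# back to the defaults 0 and `end`:
hours = interval(end=23, func=lambda t: list(range(int(t[0]), int(t[1]) + 1)))
print(hours.parseString("8-17"))  # -> [8, 9, ..., 17]
print(hours.parseString("*"))     # -> [0, 1, ..., 23]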
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from pyparsing import Keyword
from pyparsing import Optional

from undebt.pattern.common import COMMA
from undebt.pattern.common import INDENT
from undebt.pattern.python import ATOM
from undebt.pattern.util import tokens_as_list


grammar = (INDENT + Keyword("exec").suppress() + ATOM +
           Keyword("in").suppress() + ATOM +
           Optional(COMMA.suppress() + ATOM))


@tokens_as_list(assert_len_in=(3, 4))
def replace(tokens):
    """
    exec str in globals(), locals()
    -> exec(str, globals(), locals())
    """
    return tokens[0] + "exec(" + ", ".join(tokens[1:]) + ")"
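# A minimal usage sketch (hypothetical tokens, assuming the tokens_as_list
# decorator accepts any token sequence): tokens arrive as
# [indent, exec_target, globals_atom, locals_atom].
print(replace(["    ", "code_str", "globals()", "locals()"]))
# -> '    exec(code_str, globals(), locals())'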
def _tdb_grammar(): #pylint: disable=R0914
    """
    Convenience function for getting the pyparsing grammar of a TDB file.
    """
    int_number = Word(nums).setParseAction(lambda t: [int(t[0])])
    # matching float w/ regex is ugly but is recommended by pyparsing
    float_number = Regex(r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?') \
        .setParseAction(lambda t: [float(t[0])])
    # symbol name, e.g., phase name, function name
    symbol_name = Word(alphanums + '_:', min=1)
    # NOTE: this rebinds symbol_name so that both names also allow parentheses
    ref_phase_name = symbol_name = Word(alphanums + '_:()', min=1)
    # species name, e.g., CO2, AL, FE3+
    species_name = Word(alphanums + '+-*', min=1) + Optional(Suppress('%'))
    # constituent arrays are colon-delimited
    # each subarray can be comma- or space-delimited
    constituent_array = Group(
        delimitedList(Group(OneOrMore(Optional(Suppress(',')) + species_name)),
                      ':'))
    param_types = MatchFirst(
        [TCCommand(param_type) for param_type in TDB_PARAM_TYPES])
    # Let sympy do heavy arithmetic / algebra parsing for us
    # a convenience function will handle the piecewise details
    func_expr = Optional(float_number) + OneOrMore(SkipTo(';') \
        + Suppress(';') + ZeroOrMore(Suppress(',')) + Optional(float_number) + \
        Suppress(Word('YNyn', exact=1) | White()))
    # ELEMENT
    cmd_element = TCCommand('ELEMENT') + Word(alphas+'/-', min=1, max=2) + \
        Optional(Suppress(ref_phase_name)) + \
        Optional(Suppress(OneOrMore(float_number))) + LineEnd()
    # TYPE_DEFINITION
    cmd_typedef = TCCommand('TYPE_DEFINITION') + \
        Suppress(White()) + CharsNotIn(' !', exact=1) + SkipTo(LineEnd())
    # FUNCTION
    cmd_function = TCCommand('FUNCTION') + symbol_name + \
        func_expr.setParseAction(_make_piecewise_ast)
    # ASSESSED_SYSTEMS
    cmd_ass_sys = TCCommand('ASSESSED_SYSTEMS') + SkipTo(LineEnd())
    # DEFINE_SYSTEM_DEFAULT
    cmd_defsysdef = TCCommand('DEFINE_SYSTEM_DEFAULT') + SkipTo(LineEnd())
    # DEFAULT_COMMAND
    cmd_defcmd = TCCommand('DEFAULT_COMMAND') + SkipTo(LineEnd())
    # LIST_OF_REFERENCES
    cmd_lor = TCCommand('LIST_OF_REFERENCES') + SkipTo(LineEnd())
    # PHASE
    cmd_phase = TCCommand('PHASE') + symbol_name + \
        Suppress(White()) + CharsNotIn(' !', min=1) + Suppress(White()) + \
        Suppress(int_number) + Group(OneOrMore(float_number)) + LineEnd()
    # CONSTITUENT
    cmd_constituent = TCCommand('CONSTITUENT') + symbol_name + \
        Suppress(White()) + Suppress(':') + constituent_array + \
        Suppress(':') + LineEnd()
    # PARAMETER
    cmd_parameter = TCCommand('PARAMETER') + param_types + \
        Suppress('(') + symbol_name + \
        Optional(Suppress('&') + Word(alphas+'/-', min=1, max=2), default=None) + \
        Suppress(',') + constituent_array + \
        Optional(Suppress(';') + int_number, default=0) + \
        Suppress(')') + func_expr.setParseAction(_make_piecewise_ast)
    # Now combine the grammar together
    all_commands = cmd_element | \
        cmd_typedef | \
        cmd_function | \
        cmd_ass_sys | \
        cmd_defsysdef | \
        cmd_defcmd | \
        cmd_lor | \
        cmd_phase | \
        cmd_constituent | \
        cmd_parameter
    return all_commands
except ImportError:
    ecodes = None
    print('WARNING: evdev is not available')

try:
    from functools import lru_cache
except ImportError:
    # don't do caching on old python
    lru_cache = lambda: (lambda f: f)

EOL = LineEnd().suppress()
EMPTYLINE = LineEnd()
COMMENTLINE = pythonStyleComment + EOL
INTEGER = Word(nums)
STRING = QuotedString('"')
REAL = Combine((INTEGER + Optional('.' + Optional(INTEGER))) ^ ('.' + INTEGER))
SIGNED_REAL = Combine(Optional(Word('-+')) + REAL)
UDEV_TAG = Word(string.ascii_uppercase, alphanums + '_')

# Those patterns are used in type-specific matches
TYPES = {'mouse':    ('usb', 'bluetooth', 'ps2', '*'),
         'evdev':    ('name', 'atkbd', 'input'),
         # single-element tuple needs the trailing comma
         'id-input': ('modalias', ),
         'touchpad': ('i8042', 'rmi', 'bluetooth', 'usb'),
         'joystick': ('i8042', 'rmi', 'bluetooth', 'usb'),
         'keyboard': ('name', ),
         'sensor':   ('modalias', ),
         }

# Patterns that are used to set general properties on a device
GENERAL_MATCHES = {'acpi',
def graph_definition():
    global graphparser

    if not graphparser:
        # punctuation
        colon = Literal(":")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lparen = Literal("(")
        rparen = Literal(")")
        equals = Literal("=")
        comma = Literal(",")
        dot = Literal(".")
        slash = Literal("/")
        bslash = Literal("\\")
        star = Literal("*")
        semi = Literal(";")
        at = Literal("@")
        minus = Literal("-")

        # keywords
        strict_ = CaselessLiteral("strict")
        graph_ = CaselessLiteral("graph")
        digraph_ = CaselessLiteral("digraph")
        subgraph_ = CaselessLiteral("subgraph")
        node_ = CaselessLiteral("node")
        edge_ = CaselessLiteral("edge")

        # token definitions
        identifier = Word(alphanums + "_.").setName("identifier")

        double_quoted_string = QuotedString('"', escChar="\\", multiline=True,
                                            unquoteResults=False)  # dblQuotedString

        _noncomma = "".join([c for c in printables if c != ","])
        alphastring_ = OneOrMore(CharsNotIn(_noncomma + ' '))

        def parse_html(s, loc, toks):
            return '<%s>' % ''.join(toks[0])

        opener = '<'
        closer = '>'
        html_text = nestedExpr(
            opener, closer,
            (CharsNotIn(opener + closer))
        ).setParseAction(parse_html).leaveWhitespace()

        ID = (identifier | html_text | double_quoted_string |
              # .setParseAction(strip_quotes) |
              alphastring_).setName("ID")

        float_number = Combine(Optional(minus) +
                               OneOrMore(Word(nums + "."))).setName("float_number")

        righthand_id = (float_number | ID).setName("righthand_id")

        port_angle = (at + ID).setName("port_angle")

        port_location = (OneOrMore(Group(colon + ID)) |
                         Group(colon + lparen + ID + comma + ID +
                               rparen)).setName("port_location")

        port = (Group(port_location + Optional(port_angle)) |
                Group(port_angle + Optional(port_location))).setName("port")

        node_id = (ID + Optional(port))
        a_list = OneOrMore(ID + Optional(equals + righthand_id) +
                           Optional(comma.suppress())).setName("a_list")

        attr_list = OneOrMore(lbrack.suppress() + Optional(a_list) +
                              rbrack.suppress()).setName("attr_list")

        attr_stmt = (Group(graph_ | node_ | edge_) +
                     attr_list).setName("attr_stmt")

        edgeop = (Literal("--") | Literal("->")).setName("edgeop")

        stmt_list = Forward()
        graph_stmt = Group(lbrace.suppress() + Optional(stmt_list) +
                           rbrace.suppress() +
                           Optional(semi.suppress())).setName("graph_stmt")

        edge_point = Forward()

        edgeRHS = OneOrMore(edgeop + edge_point)
        edge_stmt = edge_point + edgeRHS + Optional(attr_list)

        subgraph = Group(subgraph_ + Optional(ID) +
                         graph_stmt).setName("subgraph")

        edge_point << Group(subgraph | graph_stmt |
                            node_id).setName('edge_point')

        node_stmt = (node_id + Optional(attr_list) +
                     Optional(semi.suppress())).setName("node_stmt")

        assignment = (ID + equals + righthand_id).setName("assignment")
        stmt = (assignment | edge_stmt | attr_stmt | subgraph | graph_stmt |
                node_stmt).setName("stmt")
        stmt_list << OneOrMore(stmt + Optional(semi.suppress()))

        graphparser = OneOrMore(
            (Optional(strict_) + Group((graph_ | digraph_)) + Optional(ID) +
             graph_stmt).setResultsName("graph"))

        singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

        # actions
        graphparser.ignore(singleLineComment)
        graphparser.ignore(cStyleComment)

        assignment.setParseAction(push_attr_list)
        a_list.setParseAction(push_attr_list)
        edge_stmt.setParseAction(push_edge_stmt)
        node_stmt.setParseAction(push_node_stmt)
        attr_stmt.setParseAction(push_default_stmt)
        subgraph.setParseAction(push_subgraph_stmt)
        graph_stmt.setParseAction(push_graph_stmt)
        graphparser.setParseAction(push_top_graph_stmt)

    return graphparser
PN_CHARS_BASE_re = u'A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF'

# [165] PN_CHARS_U ::= PN_CHARS_BASE | '_'
PN_CHARS_U_re = '_' + PN_CHARS_BASE_re

# [167] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
PN_CHARS_re = u'\\-0-9\u00B7\u0300-\u036F\u203F-\u2040' + PN_CHARS_U_re
# PN_CHARS = Regex(u'[%s]' % PN_CHARS_re, flags=re.U)

# [168] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS|'.')* PN_CHARS)?
PN_PREFIX = Regex(u'[%s](?:[%s\\.]*[%s])?' % (PN_CHARS_BASE_re, PN_CHARS_re,
                                              PN_CHARS_re), flags=re.U)

# [140] PNAME_NS ::= PN_PREFIX? ':'
PNAME_NS = Optional(
    Param('prefix', PN_PREFIX)) + Suppress(':').leaveWhitespace()

# [173] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%' )
PN_LOCAL_ESC_re = '\\\\[_~\\.\\-!$&"\'()*+,;=/?#@%]'
# PN_LOCAL_ESC = Regex(PN_LOCAL_ESC_re)  # regex'd
# PN_LOCAL_ESC.setParseAction(lambda x: x[0][1:])

# [172] HEX ::= [0-9] | [A-F] | [a-f]
# HEX = Regex('[0-9A-Fa-f]')  # not needed

# [171] PERCENT ::= '%' HEX HEX
PERCENT_re = '%[0-9a-fA-F]{2}'
# PERCENT = Regex(PERCENT_re)  # regex'd
# PERCENT.setParseAction(lambda x: unichr(int(x[0][1:], 16)))
def _parse(text: Text):
    comment = Suppress('/*' + Regex(r'([^*]|[*][^/])*') + '*/')
    identifier = (Suppress('`') + Regex(r'[^`]+') +
                  Suppress('`')).setParseAction(lambda toks: toks[0])
    string = (Suppress("'") + Regex(r"([^']|\\.)*") +
              Suppress("'")).setParseAction(lambda toks: toks[0])
    reference_option = (CaselessKeyword('RESTRICT') |
                        CaselessKeyword('CASCADE') |
                        CaselessKeyword('SET NULL') |
                        CaselessKeyword('NO ACTION') |
                        CaselessKeyword('SET DEFAULT'))
    reference_definition = (
        Suppress(CaselessKeyword('REFERENCES')) +
        identifier('reference_tbl_name') + '(' +
        delimitedList(identifier)('tbl_column') + ')' +
        ZeroOrMore((Suppress(CaselessKeyword('ON DELETE')) +
                    reference_option('on_delete')) |
                   (Suppress(CaselessKeyword('ON UPDATE')) +
                    reference_option('on_update'))))
    constraint_definition = (
        (((CaselessKeyword('PRIMARY KEY')('type')) |
          ((CaselessKeyword('FULLTEXT KEY') |
            CaselessKeyword('UNIQUE KEY') |
            CaselessKeyword('KEY'))('type') + identifier('index_name'))) +
         '(' + delimitedList(identifier('key_part*')) + ')') |
        (Suppress(CaselessKeyword('CONSTRAINT')) + identifier('symbol') +
         (CaselessKeyword('FOREIGN KEY')('type') + '(' +
          delimitedList(identifier('key_part*')) + ')' +
          reference_definition))
    ).setParseAction(Constraint)
    column_type = (Word(alphanums) + Optional('(' + Regex('[^)]+') + ')') +
                   Optional(Suppress(CaselessKeyword('UNSIGNED'))))
    column_definition = (
        identifier('col_name') + column_type('col_type') + ZeroOrMore(
            (CaselessKeyword('NULL') |
             CaselessKeyword('NOT NULL'))('nullability') |
            (CaselessKeyword('AUTO_INCREMENT'))('auto_increment') |
            (Suppress(CaselessKeyword('COMMENT')) + string('comment')) |
            (Suppress(CaselessKeyword('DEFAULT')) +
             (Word(alphanums + '_') | string
              ).setParseAction(lambda toks: toks[0])('default')) |
            (Suppress(CaselessKeyword('ON DELETE')) +
             (Word(alphanums + '_') | reference_option)('on_delete')) |
            (Suppress(CaselessKeyword('ON UPDATE')) +
             (Word(alphanums + '_') | reference_option)('on_update')))
    ).setParseAction(Column)
    create_definition = (column_definition('column*') |
                         constraint_definition('constraint*'))
    create_table_statement = (
        Suppress(CaselessKeyword('CREATE') + CaselessKeyword('TABLE')) +
        identifier('tbl_name') + Suppress('(') +
        delimitedList(create_definition) + Suppress(')') +
        Suppress(Regex('[^;]*'))).setParseAction(Table)
    parser = delimitedList(comment | create_table_statement('table*'),
                           delim=';') + Suppress(Optional(';'))
    return parser.parseString(text, parseAll=True)['table']
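# A minimal usage sketch; Constraint, Column, and Table are the parse-action
# wrappers attached above (defined elsewhere in this module):
tables = _parse("""
CREATE TABLE `users` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `name` varchar(255) DEFAULT 'anon',
  PRIMARY KEY (`id`)
);
""")
# `tables` holds the Table results, one per CREATE TABLE statement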