def __init__(self):
    """Assemble the grammar for a minimal ``select ... from ...`` statement.

    The finished parser is stored on ``self.select_stmt``.  Results names
    exposed by the grammar are ``column_name``, ``select_clause`` and
    ``from_clause``.
    """
    # Punctuation.
    asterisk = Literal('*')
    separator = Suppress(',')

    # Identifiers; a table alias has the same lexical shape as any name.
    name = Word(alphas, alphanums+'_')
    qualifier = name.copy()

    # Select clause: '*' or a comma separated group of columns.  A column
    # may carry an "alias." prefix and an " as other_name" suffix.
    alias_prefix = Optional(qualifier + '.')
    rename_suffix = Optional(' as ' + name)
    column = Combine(alias_prefix + name + rename_suffix)
    column = column.setResultsName('column_name')
    select_kw = Keyword('select', caseless=1)
    column_group = Group(delimitedList(column, separator))
    select_part = (asterisk | column_group).setResultsName('select_clause')

    # From clause: comma separated tables, each with an optional alias.
    from_kw = Keyword('from', caseless=1)
    tables = delimitedList(name + Optional(qualifier), separator)
    from_part = tables.setResultsName('from_clause')

    # Complete statement.
    self.select_stmt = select_kw + select_part + from_kw + from_part
def make_parser(self):
    """Build and return the pyparsing grammar for a whole program.

    A program is a ``List`` of entries; an entry is either an instruction
    (``ident operand* ;``) or a label definition (``ident :``).  Matches are
    tagged with ``tokenize(...)`` and the callables produced by ``run(...)``
    around this object's ``gen_label_def``, ``gen_instr`` and
    ``gen_end_progr`` methods are attached as parse actions.
    """
    def punct(char):
        # Punctuation is matched but kept out of the result tokens.
        return Literal(char).suppress()

    generator = self

    open_paren = punct('(')
    close_paren = punct(')')
    label_mark = punct(':')
    terminator = punct(';')

    # A '?' placeholder parses as the number 0.
    placeholder = Literal('?').setParseAction(lambda s, l, t: [0])
    numeral = Word(nums).setParseAction(map_int)
    word = Word(alphas+'_', alphas+nums+'_')

    generated_label = word + open_paren + Optional(numeral) + close_paren
    generated_label = generated_label.setParseAction(tokenize('LabelGen'))

    label_definition = word + label_mark
    label_definition = label_definition.setParseAction(
        tokenize('LabelDef'), run(generator.gen_label_def))

    label_reference = word.copy().setParseAction(tokenize('LabelRef'))

    argument = numeral | generated_label | label_reference | placeholder

    instruction = word + List(argument) + terminator
    instruction = instruction.setParseAction(
        tokenize('Instr'), run(generator.gen_instr))

    item = instruction | label_definition
    return List(item).setParseAction(run(self.gen_end_progr))
def func_tokens(dictionary, parse_action):
    """Create the parser elements used for ``$function(args)`` expansion.

    :param dictionary: mapping of integer index to replacement text for
        ``$<digits>`` tokens; when it is empty (falsy) such tokens are not
        part of the argument grammar.
    :param parse_action: parse action attached to the function-call token.
    :returns: the tuple ``(func_tok, arglist, rx_tok)``.
    """
    # "$name" immediately followed by a nested, parenthesised argument text.
    name_word = Word(alphas+'_', alphanums+'_')
    dollar_name = Combine('$' + name_word.copy()('funcname'))
    func_tok = dollar_name + originalTextFor(nestedExpr())('args')
    func_tok.leaveWhitespace()
    func_tok.setParseAction(parse_action)
    func_tok.enablePackrat()

    # "$<digits>" is replaced by the matching dictionary entry, or an empty
    # string when the index is unknown.
    def lookup_index(tokens):
        return dictionary.get(int(tokens.num), u'')

    rx_tok = Combine(Literal('$').suppress() + Word(nums)('num'))
    rx_tok.setParseAction(lookup_index)

    def trimmed(s, l, tok):
        return tok[0].strip()

    plain_arg = CharsNotIn(u',').setParseAction(trimmed)
    quoted_arg = QuotedString('"')

    if dictionary:
        argument = quoted_arg | rx_tok | plain_arg
    else:
        argument = quoted_arg | plain_arg
    arglist = Optional(delimitedList(argument))

    return func_tok, arglist, rx_tok
def __init__(self):
    """Create ``self.select_stmt``, a parser for ``select <cols> from <tables>``.

    ``<cols>`` is ``*`` or a comma separated list of column names and
    ``<tables>`` is a comma separated list of table names, each optionally
    followed by an alias.
    """
    # --- literals ---
    wildcard = Literal('*')
    list_sep = Suppress(',')

    # --- identifiers ---
    ident = Word(alphas, alphanums + '_')
    table_alias = ident.copy()

    # --- select clause ---
    column_expr = Combine(
        Optional(table_alias + '.')
        + ident
        + Optional(' as ' + ident)
    )
    named_column = column_expr.setResultsName('column_name')
    select_keyword = Keyword('select', caseless=1)
    column_choice = wildcard | Group(delimitedList(named_column, list_sep))
    select_clause = column_choice.setResultsName('select_clause')

    # --- from clause ---
    from_keyword = Keyword('from', caseless=1)
    table_entry = ident + Optional(table_alias)
    table_list = delimitedList(table_entry, list_sep)
    from_clause = table_list.setResultsName('from_clause')

    # --- select statement ---
    self.select_stmt = (
        select_keyword + select_clause + from_keyword + from_clause
    )
def func_tokens(dictionary, parse_action):
    """Build the tokens needed to parse ``$func(...)`` calls and their arguments.

    :param dictionary: integer-keyed mapping used to substitute ``$<digits>``
        references; a falsy value removes those references from the
        argument grammar.
    :param parse_action: callable installed as the parse action of the
        function-call token.
    :returns: ``(func_tok, arglist, rx_tok)``.
    """
    # Function call: "$" + identifier + nested parenthesised text.
    identifier = Word(alphas + '_', alphanums + '_')
    call_head = Combine('$' + identifier.copy()('funcname'))
    func_tok = call_head + originalTextFor(nestedExpr())('args')
    func_tok.leaveWhitespace()
    func_tok.setParseAction(parse_action)
    func_tok.enablePackrat()

    # Numbered reference: "$" + digits, resolved through the dictionary.
    rx_tok = Combine(Literal('$').suppress() + Word(nums)('num'))

    def substitute(tokens):
        position = int(tokens.num)
        return dictionary.get(position, '')

    rx_tok.setParseAction(substitute)

    # Bare argument text runs up to the next comma and is stripped.
    def strip_text(s, l, tok):
        return tok[0].strip()

    bare_text = CharsNotIn(',').setParseAction(strip_text)
    quoted_text = QuotedString('"')

    choices = (quoted_text | rx_tok | bare_text) if dictionary \
        else (quoted_text | bare_text)
    arglist = Optional(delimitedList(choices))

    return func_tok, arglist, rx_tok
def parse_upstreams(self):
    """Collect the ``upstream`` blocks found in ``self.nginx_config``.

    For every block matched, ``self.upstreams[<stream id>]`` is set to the
    list of ``"<ip>:<port>"`` strings of its ``server`` entries.
    """
    def range_validator(low, high):
        """Return a parse action rejecting integers outside ``low..high``."""
        def check(tokens):
            value = int(tokens[0])
            if value < low or value > high:
                raise ParseException(
                    "", 0, "integer outside range %d-%d" % (low, high))
        return check

    # Basic building blocks.
    digits = Word(nums)
    octet = digits.copy().setParseAction(range_validator(0, 255))
    address = Combine(octet + ("." + octet) * 3)
    port = digits.copy().setParseAction(range_validator(1025, 65535))
    endpoint = address("ip_addr") + ":" + port("ip_port")
    name = Word(alphas, alphanums + "_")

    # Punctuation is suppressed so it does not show up in the results.
    SEMI, LBRACE, RBRACE = [Suppress(char) for char in ";{}"]

    # One "server <ip>:<port>;" entry inside an upstream block.
    server_entry = Keyword("server") + endpoint + SEMI

    # "upstream <name> { server ...; ... }"
    block = (
        Keyword("upstream")
        + name("stream_id")
        + LBRACE
        + OneOrMore(Group(server_entry))("servers")
        + RBRACE
    )

    # Scan the configuration text and read the matches back through the
    # results names assigned above.
    for match in block.searchString(self.nginx_config):
        self.upstreams[match.stream_id] = [
            entry.ip_addr + ":" + entry.ip_port for entry in match.servers
        ]
(False, False) : "value %%s is not in the range (%s to %s)" % (minval,maxval), }[minval is None, maxval is None] # define the actual range checking parse action def rangeCheckParseAction(string, loc, tokens): parsedval = tokens[0] if not inRangeFn(parsedval): raise ParseException(string, loc, outOfRangeMessage % parsedval) return rangeCheckParseAction # define the expressions for a date of the form YYYY/MM/DD or YYYY/MM (assumes YYYY/MM/01) integer = Word(nums).setName("integer") integer.setParseAction(lambda t:int(t[0])) month = integer.copy().addParseAction(rangeCheck(1,12)) day = integer.copy().addParseAction(rangeCheck(1,31)) year = integer.copy().addParseAction(rangeCheck(2000, None)) SLASH = Suppress('/') dateExpr = year("year") + SLASH + month("month") + Optional(SLASH + day("day")) dateExpr.setName("date") # convert date fields to datetime (also validates dates as truly valid dates) dateExpr.setParseAction(lambda t: datetime(t.year, t.month, t.day or 1).date()) # add range checking on dates mindate = datetime(2002,1,1).date() maxdate = datetime.now().date() dateExpr.addParseAction(rangeCheck(mindate, maxdate))
}[minval is None, maxval is None] # define the actual range checking parse action def rangeCheckParseAction(string, loc, tokens): parsedval = tokens[0] if not inRangeFn(parsedval): raise ParseException(string, loc, outOfRangeMessage % parsedval) return rangeCheckParseAction # define the expressions for a date of the form YYYY/MM/DD or YYYY/MM (assumes YYYY/MM/01) integer = Word(nums).setName("integer") integer.setParseAction(lambda t: int(t[0])) month = integer.copy().addParseAction(rangeCheck(1, 12)) day = integer.copy().addParseAction(rangeCheck(1, 31)) year = integer.copy().addParseAction(rangeCheck(2000, None)) SLASH = Suppress('/') dateExpr = year("year") + SLASH + month("month") + Optional(SLASH + day("day")) dateExpr.setName("date") # convert date fields to datetime (also validates dates as truly valid dates) dateExpr.setParseAction(lambda t: datetime(t.year, t.month, t.day or 1).date()) # add range checking on dates mindate = datetime(2002, 1, 1).date() maxdate = datetime.now().date() dateExpr.addParseAction(rangeCheck(mindate, maxdate))
class Parsers:
    """Whitespace-preserving pyparsing parsers for a hosts-style file.

    Each parser is stored as an attribute (``whitespace``, ``field``,
    ``ip_address``, ``canonical_hostname``, ``alias``, ``aliases``,
    ``entry``, ``comment``, ``line_content``, ``line_end``, ``line`` and
    ``hosts``) and, apart from ``field``, turns its match into the
    corresponding ``*Node`` object.
    """

    def __init__(self, debug=False):
        """Create every parser; ``debug`` is forwarded to ``setDebug``."""
        self._debug = debug
        # Order matters: later parsers are composed from earlier ones.
        builders = (
            self._create_whitespace_parser,
            self._create_field_parser,
            self._create_ip_address_parser,
            self._create_canonical_hostname_parser,
            self._create_alias_parser,
            self._create_aliases_parser,
            self._create_entry_parser,
            self._create_comment_parser,
            self._create_line_content,
            self._create_line_end_parser,
            self._create_line_parser,
            self._create_hosts_parser,
        )
        for build in builders:
            build()

    def _config(self, parser, name, parse_action):
        """Apply the settings shared by all parsers to ``parser``."""
        parser.leaveWhitespace()
        parser.setDebug(self._debug)
        parser.setName(name)
        parser.setParseAction(parse_action)

    # -- leaf parsers -----------------------------------------------------

    def _create_whitespace_parser(self):
        """Runs of tabs and spaces."""
        self.whitespace = Word('\t ')
        self._config(
            self.whitespace, 'white_space',
            lambda string, location, tokens: WhitespaceNode(tokens[0]))

    def _create_field_parser(self):
        """A run of printable characters excluding ``COMMENT_START``."""
        self.field = Word(printables.replace(COMMENT_START, ''))
        self._config(self.field, 'field', lambda: None)

    def _create_ip_address_parser(self):
        """A field wrapped in an ``IPAddressNode``."""
        self.ip_address = self.field.copy()
        self._config(
            self.ip_address, 'ip_address',
            lambda string, location, tokens: IPAddressNode(tokens[0]))

    def _create_canonical_hostname_parser(self):
        """A field wrapped in a ``CanonicalHostnameNode``."""
        self.canonical_hostname = self.field.copy()
        self._config(
            self.canonical_hostname, 'canonical_hostname',
            lambda string, location, tokens: CanonicalHostnameNode(tokens[0]))

    def _create_alias_parser(self):
        """A field wrapped in an ``AliasNode``."""
        self.alias = self.field.copy()
        self._config(
            self.alias, 'alias',
            lambda string, location, tokens: AliasNode(tokens[0]))

    # -- composite parsers ------------------------------------------------

    def _create_aliases_parser(self):
        """One alias followed by any number of whitespace-separated aliases."""
        more_aliases = ZeroOrMore(self.whitespace + self.alias)
        self.aliases = self.alias + more_aliases
        self._config(
            self.aliases, 'aliases',
            lambda string, location, tokens: AliasesNode(tokens.asList()))

    def _create_entry_parser(self):
        """``ip_address whitespace canonical_hostname [whitespace aliases]``."""
        self.entry = (
            self.ip_address
            + self.whitespace
            + self.canonical_hostname
            + Optional(self.whitespace + self.aliases)
        )
        self._config(
            self.entry, 'entry',
            lambda string, location, tokens: EntryNode(tokens.asList()))

    def _create_comment_parser(self):
        """``COMMENT_START`` followed by the rest of the line."""
        self.comment = COMMENT_START + restOfLine
        self._config(
            self.comment, 'comment',
            lambda string, location, tokens:
                CommentNode(''.join(tokens.asList())))

    def _create_line_content(self):
        """Optional whitespace, entry, whitespace and comment, in that order."""
        self.line_content = (
            Optional(self.whitespace)
            + Optional(self.entry)
            + Optional(self.whitespace)
            + Optional(self.comment)
        )
        self._config(
            self.line_content, 'line_content',
            lambda string, location, tokens:
                LineContentNode(tokens.asList()))

    def _create_line_end_parser(self):
        """The end of a line."""
        self.line_end = LineEnd()
        self._config(
            self.line_end, 'line_end',
            lambda string, location, tokens: LineEndNode(tokens[0]))

    def _create_line_parser(self):
        """Line content followed by a line end."""
        self.line = self.line_content + self.line_end
        self._config(
            self.line, 'line',
            lambda string, location, tokens: LineNode(tokens.asList()))

    def _create_hosts_parser(self):
        """Any number of lines up to the end of the input."""
        self.hosts = ZeroOrMore(self.line) + StringEnd()
        self._config(
            self.hosts, 'hosts',
            lambda string, location, tokens: HostsNode(tokens.asList()))
def _get_parser(cls):
    """Return the pyparsing grammar for SELECT / WITH statements.

    The grammar is built on the first call and cached on ``cls._parser``;
    later calls return the cached object.  Several parse actions defined
    here record what they match into ``cls._table_identifiers`` and
    ``cls._with_aliases``.

    NOTE(review): ``debug`` and ``SemanticToken`` are not defined in this
    function; presumably they are module-level names -- confirm.
    """
    if cls._parser is not None:
        return cls._parser

    ParserElement.enablePackrat()

    LPAR, RPAR, COMMA, LBRACKET, RBRACKET, LT, GT = map(Literal, "(),[]<>")
    ungrouped_select_stmt = Forward().setName("select statement")

    # keywords
    # The tuple of names on the left must stay in the same order as the
    # words in the string on the right: they are paired positionally.
    # NOTE(review): "PRECENTILE_DISC" looks like a misspelling of
    # PERCENTILE_DISC; as written the keyword matches the misspelled text.
    (
        UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING,
        NATURAL, INNER, CROSS, LEFT, RIGHT, OUTER, FULL, JOIN, AS, INDEXED,
        NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY,
        LIMIT, OFFSET, OR, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE,
        END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP,
        MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP, WITH,
        EXTRACT, PARTITION, ROWS, RANGE, UNBOUNDED, PRECEDING, CURRENT, ROW,
        FOLLOWING, OVER, INTERVAL, DATE_ADD, DATE_SUB, ADDDATE, SUBDATE,
        REGEXP_EXTRACT, SPLIT, ORDINAL, FIRST_VALUE, LAST_VALUE, NTH_VALUE,
        LEAD, LAG, PERCENTILE_CONT, PRECENTILE_DISC, RANK, DENSE_RANK,
        PERCENT_RANK, CUME_DIST, NTILE, ROW_NUMBER, DATE, TIME, DATETIME,
        TIMESTAMP, UNNEST, INT64, NUMERIC, FLOAT64, BOOL, BYTES, GEOGRAPHY,
        ARRAY, STRUCT, SAFE_CAST, ANY_VALUE, ARRAY_AGG, ARRAY_CONCAT_AGG,
        AVG, BIT_AND, BIT_OR, BIT_XOR, COUNT, COUNTIF, LOGICAL_AND,
        LOGICAL_OR, MAX, MIN, STRING_AGG, SUM, CORR, COVAR_POP, COVAR_SAMP,
        STDDEV_POP, STDDEV_SAMP, STDDEV, VAR_POP, VAR_SAMP, VARIANCE,
        TIMESTAMP_ADD, TIMESTAMP_SUB, GENERATE_ARRAY, GENERATE_DATE_ARRAY,
        GENERATE_TIMESTAMP_ARRAY, FOR, SYSTEMTIME, AS, OF, WINDOW, RESPECT,
        IGNORE, NULLS,
    ) = map(
        CaselessKeyword,
        """
        UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING,
        NATURAL, INNER, CROSS, LEFT, RIGHT, OUTER, FULL, JOIN, AS, INDEXED,
        NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY,
        LIMIT, OFFSET, OR, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE,
        END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP,
        MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP, WITH,
        EXTRACT, PARTITION, ROWS, RANGE, UNBOUNDED, PRECEDING, CURRENT, ROW,
        FOLLOWING, OVER, INTERVAL, DATE_ADD, DATE_SUB, ADDDATE, SUBDATE,
        REGEXP_EXTRACT, SPLIT, ORDINAL, FIRST_VALUE, LAST_VALUE, NTH_VALUE,
        LEAD, LAG, PERCENTILE_CONT, PRECENTILE_DISC, RANK, DENSE_RANK,
        PERCENT_RANK, CUME_DIST, NTILE, ROW_NUMBER, DATE, TIME, DATETIME,
        TIMESTAMP, UNNEST, INT64, NUMERIC, FLOAT64, BOOL, BYTES, GEOGRAPHY,
        ARRAY, STRUCT, SAFE_CAST, ANY_VALUE, ARRAY_AGG, ARRAY_CONCAT_AGG,
        AVG, BIT_AND, BIT_OR, BIT_XOR, COUNT, COUNTIF, LOGICAL_AND,
        LOGICAL_OR, MAX, MIN, STRING_AGG, SUM, CORR, COVAR_POP, COVAR_SAMP,
        STDDEV_POP, STDDEV_SAMP, STDDEV, VAR_POP, VAR_SAMP, VARIANCE,
        TIMESTAMP_ADD, TIMESTAMP_SUB, GENERATE_ARRAY, GENERATE_DATE_ARRAY,
        GENERATE_TIMESTAMP_ARRAY, FOR, SYSTEMTIME, AS, OF, WINDOW, RESPECT,
        IGNORE, NULLS
        """.replace(",", "").split(),
    )

    # Keywords that are excluded even from column aliases (see the note on
    # column_alias below).
    keyword_nonfunctions = MatchFirst((
        UNION, ALL, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING,
        NATURAL, INNER, CROSS, LEFT, RIGHT, OUTER, FULL, JOIN, AS, INDEXED,
        NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY,
        LIMIT, OFFSET, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END,
        CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH,
        STRUCT, WINDOW,
    ))

    # Every word an ordinary identifier may not be.
    keyword = keyword_nonfunctions | MatchFirst((
        ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP, DATE_ADD,
        DATE_SUB, ADDDATE, SUBDATE, INTERVAL, STRING_AGG, REGEXP_EXTRACT,
        SPLIT, ORDINAL, UNNEST, SAFE_CAST, PARTITION, TIMESTAMP_ADD,
        TIMESTAMP_SUB, ARRAY, GENERATE_ARRAY, GENERATE_DATE_ARRAY,
        GENERATE_TIMESTAMP_ARRAY,
    ))

    identifier_word = Word(alphas + "_@#", alphanums + "@$#_")
    identifier = ~keyword + identifier_word.copy()
    collation_name = identifier.copy()
    # NOTE: Column names can be keywords.  Doc says they cannot, but in practice it seems to work.
    column_name = identifier.copy()
    cast_to = identifier.copy()
    # Dotted column reference, optionally followed by one or more "::type"
    # casts.
    qualified_column_name = Group(
        delimitedList(column_name, delim=".") + Optional(
            Suppress("::") + delimitedList(cast_to("cast"), delim="::")))
    # NOTE: As with column names, column aliases can be keywords, e.g. functions like `current_time`.  Other
    # keywords, e.g. `from` make parsing pretty difficult (e.g. "SELECT a from from b" is confusing.)
    column_alias = ~keyword_nonfunctions + column_name.copy()
    table_name = identifier.copy()
    table_alias = identifier.copy()
    index_name = identifier.copy()
    function_name = identifier.copy()
    parameter_name = identifier.copy()
    # NOTE: The expression in a CASE statement can be an integer.  E.g. this is valid SQL:
    # select CASE 1 WHEN 1 THEN -1 ELSE -2 END from test_table
    unquoted_case_identifier = ~keyword + Word(alphanums + "$_")
    quoted_case_identifier = ~keyword + (QuotedString('"') ^ Suppress("`") +
                                         CharsNotIn("`") + Suppress("`"))
    case_identifier = quoted_case_identifier | unquoted_case_identifier
    case_expr = (Optional(case_identifier + Suppress(".")) +
                 Optional(case_identifier + Suppress(".")) + case_identifier)

    # expression
    expr = Forward().setName("expression")

    integer = Regex(r"[+-]?\d+")
    numeric_literal = Regex(r"[+-]?\d*\.?\d+([eE][+-]?\d+)?")
    string_literal = QuotedString("'") | QuotedString('"') | QuotedString(
        "`")
    regex_literal = "r" + string_literal
    blob_literal = Regex(r"[xX]'[0-9A-Fa-f]+'")
    date_or_time_literal = (DATE | TIME | DATETIME | TIMESTAMP) + string_literal

    literal_value = (
        numeric_literal
        | string_literal
        | regex_literal
        | blob_literal
        | date_or_time_literal
        | NULL
        | CURRENT_TIME + Optional(LPAR + Optional(string_literal) + RPAR)
        | CURRENT_DATE + Optional(LPAR + Optional(string_literal) + RPAR)
        | CURRENT_TIMESTAMP + Optional(LPAR + Optional(string_literal) + RPAR))
    bind_parameter = Word("?", nums) | Combine(oneOf(": @ $") + parameter_name)
    type_name = oneOf(
        """TEXT REAL INTEGER BLOB NULL TIMESTAMP STRING DATE INT64 NUMERIC FLOAT64 BOOL BYTES DATETIME GEOGRAPHY TIME ARRAY STRUCT""",
        caseless=True,
    )
    date_part = oneOf(
        """DAY DAY_HOUR DAY_MICROSECOND DAY_MINUTE DAY_SECOND HOUR HOUR_MICROSECOND HOUR_MINUTE HOUR_SECOND MICROSECOND MINUTE MINUTE_MICROSECOND MINUTE_SECOND MONTH QUARTER SECOND SECOND_MICROSECOND WEEK YEAR YEAR_MONTH""",
        caseless=True,
    )
    datetime_operators = (DATE_ADD | DATE_SUB | ADDDATE | SUBDATE |
                          TIMESTAMP_ADD | TIMESTAMP_SUB)

    # Only referenced from the commented-out bad_datetime_operators below:
    # raises unless the line preceding the match contains "--ignore".
    def invalid_date_add(s, loc, tokens):
        prev_newline = s[:loc].rfind('\n')
        prev_prev_newline = s[:prev_newline].rfind('\n')
        if '--ignore' in s[prev_prev_newline:prev_newline]:
            pass
        else:
            raise RuntimeError(
                "{} is not valid, did you mean 'date_add'".format(
                    tokens[0]))

    #bad_datetime_operators = (
    #    CaselessKeyword('dateadd').setParseAction(invalid_date_add)
    #)

    grouping_term = expr.copy()
    ordering_term = Group(
        expr("order_key") + Optional(COLLATE + collation_name("collate")) +
        Optional(ASC | DESC)("direction"))("ordering_term")

    function_arg = expr.copy()("function_arg")
    function_args = Optional(
        "*" | Optional(DISTINCT) + delimitedList(function_arg) +
        Optional((RESPECT | IGNORE) + NULLS))("function_args")
    function_call = ((function_name | keyword)("function_name") + LPAR +
                     Group(function_args)("function_args_group") +
                     RPAR)('function')

    navigation_function_name = (FIRST_VALUE | LAST_VALUE | NTH_VALUE | LEAD |
                                LAG | PERCENTILE_CONT | PRECENTILE_DISC)
    aggregate_function_name = (ANY_VALUE | ARRAY_AGG | ARRAY_CONCAT_AGG | AVG |
                               BIT_AND | BIT_OR | BIT_XOR | COUNT | COUNTIF |
                               LOGICAL_AND | LOGICAL_OR | MAX | MIN |
                               STRING_AGG | SUM)
    statistical_aggregate_function_name = (CORR | COVAR_POP | COVAR_SAMP |
                                           STDDEV_POP | STDDEV_SAMP | STDDEV |
                                           VAR_POP | VAR_SAMP | VARIANCE)
    numbering_function_name = (RANK | DENSE_RANK | PERCENT_RANK | CUME_DIST |
                               NTILE | ROW_NUMBER)
    analytic_function_name = (
        navigation_function_name
        | aggregate_function_name
        | statistical_aggregate_function_name
        | numbering_function_name)("analytic_function_name")
    partition_expression_list = delimitedList(grouping_term)(
        "partition_expression_list")
    window_frame_boundary_start = (UNBOUNDED + PRECEDING
                                   | numeric_literal + (PRECEDING | FOLLOWING)
                                   | CURRENT + ROW)
    window_frame_boundary_end = (UNBOUNDED + FOLLOWING
                                 | numeric_literal + (PRECEDING | FOLLOWING)
                                 | CURRENT + ROW)
    window_frame_clause = (ROWS | RANGE) + (
        ((UNBOUNDED + PRECEDING)
         | (numeric_literal + PRECEDING)
         | (CURRENT + ROW))
        | (BETWEEN + window_frame_boundary_start + AND +
           window_frame_boundary_end))
    window_name = identifier.copy()("window_name")
    window_specification = (
        Optional(window_name) +
        Optional(PARTITION + BY + partition_expression_list) +
        Optional(ORDER + BY + delimitedList(ordering_term)) +
        Optional(window_frame_clause)("window_specification"))
    # NOTE(review): setParseAction(debug) is applied to function_args
    # itself, not to a copy -- confirm that is intended.
    analytic_function = (
        analytic_function_name + LPAR +
        function_args.setParseAction(debug) + RPAR + OVER +
        (window_name | LPAR + Optional(window_specification)
         ('window') + RPAR))("analytic_function")

    string_agg_term = (STRING_AGG + LPAR + Optional(DISTINCT)('has_distinct') +
                       expr('string_agg_expr') +
                       Optional(COMMA + string_literal('delimiter')) +
                       Optional(ORDER + BY + expr + Optional(ASC | DESC) +
                                Optional(LIMIT + integer)) +
                       RPAR)("string_agg")
    array_literal = (
        Optional(ARRAY + Optional(LT + delimitedList(type_name) + GT)) +
        LBRACKET + delimitedList(expr) + RBRACKET)
    interval = INTERVAL + expr + date_part
    array_generator = (GENERATE_ARRAY + LPAR + numeric_literal + COMMA +
                       numeric_literal + COMMA + numeric_literal + RPAR)
    date_array_generator = (
        (GENERATE_DATE_ARRAY | GENERATE_TIMESTAMP_ARRAY) + LPAR +
        expr("start_date") + COMMA + expr("end_date") +
        Optional(COMMA + interval) + RPAR)

    explicit_struct = (
        STRUCT + Optional(LT + delimitedList(type_name) + GT) + LPAR +
        Optional(delimitedList(expr + Optional(AS + identifier))) + RPAR)

    case_when = WHEN + expr.copy()("when")
    case_then = THEN + expr.copy()("then")
    case_clauses = Group(ZeroOrMore(case_when + case_then))
    case_else = ELSE + expr.copy()("_else")
    case_stmt = (CASE + Optional(case_expr.copy()) +
                 case_clauses("case_clauses") + Optional(case_else) +
                 END)("case")

    class SelectStatement(SemanticToken):
        """Semantic token wrapping the tokens of a select statement."""

        def __init__(self, tokens):
            self.tokens = tokens

        def getName(self):
            return 'select'

        @classmethod
        def parse(cls, tokens):
            return SelectStatement(tokens)

    class Function(SemanticToken):
        """Semantic token for a function call."""

        def __init__(self, func, tokens):
            self.func = func
            self.tokens = tokens

        def getName(self):
            return 'function'

        @classmethod
        def parse(cls, tokens):
            # tokens[0] is the function name; the slice drops tokens[1] and
            # the last token (presumably the parentheses -- confirm).
            method = tokens[0]
            args = tokens[2:-1]
            return Function(method, args)

        def __repr__(self):
            return "func:{}({})".format(self.func, self.tokens)

    class WindowFunction(Function):
        """Semantic token for an analytic function with an OVER clause."""

        def __init__(self, func, tokens, func_args, partition_args,
                     order_args, window_args):
            self.func = func
            self.tokens = tokens
            self.func_args = func_args
            self.partition_args = partition_args
            self.order_args = order_args
            self.window_args = window_args

        def getName(self):
            return 'window function'

        @classmethod
        def parse(cls, tokens):
            # Every argument is read back through a results name set above.
            return WindowFunction(tokens.analytic_function_name, tokens,
                                  tokens.function_args,
                                  tokens.partition_expression_list,
                                  tokens.ordering_term,
                                  tokens.window_specification)

        def __repr__(self):
            return "window:{}({})over({}, {}, {})".format(
                self.func, self.func_args, self.partition_args,
                self.order_args, self.window_args)

    class CaseStatement(SemanticToken):
        """Semantic token for a CASE ... END expression."""

        def __init__(self, tokens, whens, _else):
            self.tokens = tokens
            self.whens = whens
            self._else = _else

        def getName(self):
            return 'case'

        @classmethod
        def parse_whens(self, tokens):
            # Consume tokens four at a time, keeping the second and fourth
            # of each group as the "when" and "then" values.
            whens = []
            while len(tokens) > 0:
                _, when, _, then, *tokens = tokens
                whens.append({"when": when, "then": then})
            return whens

        @classmethod
        def parse(cls, tokens):
            # NOTE(review): fixed positions 1 and 3 are used, although
            # case_expr and case_else are both Optional in case_stmt --
            # confirm the indices hold when either is absent.
            whens = tokens[1]
            _else = tokens[3]
            return CaseStatement(tokens, cls.parse_whens(whens), _else)

        def __repr__(self):
            return "<case statement ({}, {})>".format(
                len(self.whens), self._else)

    # Alternatives are tried in order (MatchFirst).
    expr_term = (
        (analytic_function)("analytic_function").setParseAction(
            WindowFunction.parse)
        | (CAST + LPAR + expr + AS + type_name + RPAR)("cast")
        | (SAFE_CAST + LPAR + expr + AS + type_name + RPAR)("safe_cast")
        | (Optional(EXISTS) + LPAR + ungrouped_select_stmt +
           RPAR)("subselect")
        | (literal_value)("literal")
        | (bind_parameter)("bind_parameter")
        | (EXTRACT + LPAR + expr + FROM + expr + RPAR)("extract")
        | case_stmt.setParseAction(CaseStatement.parse)
        | (datetime_operators + LPAR + expr + COMMA + interval +
           RPAR)("date_operation")
        #| (bad_datetime_operators + LPAR + expr + COMMA + interval + RPAR)
        | string_agg_term("string_agg_term")
        | array_literal("array_literal")
        | array_generator("array_generator")
        | date_array_generator("date_array_generator")
        | explicit_struct("explicit_struct")
        | function_call("function_call").setParseAction(Function.parse)
        | qualified_column_name("column").setParseAction(
            lambda x: ".".join([str(i) for i in x[0]]))
    ).setParseAction(debug) + Optional(LBRACKET + (OFFSET | ORDINAL) + LPAR +
                                       expr + RPAR + RBRACKET)("offset_ordinal")

    struct_term = (LPAR + delimitedList(expr_term) + RPAR)

    # Operators looked up by the Operator.parse* methods below.
    KNOWN_OPS = [(BETWEEN, AND),
                 Literal("||").setName("concat"),
                 Literal("*").setName("mul"),
                 Literal("/").setName("div"),
                 Literal("+").setName("add"),
                 Literal("-").setName("sub"),
                 Literal("<>").setName("neq"),
                 Literal(">").setName("gt"),
                 Literal("<").setName("lt"),
                 Literal(">=").setName("gte"),
                 Literal("<=").setName("lte"),
                 Literal("=").setName("eq"),
                 Literal("==").setName("eq"),
                 Literal("!=").setName("neq"),
                 IN.setName("in"),
                 IS.setName("is"),
                 LIKE.setName("like"),
                 OR.setName("or"),
                 AND.setName("and"),
                 NOT.setName('not')]

    class Operator(SemanticToken):
        """Semantic token for an operator application.

        NOTE(review): several branches below import ``ipdb`` and stop in
        the debugger; these look like leftover debugging aids -- confirm
        before shipping.
        """

        def __init__(self, op, assoc, name, tokens):
            self.op = op
            self.assoc = assoc
            self.name = name
            self.tokens = tokens

        def getName(self):
            return 'operator'

        @classmethod
        def parse(cls, tokens):
            # ARRANGE INTO {op: params} FORMAT
            toks = tokens[0]
            if toks[1] in KNOWN_OPS:
                op = KNOWN_OPS[KNOWN_OPS.index(toks[1])]
                if toks.subselect:
                    import ipdb
                    ipdb.set_trace()
                return Operator(op, 'binary', op.name, [toks[0], toks[2:]])
            else:
                import ipdb
                ipdb.set_trace()
            return tokens

        @classmethod
        def parse_unary(cls, tokens):
            toks = tokens[0]
            if toks[0] in KNOWN_OPS:
                op = KNOWN_OPS[KNOWN_OPS.index(toks[0])]
            else:
                # NOTE(review): `op` is never assigned on this path, so the
                # return below fails once the debugger is continued.
                import ipdb
                ipdb.set_trace()
            return Operator(op, 'unary', op.name, [toks[1:]])

        @classmethod
        def parse_ternary(cls, tokens):
            # Only enters the debugger; falls through and returns None.
            import ipdb
            ipdb.set_trace()

        def __repr__(self):
            return "<operator({}, {}, {})>".format(self.op, self.assoc,
                                                   self.tokens)

    UNARY, BINARY, TERNARY = 1, 2, 3
    # Operator levels are listed from tightest to loosest binding, as
    # infixNotation expects.
    expr << infixNotation(
        (expr_term | struct_term),
        [
            (oneOf("- + ~") | NOT, UNARY, opAssoc.RIGHT, Operator.parse_unary),
            (ISNULL | NOTNULL | NOT + NULL, UNARY, opAssoc.LEFT, Operator.parse_unary),
            ("||", BINARY, opAssoc.LEFT, Operator.parse),
            (oneOf("* / %"), BINARY, opAssoc.LEFT, Operator.parse),
            (oneOf("+ -"), BINARY, opAssoc.LEFT, Operator.parse),
            (oneOf("<< >> & |"), BINARY, opAssoc.LEFT, Operator.parse),
            (oneOf("= > < >= <= <> != !< !>"), BINARY, opAssoc.LEFT, Operator.parse),
            (IS + Optional(NOT)
             | Optional(NOT) + IN
             | Optional(NOT) + LIKE
             | GLOB
             | MATCH
             | REGEXP, BINARY, opAssoc.LEFT, Operator.parse),
            ((BETWEEN, AND), TERNARY, opAssoc.LEFT, Operator.parse_ternary),
            (Optional(NOT) + IN + LPAR +
             Group(ungrouped_select_stmt | delimitedList(expr)) + RPAR,
             UNARY, opAssoc.LEFT, Operator.parse_unary),
            (AND, BINARY, opAssoc.LEFT, Operator.parse),
            (OR, BINARY, opAssoc.LEFT, Operator.parse),
        ],
        lpar=Literal('('),
        rpar=Literal(')'),
    )
    # An expression, optionally wrapped in matching ", ' or ` characters.
    quoted_expr = (expr
                   ^ Suppress('"') + expr + Suppress('"')
                   ^ Suppress("'") + expr + Suppress("'")
                   ^ Suppress("`") + expr + Suppress("`"))("quoted_expr")

    compound_operator = (UNION + Optional(ALL | DISTINCT)
                         | INTERSECT + DISTINCT
                         | EXCEPT + DISTINCT
                         | INTERSECT
                         | EXCEPT)("compound_operator")

    join_constraint = Group(
        Optional(ON + expr
                 | USING + LPAR +
                 Group(delimitedList(qualified_column_name)) +
                 RPAR))("join_constraint")

    join_op = (COMMA | Group(
        Optional(NATURAL) + Optional(INNER
                                     | CROSS
                                     | LEFT + OUTER
                                     | LEFT
                                     | RIGHT + OUTER
                                     | RIGHT
                                     | FULL + OUTER
                                     | OUTER
                                     | FULL) + JOIN))("join_op")

    join_source = Forward()

    # We support three kinds of table identifiers.
    #
    # First, dot delimited info like project.dataset.table, where
    # each component follows the rules described in the BigQuery
    # docs, namely:
    #  Contain letters (upper or lower case), numbers, and underscores
    #
    # Second, a dot delimited quoted string.  Since it's quoted, we'll be
    # liberal w.r.t. what characters we allow.  E.g.:
    #  `project.dataset.name-with-dashes`
    #
    # Third, a series of quoted strings, delimited by dots, e.g.:
    #  `project`.`dataset`.`name-with-dashes`
    #
    # We won't attempt to support combinations, like:
    #  project.dataset.`name-with-dashes`
    #  `project`.`dataset.name-with-dashes`

    def record_table_identifier(t):
        # Left-pad with None so a 3-tuple is stored when fewer than three
        # parts were matched.
        identifier_list = t.asList()
        padded_list = [None] * (3 - len(identifier_list)) + identifier_list
        cls._table_identifiers.add(tuple(padded_list))

    standard_table_part = ~keyword + Word(alphanums + "_")
    standard_table_identifier = (
        Optional(standard_table_part("project") + Suppress(".")) +
        Optional(standard_table_part("dataset") + Suppress(".")) +
        standard_table_part("table")
    ).setParseAction(lambda t: record_table_identifier(t))

    # Unlike the table parts below, a quoted project part may contain dots.
    quoted_project_part = (
        Suppress('"') + CharsNotIn('"') + Suppress('"')
        | Suppress("'") + CharsNotIn("'") + Suppress("'")
        | Suppress("`") + CharsNotIn("`") + Suppress("`"))
    quoted_table_part = (Suppress('"') + CharsNotIn('".') + Suppress('"')
                         | Suppress("'") + CharsNotIn("'.") + Suppress("'")
                         | Suppress("`") + CharsNotIn("`.") + Suppress("`"))
    quoted_table_parts_identifier = (
        Optional(quoted_project_part("project") + Suppress(".")) +
        Optional(quoted_table_part("dataset") + Suppress(".")) +
        quoted_table_part("table")
    ).setParseAction(lambda t: record_table_identifier(t))

    def record_quoted_table_identifier(t):
        # Split the single quoted string on dots: the last two pieces are
        # kept as-is and everything before them is re-joined into the
        # first element (None when there is nothing before them).
        # NOTE(review): identifier_list[-2] raises IndexError when the
        # quoted text contains no dot -- confirm that input cannot occur.
        identifier_list = t.asList()[0].split(".")
        first = ".".join(identifier_list[0:-2]) or None
        second = identifier_list[-2]
        third = identifier_list[-1]
        identifier_list = [first, second, third]
        padded_list = [None] * (3 - len(identifier_list)) + identifier_list
        cls._table_identifiers.add(tuple(padded_list))

    quotable_table_parts_identifier = (
        Suppress('"') + CharsNotIn('"') + Suppress('"')
        | Suppress("'") + CharsNotIn("'") + Suppress("'")
        | Suppress("`") + CharsNotIn("`") + Suppress("`")
    ).setParseAction(lambda t: record_quoted_table_identifier(t))

    table_identifier = (standard_table_identifier
                        | quoted_table_parts_identifier
                        | quotable_table_parts_identifier)

    def record_ref(t):
        # Stores (op, target...) in both the alias set and the table set.
        lol = [t.op] + t.ref_target.asList()
        cls._with_aliases.add(tuple(lol))
        cls._table_identifiers.add(tuple(lol))

    ref_target = identifier.copy()
    single_source = (
        # ref + source statements
        ((Suppress('{{') +
          (CaselessKeyword('ref') | CaselessKeyword("source"))("op") + LPAR +
          delimitedList((Suppress("'") | Suppress('"')) + ref_target +
                        (Suppress("'") | Suppress('"')))("ref_target") +
          RPAR + Suppress("}}")).setParseAction(record_ref)
         | table_identifier) +
        Optional(Optional(AS) + table_alias("table_alias*")) +
        Optional(FOR + SYSTEMTIME + AS + OF + string_literal) +
        Optional(INDEXED + BY + index_name("name") | NOT + INDEXED)
        ("index")
        | (LPAR + ungrouped_select_stmt + RPAR +
           Optional(Optional(AS) + table_alias))('subquery')
        | (LPAR + join_source + RPAR)
        | (UNNEST + LPAR + expr + RPAR) +
        Optional(Optional(AS) + column_alias))

    join_source << (Group(single_source + OneOrMore(
        Group(join_op + single_source + join_constraint)('joins*')))
                    | single_source)('sources*')

    over_partition = (
        PARTITION + BY +
        delimitedList(partition_expression_list))("over_partition")
    over_order = ORDER + BY + delimitedList(ordering_term)
    over_unsigned_value_specification = expr
    over_window_frame_preceding = (
        UNBOUNDED + PRECEDING
        | over_unsigned_value_specification + PRECEDING
        | CURRENT + ROW)
    over_window_frame_following = (
        UNBOUNDED + FOLLOWING
        | over_unsigned_value_specification + FOLLOWING
        | CURRENT + ROW)
    over_window_frame_bound = (over_window_frame_preceding
                               | over_window_frame_following)
    over_window_frame_between = (BETWEEN + over_window_frame_bound + AND +
                                 over_window_frame_bound)
    over_window_frame_extent = (over_window_frame_preceding
                                | over_window_frame_between)
    over_row_or_range = (ROWS | RANGE) + over_window_frame_extent
    over = (OVER + LPAR + Optional(over_partition) + Optional(over_order) +
            Optional(over_row_or_range) + RPAR)("over")

    result_column = (
        Optional(table_name + ".") + "*" +
        Optional(EXCEPT + LPAR + delimitedList(column_name) + RPAR)
        | Group(quoted_expr + Optional(over) +
                Optional(Optional(AS) + column_alias('alias'))))

    window_select_clause = (WINDOW + identifier + AS + LPAR +
                            window_specification + RPAR)

    select_core = (
        SELECT + Optional(DISTINCT | ALL) +
        Group(delimitedList(result_column))("columns") +
        Optional(FROM - join_source("from*")) +
        Optional(WHERE + expr('where')) +
        Optional(GROUP + BY +
                 Group(delimitedList(grouping_term))("group_by_terms")) +
        Optional(HAVING + expr("having_expr")) +
        Optional(ORDER + BY +
                 Group(delimitedList(ordering_term))("order_by_terms")) +
        Optional(delimitedList(window_select_clause)))
    grouped_select_core = select_core | (LPAR + select_core + RPAR)

    ungrouped_select_stmt << (
        grouped_select_core +
        ZeroOrMore(compound_operator + grouped_select_core) +
        Optional(LIMIT + (Group(expr + OFFSET + expr)
                          | Group(expr + COMMA + expr)
                          | expr)("limit")))("select")
    select_stmt = ungrouped_select_stmt | (LPAR + ungrouped_select_stmt + RPAR)

    # define comment format, and ignore them
    sql_comment = oneOf("-- #") + restOfLine | cStyleComment
    select_stmt.ignore(sql_comment)

    def record_with_alias(t):
        # Same left-padding scheme as record_table_identifier, stored in
        # the alias set instead.
        identifier_list = t.asList()
        padded_list = [None] * (3 - len(identifier_list)) + identifier_list
        cls._with_aliases.add(tuple(padded_list))

    with_stmt = Forward().setName("with statement")
    # NOTE(review): setParseAction is called on `identifier` itself rather
    # than a copy, and `identifier` is also used directly in
    # explicit_struct and window_select_clause above -- confirm those
    # matches are meant to be recorded as WITH aliases too.
    with_clause = Group(
        identifier.setParseAction(lambda t: record_with_alias(t))
        ('cte_name') - AS - LPAR + (select_stmt | with_stmt) - RPAR)
    with_core = WITH + delimitedList(with_clause)('ctes')
    with_stmt << (with_core - ~Literal(',') + ungrouped_select_stmt)
    with_stmt.ignore(sql_comment)

    select_or_with = select_stmt | with_stmt
    select_or_with_parens = LPAR + select_or_with - RPAR

    # Cache the finished grammar for later calls.
    cls._parser = select_or_with | select_or_with_parens
    return cls._parser
# Grammar for function / subroutine headers and interface names.  Most parse
# actions below re-insert the blank that token-wise matching drops, so that
# ''.join(toks) in func_def / subr_def yields a readable header line.

# bind(c [, name="..."]) attribute
name_eq = Literal('name') + '=' + quotedString
bind_keyword = Keyword('bind').setParseAction(lambda s,loc,toks: [' bind'])
bind_attr = bind_keyword + Literal('(') + 'c' + Optional(comma + name_eq) + ')'

# result(NAME) attribute, collapsed into a single token with a leading blank
result_attr = Literal('result') + '(' + NAME + ')'
result_attr.setParseAction(lambda s,loc,toks: [' '+''.join(toks)])
func_post = eachMostOnce(bind_attr, result_attr)

# Prefix attributes; each one is emitted with a trailing blank.
elem_attr = Keyword('elemental').setName('elemental')
elem_attr.setParseAction(lambda s,loc,toks: [toks[0] + ' '])
pure_attr = Keyword('pure').setName('pure')
pure_attr.setParseAction(lambda s,loc,toks: [toks[0] + ' '])
# Fixed: this element used to be named 'elemental' (copy-paste from
# elem_attr), which made debug output and error messages for the
# 'recursive' keyword misleading.
recu_attr = Keyword('recursive').setName('recursive')
recu_attr.setParseAction(lambda s,loc,toks: [toks[0] + ' '])
func_pre = eachMostOnce(elem_attr, pure_attr, recu_attr)

func_name = NAME.copy().setName('func_name')
func_name.setParseAction(lambda s,loc,toks: [' '+toks[0]])

# A function header becomes three tokens: the joined header text, an empty
# string and the matching 'end function'.
func_def = Optional(func_pre) + Keyword('function') + func_name + arglist \
    + Optional(func_post) + EOLL
func_def.setParseAction(lambda s,loc,toks: [''.join(toks), '', 'end function'])

# Same scheme for subroutines, which only accept the bind attribute.
subr_def = Keyword('subroutine') + func_name + arglist + Optional(bind_attr) + EOLL
subr_def.setParseAction(lambda s,loc,toks: [''.join(toks), '', 'end subroutine'])

# Interface names: a plain name, assignment(=) or operator(<op>).
inte_name = NAME.copy().setParseAction(lambda s,loc,toks: [' '+toks[0]])
inte_assignment = Literal('assignment') + '(' + Literal('=') + ')'
inte_assignment.setParseAction(lambda s,loc,toks: [' ' + ''.join(toks)])
operators = comp_op | oneOf('+ - * / ** //') | Combine('.' + NAME + '.')
inte_operator = Literal('operator') + '(' + operators + ')'
# Lua names (identifiers) consist of letters, digits and underscores and
# must not start with a digit.
name = Word(alphas + "_", alphanums + "_")

# Only the short "--" comment form is handled here.
comment = Suppress(Literal('--') + restOfLine)

# XXX: simplified -- the long bracket string form and some other details of
# Lua string literals are not covered.
_single_quoted = QuotedString("'", "\\")
_double_quoted = QuotedString('"', "\\")
literal_string = (_single_quoted | _double_quoted).setParseAction(
    lambda t, p, v: ast.LiteralString(*v))

# Forward declarations for the mutually recursive rules.
exp = Forward()
explist = Forward()
tableconstructor = Forward()

# var ::= Name
var = name.copy().setParseAction(from_parse_result(ast.Var))

# args ::= ‘(’ [explist] ‘)’ | tableconstructor | LiteralString
# The optional (":" + name) method prefix was moved here from the
# functioncall rule.
_method_prefix = Optional(
    Suppress(':') + var, default=None).setResultsName('method')
_call_arguments = (
    (Suppress('(') + Optional(explist, default=[]) + Suppress(')'))
    | tableconstructor
    | Group(literal_string)
).setResultsName('args')
args = _method_prefix + _call_arguments


def function_or_method_call(parts):
    """Build the call node for ``parts`` = (callee, method, arguments).

    A ``None`` method gives an ``ast.FunctionCall``; anything else gives an
    ``ast.MethodCall``.
    """
    callee = parts[0]
    method_name = parts[1]
    call_arguments = parts[2]
    if method_name is not None:
        return ast.MethodCall(callee, method_name, list(call_arguments))
    return ast.FunctionCall(callee, list(call_arguments))
# Numeric literals, variables, string literals and terms.

arithSign = Word("+-", exact=1)

# Real number: digits with a decimal point (or a leading point), optionally
# followed by an exponent.
realNum = Combine(
    Optional(arithSign) +
    (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums)))  # noqa
    + Optional(E + Optional(arithSign) + Word(nums)))
realNum.setParseAction(lambda x: expression.NumericLiteral(float(x[0])))


def _int_from_text(text):
    """Convert the text matched by ``intNum`` to an int.

    ``intNum`` accepts an optional non-negative exponent (e.g. ``"12e3"``),
    which plain ``int()`` rejects with ``ValueError``.  The exponent form is
    evaluated with exact integer arithmetic instead of going through float.
    Assumes ``E`` matches the letter "e" in either case -- TODO confirm.
    """
    try:
        return int(text)
    except ValueError:
        mantissa, _, exponent = text.lower().partition('e')
        # Still raises ValueError for text that is not <int>e<int>.
        return int(mantissa) * 10 ** int(exponent)


# Integer: digits, optionally followed by a non-negative exponent.
intNum = Combine(
    Optional(arithSign) + Word(nums) +
    Optional(E + Optional("+") + Word(nums)))
intNum.setParseAction(
    lambda x: expression.NumericLiteral(_int_from_text(x[0])))

# realNum is tried first so that "1.5" is not cut short as the integer 1.
number = realNum | intNum

variable = ident.copy()
variable.setParseAction(lambda x: model.Var(x[0]))

# NOTE(review): this installs the action on the shared quotedString object
# itself, not on a copy -- confirm no other grammar relies on the raw form.
# The slice strips the surrounding quote characters.
quotedString.setParseAction(lambda x: expression.StringLiteral(x[0][1:-1]))

literal = quotedString | number
valueref = variable | literal


def mkterm(x):
    """Wrap the parsed tokens of a term in a ``model.Term``."""
    return model.Term(x)


# term: predicate "(" valueref, valueref, ... ")"
term = (predicate + drop("(") +
        Group(delimitedList(valueref, ",")) +
        drop(")")).setParseAction(mkterm)
def SPICE_BNF():
    """Return the pyparsing grammar, building it on first use.

    The grammar is cached in the module-level ``bnf``; later calls return the
    same object.  It accepts zero or more definitions (typedef, struct, enum,
    flags, message, channel) followed by one protocol definition and the end
    of input.  ``//`` line comments and C-style comments are ignored.
    """
    global bnf

    if not bnf:

        # punctuation
        colon = Literal(":").suppress()
        lbrace = Literal("{").suppress()
        rbrace = Literal("}").suppress()
        lbrack = Literal("[").suppress()
        rbrack = Literal("]").suppress()
        lparen = Literal("(").suppress()
        rparen = Literal(")").suppress()
        equals = Literal("=").suppress()
        comma = Literal(",").suppress()
        semi = Literal(";").suppress()

        # primitive types: each keyword is replaced by its ptypes object
        int8_ = Keyword("int8").setParseAction(replaceWith(ptypes.int8))
        uint8_ = Keyword("uint8").setParseAction(replaceWith(ptypes.uint8))
        int16_ = Keyword("int16").setParseAction(replaceWith(ptypes.int16))
        uint16_ = Keyword("uint16").setParseAction(replaceWith(ptypes.uint16))
        int32_ = Keyword("int32").setParseAction(replaceWith(ptypes.int32))
        uint32_ = Keyword("uint32").setParseAction(replaceWith(ptypes.uint32))
        int64_ = Keyword("int64").setParseAction(replaceWith(ptypes.int64))
        uint64_ = Keyword("uint64").setParseAction(replaceWith(ptypes.uint64))

        # keywords; enum*/flags* are replaced by their bit width
        # (channel_ used to be assigned twice; the duplicate was removed)
        enum32_ = Keyword("enum32").setParseAction(replaceWith(32))
        enum16_ = Keyword("enum16").setParseAction(replaceWith(16))
        enum8_ = Keyword("enum8").setParseAction(replaceWith(8))
        flags32_ = Keyword("flags32").setParseAction(replaceWith(32))
        flags16_ = Keyword("flags16").setParseAction(replaceWith(16))
        flags8_ = Keyword("flags8").setParseAction(replaceWith(8))
        channel_ = Keyword("channel")
        server_ = Keyword("server")
        client_ = Keyword("client")
        protocol_ = Keyword("protocol")
        typedef_ = Keyword("typedef")
        struct_ = Keyword("struct")
        message_ = Keyword("message")
        image_size_ = Keyword("image_size")
        bytes_ = Keyword("bytes")
        cstring_ = Keyword("cstring")
        switch_ = Keyword("switch")
        default_ = Keyword("default")
        case_ = Keyword("case")

        identifier = Word(alphas, alphanums + "_")
        enumname = Word(alphanums + "_")

        # hexadecimal ("0x...") or optionally signed decimal, converted by cvtInt
        integer = (
            Combine(CaselessLiteral("0x") + Word(nums + "abcdefABCDEF"))
            | Word(nums + "+-", nums)).setName("int").setParseAction(cvtInt)

        typename = identifier.copy().setParseAction(
            lambda toks: ptypes.TypeRef(str(toks[0])))

        # This is just normal "types", i.e. not channels or messages
        typeSpec = Forward()

        # @name or @name(value, ...)
        attributeValue = integer ^ identifier
        attribute = Group(
            Combine("@" + identifier)
            + Optional(lparen + delimitedList(attributeValue) + rparen))
        attributes = Group(ZeroOrMore(attribute))

        # array size: [], [identifier], [integer], [image_size(...)],
        # [bytes(...)] or [cstring()]
        arraySizeSpecImage = Group(image_size_ + lparen + integer + comma
                                   + identifier + comma + identifier + rparen)
        arraySizeSpecBytes = Group(bytes_ + lparen + identifier + comma
                                   + identifier + rparen)
        arraySizeSpecCString = Group(cstring_ + lparen + rparen)
        arraySizeSpec = lbrack + Optional(
            identifier ^ integer ^ arraySizeSpecImage ^ arraySizeSpecBytes
            ^ arraySizeSpecCString, default="") + rbrack

        # "- semi": once the preceding parts matched, a missing ";" is
        # reported as an error at that point instead of backtracking.
        variableDef = Group(typeSpec + Optional("*", default=None)
                            + identifier + Optional(arraySizeSpec, default=None)
                            + attributes - semi) \
            .setParseAction(parseVariableDef)

        # one or more "default:" / "case [!]identifier:" labels, then a variable
        switchCase = Group(Group(OneOrMore(
            default_.setParseAction(replaceWith(None)) + colon
            | Group(case_.suppress() + Optional("!", default="") + identifier) + colon))
            + variableDef) \
            .setParseAction(lambda toks: ptypes.SwitchCase(toks[0][0], toks[0][1]))
        switchBody = Group(switch_ + lparen
                           + delimitedList(identifier, delim='.', combine=True)
                           + rparen + lbrace + Group(OneOrMore(switchCase))
                           + rbrace + identifier + attributes - semi) \
            .setParseAction(lambda toks: ptypes.Switch(
                toks[0][1], toks[0][2], toks[0][3], toks[0][4]))
        messageBody = structBody = Group(
            lbrace + ZeroOrMore(variableDef | switchBody) + rbrace)
        structSpec = Group(struct_ + identifier + structBody + attributes).setParseAction(
            lambda toks: ptypes.StructType(toks[0][1], toks[0][2], toks[0][3]))

        # have to use longest match for type, in case a user-defined type name
        # starts with a keyword type, like "channel_type"
        typeSpec << (structSpec ^ int8_ ^ uint8_ ^ int16_ ^ uint16_ ^
                     int32_ ^ uint32_ ^ int64_ ^ uint64_ ^
                     typename).setName("type")

        # { name [= integer], ... } with an optional trailing comma
        flagsBody = enumBody = Group(
            lbrace
            + delimitedList(Group(enumname + Optional(equals + integer)))
            + Optional(comma)
            + rbrace)

        # inline "message { ... }" (built with name None) or a type name
        messageSpec = Group(message_ + messageBody + attributes).setParseAction(
            lambda toks: ptypes.MessageType(None, toks[0][1], toks[0][2])) | typename

        channelParent = Optional(colon + typename, default=None)
        channelMessage = Group(messageSpec + identifier
                               + Optional(equals + integer, default=None) + semi) \
            .setParseAction(lambda toks: ptypes.ChannelMember(
                toks[0][1], toks[0][0], toks[0][2]))
        channelBody = channelParent + Group(lbrace + ZeroOrMore(
            server_ + colon | client_ + colon | channelMessage) + rbrace)

        enum_ = (enum32_ | enum16_ | enum8_)
        flags_ = (flags32_ | flags16_ | flags8_)

        # top-level definitions
        enumDef = Group(enum_ + identifier + enumBody + attributes - semi).setParseAction(
            lambda toks: ptypes.EnumType(toks[0][0], toks[0][1], toks[0][2], toks[0][3]))
        flagsDef = Group(flags_ + identifier + flagsBody + attributes - semi).setParseAction(
            lambda toks: ptypes.FlagsType(toks[0][0], toks[0][1], toks[0][2], toks[0][3]))
        messageDef = Group(message_ + identifier + messageBody + attributes - semi).setParseAction(
            lambda toks: ptypes.MessageType(toks[0][1], toks[0][2], toks[0][3]))
        channelDef = Group(channel_ + identifier + channelBody - semi).setParseAction(
            lambda toks: ptypes.ChannelType(toks[0][1], toks[0][2], toks[0][3]))
        structDef = Group(struct_ + identifier + structBody + attributes - semi).setParseAction(
            lambda toks: ptypes.StructType(toks[0][1], toks[0][2], toks[0][3]))
        typedefDef = Group(typedef_ + identifier + typeSpec + attributes - semi).setParseAction(
            lambda toks: ptypes.TypeAlias(toks[0][1], toks[0][2], toks[0][3]))

        definitions = typedefDef | structDef | enumDef | flagsDef | messageDef | channelDef

        protocolChannel = Group(typename + identifier
                                + Optional(equals + integer, default=None) + semi) \
            .setParseAction(lambda toks: ptypes.ProtocolMember(
                toks[0][1], toks[0][0], toks[0][2]))
        protocolDef = Group(protocol_ + identifier
                            + Group(lbrace + ZeroOrMore(protocolChannel) + rbrace)
                            + semi) \
            .setParseAction(lambda toks: ptypes.ProtocolType(toks[0][1], toks[0][2]))

        bnf = ZeroOrMore(definitions) + protocolDef + StringEnd()

        singleLineComment = "//" + restOfLine
        bnf.ignore(singleLineComment)
        bnf.ignore(cStyleComment)

    return bnf
binop = oneOf(binopstr) arithSign = Word("+-", exact=1) realNum = Combine(Optional(arithSign) + (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) # noqa + Optional(E + Optional(arithSign) + Word(nums))) realNum.setParseAction(lambda x: expression.NumericLiteral(float(x[0]))) intNum = Combine(Optional(arithSign) + Word(nums) + Optional(E + Optional("+") + Word(nums))) intNum.setParseAction(lambda x: expression.NumericLiteral(int(x[0]))) number = realNum | intNum variable = ident.copy() variable.setParseAction(lambda x: model.Var(x[0])) quotedString.setParseAction(lambda x: expression.StringLiteral(x[0][1:-1])) literal = quotedString | number valueref = variable | literal def mkterm(x): return model.Term(x) term = (predicate + drop("(") + Group(delimitedList(valueref, ","))
def init_grammar(self):
    """Build the pyparsing grammar for the configuration file.

    Every change to the config-file syntax belongs in this method.  The
    assembled parser is stored in ``self.config_parser``; the semantic work is
    done by the parse actions (methods of this object) attached below.
    """
    # Punctuation that has to be present but is dropped from the results
    semi = Suppress(";")
    equals = Suppress("=")

    def simple_option(name):
        """Return a parser for ``<name> = <value>;``."""
        option = (Keyword(name) + equals
                  + Word(alphas + nums + '-_') + semi)
        option.setParseAction(self.set_simple_option)
        return option

    # ---- Top section -------------------------------------------------
    file_name = Word(alphas + nums + '-_.')

    alignment = Keyword('alignment') + equals + file_name + semi
    alignment.setParseAction(self.set_alignment)

    user_tree = Keyword('user_tree_topology') + equals + file_name + semi
    user_tree.setParseAction(self.set_user_tree)

    branch_lengths = simple_option('branchlengths')

    # Model names may contain blanks, '+' and '_'
    model_name = Word(alphas + nums + '+' + ' ' + '_')
    models = 'models' + equals + delimitedList(model_name) + semi
    models.setParseAction(self.set_models)

    model_selection = simple_option("model_selection")

    top_section = (alignment + Optional(user_tree) + branch_lengths
                   + models + model_selection)

    # ---- Data blocks -------------------------------------------------
    column = Word(nums)
    block_name = Word(alphas + '_-' + nums)

    # start[-end][\step]
    column_range = (column("start")
                    + Optional(Suppress("-") + column("end"))
                    + Optional(Suppress("\\") + column("step")))
    column_range.setParseAction(self.define_range)
    range_list = Group(OneOrMore(Group(column_range)))

    user_subset = (Optional("charset") + block_name("name")
                   + equals + range_list("parts") + semi)
    user_subset.setParseAction(self.define_user_subset)

    all_blocks = OneOrMore(Group(user_subset))
    block_section = Suppress("[data_blocks]") + all_blocks
    all_blocks.setParseAction(self.check_blocks)

    # ---- Schemes -----------------------------------------------------
    scheme_name = Word(alphas + '_-' + nums)

    # A copy is needed because references get their own parse action
    block_ref = block_name.copy()
    block_ref.setParseAction(self.check_block_exists)

    subset = Group(Suppress("(")
                   + delimitedList(block_ref("name"))
                   + Suppress(")"))
    subset.setParseAction(self.define_subset_grouping)

    scheme_body = Group(OneOrMore(subset))
    scheme = scheme_name("name") + equals + scheme_body("scheme") + semi
    scheme.setParseAction(self.define_scheme)

    all_schemes = OneOrMore(Group(scheme))
    search = simple_option("search")
    scheme_section = Suppress("[schemes]") + search + Optional(all_schemes)

    # The three sections in order, then nothing but the end of the input
    self.config_parser = (top_section + block_section
                          + scheme_section + stringEnd)
# Fill in the forward-declared rule: an optional negation, one conditional,
# then any number of (binary_logic conditional) pairs.
conditionals << Optional(not_logic) + conditional + ZeroOrMore(binary_logic + conditional)

# "if <conditionals>" is turned into an IfStatement by its parse action.
if_keyword = Literal('if')
if_statement = (if_keyword + conditionals).setParseAction(IfStatement)

# option and response refer to each other, so option is declared up front.
option = Forward()

# A response is a line of text, optionally followed by "if ... then" (the
# 'then' is dropped), and any number of indented blocks of options.
response_definition = text + lineEnd.suppress() + Optional(if_statement + Literal('then').suppress())
response = (response_definition + ZeroOrMore(indentedBlock(option, indentStack, True))).setParseAction(Response)

# Event statement: "->" atom "!" message, where the message is a quoted string
# or an atom.  Atoms used here are replaced by their repr().
event_send = Literal('->')
event_message_separator = Literal('!').suppress()
event_atom = atom.copy().setParseAction(lambda t: repr(t[0]))
event_message = quotedString | event_atom
event_send_separator = Literal(',').suppress()
event_statement = (event_send + event_atom + event_message_separator + event_message).setParseAction(Event)

# An option starts with "~" and text, may carry a comma-separated list of
# event statements, and is followed by any number of indented response blocks.
options_delimiter = Literal('~')
options_definition = options_delimiter + text + Optional(event_statement + ZeroOrMore(event_send_separator + event_statement))
option << (options_definition + ZeroOrMore(indentedBlock(response, indentStack, True))).setParseAction(Option)

# dialog ::= "begin" atom ["extends" atom] <indented responses> "end"
# The begin/extends/end words are suppressed; the atoms are grouped.
dialog_begin = Literal('begin').suppress() + Group(atom + Optional(Literal('extends').suppress() + atom))
dialog_end = Literal('end').suppress()
dialog = (dialog_begin + ZeroOrMore(indentedBlock(response, indentStack, True)) + dialog_end).setParseAction(Dialog)

# Top level: any number of dialog blocks.  The third indentedBlock argument is
# False here and True for the nested blocks above.
dialogs = ZeroOrMore(indentedBlock(dialog, indentStack, False))
def init_grammar(self):
    """Build the pyparsing grammar for the configuration file.

    Every change to the config-file syntax belongs in this method.  The
    assembled parser is stored in ``self.config_parser``; the semantic work is
    done by the parse actions (methods of this object) attached below.
    """
    # Punctuation that has to be present but is dropped from the results
    semi = Suppress(";")
    equals = Suppress("=")
    open_paren = Suppress("(")
    close_paren = Suppress(")")
    backslash = Suppress("\\")
    dash = Suppress("-")

    def simple_option(name):
        """Return a parser for ``<name> = <value>;``."""
        option = Keyword(name) + equals + Word(alphas + nums + '-_') + semi
        option.setParseAction(self.set_simple_option)
        return option

    # ---- Top section -------------------------------------------------
    file_name = Word(alphas + nums + '-_.')

    alignment = Keyword('alignment') + equals + file_name + semi
    alignment.setParseAction(self.set_alignment)

    user_tree = Keyword('user_tree_topology') + equals + file_name + semi
    user_tree.setParseAction(self.set_user_tree)

    branch_lengths = simple_option('branchlengths')

    # models = <predefined set name> | <comma-separated model names>
    model_name = Word(alphas + nums + '+')
    custom_models = delimitedList(model_name)
    predefined_models = (
        CaselessKeyword("all")
        | CaselessKeyword("mrbayes")
        | CaselessKeyword("raxml")
        | CaselessKeyword("beast")
        | CaselessKeyword("all_protein")
        | CaselessKeyword("all_protein_gamma")
        | CaselessKeyword("all_protein_gammaI")
    )
    models = (Keyword("models") + equals
              + Group(predefined_models("predefined")
                      | Group(custom_models)("userlist"))
              + semi)
    models.setParseAction(self.set_models)

    model_selection = simple_option("model_selection")

    top_section = (alignment + Optional(user_tree) + branch_lengths
                   + models + model_selection)

    # ---- Partitions ([data_blocks]) ----------------------------------
    column = Word(nums)
    part_name = Word(alphas + '_-' + nums)

    # start[-end][\step]
    column_range = (column("start")
                    + Optional(dash + column("end"))
                    + Optional(backslash + column("step")))
    column_range.setParseAction(self.define_range)
    range_list = Group(OneOrMore(Group(column_range)))

    partition = (Optional("charset") + part_name("name")
                 + equals + range_list("parts") + semi)
    partition.setParseAction(self.define_partition)

    all_partitions = OneOrMore(Group(partition))
    part_section = Suppress("[data_blocks]") + all_partitions

    # ---- Schemes -----------------------------------------------------
    scheme_name = Word(alphas + '_-' + nums)

    # A copy is needed because references get their own parse action
    part_ref = part_name.copy()
    part_ref.setParseAction(self.check_part_exists)

    subset = Group(open_paren + delimitedList(part_ref("name")) + close_paren)
    subset.setParseAction(self.define_subset)

    scheme_body = Group(OneOrMore(subset))
    scheme = scheme_name("name") + equals + scheme_body("scheme") + semi
    scheme.setParseAction(self.define_schema)

    all_schemes = OneOrMore(Group(scheme))
    search = simple_option("search")
    scheme_section = Suppress("[schemes]") + search + Optional(all_schemes)

    # The three sections in order, then nothing but the end of the input
    self.config_parser = (
        top_section + part_section + scheme_section + stringEnd)
def group_list(toks):
    """Convert ``toks`` with to_list; wrap the result if it is a "list" form.

    When the converted value is non-empty and its first element equals
    "list", it is returned inside a one-element list.  In every other case,
    including when inspecting it raises TypeError, it is returned unchanged.
    """
    converted = to_list(toks)
    try:
        is_list_form = converted and converted[0] == "list"
    except TypeError:
        return converted
    return [converted] if is_list_form else converted


def _assignment(arrow, tag):
    """Return the rule ``VARREF, ... <arrow> EXPR, ...`` tagged with ``tag``."""
    targets = delimitedList(VARREF).setParseAction(group_list)
    values = delimitedList(ungroup(EXPR)).setParseAction(group_list)
    rule = targets + Suppress(arrow) + values
    return rule.setParseAction(to_sexpr(tag)).setName(tag)


# Alternatives are tried in this order; note "<--" and "<~" come before "<-".
PROCBASE = (
    (FollowedBy(BEXPR) + BEXPR + Suppress("->") + Group(PROC))
    .setParseAction(to_sexpr("#guard"))
    .setName("guarded")
    | Keyword("Skip").setParseAction(konst(["skip"]))
    | IDENTIFIER.copy().setParseAction(to_sexpr("#call")).setName("call")
    | _assignment("<--", "assign-env")
    | _assignment("<~", "assign-lstig")
    | _assignment("<-", "assign-attr")
)

PAREN = (LPAR + Group(PROC) + RPAR).setName("__paren__")

# Sequences separated by SEMICOLON, then choices separated by "++"
SEQ = delimitedList(PAREN | Group(PROCBASE), SEMICOLON)
SEQ = SEQ.setParseAction(combinator("#seq"))

CHOICE = delimitedList(Group(SEQ), Literal("++"))
CHOICE = CHOICE.setParseAction(combinator("#choice"))
def __init__(self): # define SQL tokens selectStmt = Forward() selectToken = Keyword("select", caseless=True) fromToken = Keyword("from", caseless=True) asToken = Keyword("as", caseless=True) whereToken = Keyword("where", caseless=True) semicolon = Literal(";") ident = Word( alphas, alphanums + "_$" ).setName("identifier") columnName = delimitedList( ident, ".", combine=True ) #columnName.setParseAction(upcaseTokens) columnNameList = Group( columnName + ZeroOrMore("," + columnName)) # selectableList = Forward() columnRvalList = Forward() functionExpr = ident + Optional("."+ident) + Literal('(') + columnRvalList + Literal(')') alias = Forward() identExpr = functionExpr | ident self.identExpr = identExpr # Debug self.functionExpr = functionExpr # Debug alias = ident.copy() selectableName = identExpr | columnName selectableList = Group( selectableName + ZeroOrMore(","+selectableName)) columnRef = columnName functionSpec = functionExpr valueExprPrimary = functionSpec | columnRef numPrimary = valueExprPrimary ## | numericValFunc factor = Optional(Literal("+") | Literal("-")) + numPrimary muldiv = oneOf("* /") term = Forward() term << factor + Optional(muldiv + factor) numericExpr = Forward() addsub = oneOf("+ -") numericExpr << term + Optional(addsub + numericExpr) arithop = oneOf("+ - * /") columnNumericExpr = Forward() cTerm = valueExprPrimary testme = valueExprPrimary + arithop + valueExprPrimary columnNumericExpr << cTerm + Optional(arithop + columnNumericExpr) colNumExpList = Group( columnNumericExpr + ZeroOrMore(","+columnNumericExpr)) valueExpr = numericExpr ## | stringExpr | dateExpr | intervalExpr derivedColumn = valueExpr + Optional(asToken + alias) selectSubList = derivedColumn + ZeroOrMore("," + derivedColumn) tableName = delimitedList( ident, ".", combine=True ) # don't upcase table names anymore # tableName.setParseAction(upcaseTokens) self.tableAction = [] tableName.addParseAction(self.actionWrapper(self.tableAction)) tableName.setResultsName("table") 
tableAlias = tableName + asToken + ident.setResultsName("aliasName") tableAlias.setResultsName("alias") genericTableName = tableAlias | tableName genericTableName = genericTableName.setResultsName("tablename") tableNameList = Group( genericTableName + ZeroOrMore("," + genericTableName)) whereExpression = Forward() and_ = Keyword("and", caseless=True) or_ = Keyword("or", caseless=True) in_ = Keyword("in", caseless=True) between_ = Keyword("between", caseless=True) E = CaselessLiteral("E") binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True) arithSign = Word("+-",exact=1) realNum = Combine( Optional(arithSign) + ( Word( nums ) + "." + Optional( Word(nums) ) | ( "." + Word(nums) ) ) + Optional( E + Optional(arithSign) + Word(nums) ) ) intNum = Combine( Optional(arithSign) + Word( nums ) + Optional( E + Optional("+") + Word(nums) ) ) # need to add support for alg expressions columnRval = realNum | intNum | quotedString | columnNumericExpr# | numericExpr columnRvalList << Group( columnRval + ZeroOrMore("," + columnRval)) self.whereExpAction = [] namedRv = columnRval.setResultsName("column") whereConditionFlat = Group( ( functionSpec + binop + columnRval) | ( namedRv + binop + columnRval ) | ( namedRv + in_ + "(" + columnRval + ZeroOrMore(","+namedRv) + ")" ) | ( namedRv + in_ + "(" + selectStmt + ")" ) | ( namedRv + between_ + namedRv + and_ + namedRv ) ) whereConditionFlat.addParseAction(self.actionWrapper(self.whereExpAction)) whereCondition = Group(whereConditionFlat | ( "(" + whereExpression + ")" )) # Test code to try to make an expression parse. 
# print whereConditionFlat.parseString("ABS(o1.ra - o2.ra) < 0.00083 / COS(RADIANS(o2.decl))") # goodFunction = ident + Literal('(') + columnNumericExpr + Literal(')') # print "ADFDSFDSF",testme.parseString("o1.ra - o2.ra", parseAll=True) # print "ADSFDSFAD", goodFunction.parseString("ABS(o1.ra - o2.ra)") #whereExpression << whereCondition.setResultsName("wherecond") #+ ZeroOrMore( ( and_ | or_ ) + whereExpression ) def scAnd(tok): print "scAnd", tok if "TRUE" == tok[0][0]: tok = tok[2] elif "TRUE" == tok[2][0]: tok = tok[0] return tok def scOr(tok): print "scOr", tok if ("TRUE" == tok[0][0]) or ("TRUE" == tok[2][0]): tok = [["TRUE"]] return tok def scWhere(tok): newtok = [] i = 0 while i < len(tok): if str(tok[i]) in ["TRUE",str(["TRUE"])] and (i+1) < len(tok): if str(tok[i+1]).upper() == "AND": i += 2 continue elif str(tok[i+i]).upper() == "OR": break newtok.append(tok[i]) i += 1 return newtok def collapseWhere(tok): #collapse.append(tok[0][1]) if ["TRUE"] == tok.asList()[0][1]: tok = [] return tok andExpr = and_ + whereExpression orExpr = or_ + whereExpression whereExpression << whereCondition + ZeroOrMore( andExpr | orExpr) whereExpression.addParseAction(scWhere) self.selectPart = selectToken + ( '*' | selectSubList ).setResultsName( "columns" ) whereClause = Group(whereToken + whereExpression).setResultsName("where") whereClause.addParseAction(collapseWhere) self.fromPart = fromToken + tableNameList.setResultsName("tables") # define the grammar selectStmt << ( self.selectPart + fromToken + tableNameList.setResultsName( "tables" ) + whereClause) self.simpleSQL = selectStmt + semicolon # define Oracle comment format, and ignore them oracleSqlComment = "--" + restOfLine self.simpleSQL.ignore( oracleSqlComment )
def init_grammar(self):
    """Build the pyparsing grammar for the configuration file.

    Every change to the config-file syntax belongs in this method.  The
    assembled parser is stored in ``self.config_parser``; the semantic work is
    done by the parse actions (methods of this object) attached below.
    """
    # Punctuation that has to be present but is dropped from the results
    semi = Suppress(";")
    equals = Suppress("=")
    open_paren = Suppress("(")
    close_paren = Suppress(")")
    backslash = Suppress("\\")
    dash = Suppress("-")

    def simple_option(name):
        """Return a parser for ``<name> = <value>;``."""
        option = Keyword(name) + equals + Word(alphas + nums + '-_') + semi
        option.setParseAction(self.set_simple_option)
        return option

    # ---- Top section -------------------------------------------------
    file_name = Word(alphas + nums + '-_.')

    alignment = Keyword('alignment') + equals + file_name + semi
    alignment.setParseAction(self.set_alignment)

    user_tree = Keyword('user_tree_topology') + equals + file_name + semi
    user_tree.setParseAction(self.set_user_tree)

    branch_lengths = simple_option('branchlengths')

    # models = <predefined set name> | <comma-separated model names>
    model_name = Word(alphas + nums + '+')
    custom_models = delimitedList(model_name)
    predefined_models = (
        CaselessKeyword("all")
        | CaselessKeyword("mrbayes")
        | CaselessKeyword("raxml")
        | CaselessKeyword("beast")
        | CaselessKeyword("all_protein")
        | CaselessKeyword("all_protein_gamma")
        | CaselessKeyword("all_protein_gammaI")
    )
    models = (Keyword("models") + equals
              + Group(predefined_models("predefined")
                      | Group(custom_models)("userlist"))
              + semi)
    models.setParseAction(self.set_models)

    model_selection = simple_option("model_selection")

    top_section = (alignment + Optional(user_tree) + branch_lengths
                   + models + model_selection)

    # ---- Partitions ([data_blocks]) ----------------------------------
    column = Word(nums)
    part_name = Word(alphas + '_-' + nums)

    # start[-end][\step]
    column_range = (column("start")
                    + Optional(dash + column("end"))
                    + Optional(backslash + column("step")))
    column_range.setParseAction(self.define_range)
    range_list = Group(OneOrMore(Group(column_range)))

    partition = (Optional("charset") + part_name("name")
                 + equals + range_list("parts") + semi)
    partition.setParseAction(self.define_partition)

    all_partitions = OneOrMore(Group(partition))
    part_section = Suppress("[data_blocks]") + all_partitions

    # ---- Schemes -----------------------------------------------------
    scheme_name = Word(alphas + '_-' + nums)

    # A copy is needed because references get their own parse action
    part_ref = part_name.copy()
    part_ref.setParseAction(self.check_part_exists)

    subset = Group(open_paren + delimitedList(part_ref("name")) + close_paren)
    subset.setParseAction(self.define_subset)

    scheme_body = Group(OneOrMore(subset))
    scheme = scheme_name("name") + equals + scheme_body("scheme") + semi
    scheme.setParseAction(self.define_schema)

    all_schemes = OneOrMore(Group(scheme))
    search = simple_option("search")
    scheme_section = Suppress("[schemes]") + search + Optional(all_schemes)

    # The three sections in order, then nothing but the end of the input
    self.config_parser = (top_section + part_section + scheme_section
                          + stringEnd)
# Grammar pieces for Fortran procedure headers.  Each parse action rewrites
# its tokens into text fragments carrying their own leading/trailing blank, so
# the enclosing definition can rebuild the header with ''.join(toks).

# Suffix attributes: bind(c[, name="..."]) and result(NAME)
name_eq = Literal('name') + '=' + quotedString
bind_keyword = Keyword('bind').setParseAction(lambda s, loc, toks: [' bind'])
bind_attr = bind_keyword + Literal('(') + 'c' + Optional(comma + name_eq) + ')'
result_attr = Literal('result') + '(' + NAME + ')'
result_attr.setParseAction(lambda s, loc, toks: [' ' + ''.join(toks)])
func_post = eachMostOnce(bind_attr, result_attr)

# Prefix attributes; each one is emitted followed by a single blank.
elem_attr = Keyword('elemental').setName('elemental')
elem_attr.setParseAction(lambda s, loc, toks: [toks[0] + ' '])
pure_attr = Keyword('pure').setName('pure')
pure_attr.setParseAction(lambda s, loc, toks: [toks[0] + ' '])
# Fixed: this element used to be named 'elemental' (copy-paste), which made
# diagnostics for a failed 'recursive' match point at the wrong attribute.
recu_attr = Keyword('recursive').setName('recursive')
recu_attr.setParseAction(lambda s, loc, toks: [toks[0] + ' '])
func_pre = eachMostOnce(elem_attr, pure_attr, recu_attr)

func_name = NAME.copy().setName('func_name')
func_name.setParseAction(lambda s, loc, toks: [' ' + toks[0]])

# A matched header is replaced by three tokens: the re-joined header text, an
# empty string and the matching 'end ...' text.
func_def = Optional(func_pre) + Keyword('function') + func_name + arglist \
    + Optional(func_post) + EOLL
func_def.setParseAction(
    lambda s, loc, toks: [''.join(toks), '', 'end function'])
subr_def = Keyword('subroutine') + func_name + arglist + Optional(
    bind_attr) + EOLL
subr_def.setParseAction(
    lambda s, loc, toks: [''.join(toks), '', 'end subroutine'])

# Interface names: a plain NAME or assignment(=)
inte_name = NAME.copy().setParseAction(lambda s, loc, toks: [' ' + toks[0]])
inte_assignment = Literal('assignment') + '(' + Literal('=') + ')'
def SPICE_BNF():
    """Return the pyparsing grammar, building it on first use.

    The grammar is cached in the module-level ``bnf``.  It accepts zero or
    more definitions (typedef, struct, enum, flags, message, channel) followed
    by one protocol definition and the end of input.  ``//`` line comments and
    C-style comments are ignored.
    """
    global bnf

    # Already built: hand back the cached grammar.
    if bnf:
        return bnf

    def replaced_keyword(word, value):
        """Keyword whose matched token is replaced by ``value``."""
        return Keyword(word).setParseAction(replaceWith(value))

    # punctuation
    colon = Literal(":").suppress()
    lbrace = Literal("{").suppress()
    rbrace = Literal("}").suppress()
    lbrack = Literal("[").suppress()
    rbrack = Literal("]").suppress()
    lparen = Literal("(").suppress()
    rparen = Literal(")").suppress()
    equals = Literal("=").suppress()
    comma = Literal(",").suppress()
    semi = Literal(";").suppress()

    # primitive types
    int8_ = replaced_keyword("int8", ptypes.int8)
    uint8_ = replaced_keyword("uint8", ptypes.uint8)
    int16_ = replaced_keyword("int16", ptypes.int16)
    uint16_ = replaced_keyword("uint16", ptypes.uint16)
    int32_ = replaced_keyword("int32", ptypes.int32)
    uint32_ = replaced_keyword("uint32", ptypes.uint32)
    int64_ = replaced_keyword("int64", ptypes.int64)
    uint64_ = replaced_keyword("uint64", ptypes.uint64)

    # keywords; enum*/flags* are replaced by their bit width
    enum32_ = replaced_keyword("enum32", 32)
    enum16_ = replaced_keyword("enum16", 16)
    enum8_ = replaced_keyword("enum8", 8)
    flags32_ = replaced_keyword("flags32", 32)
    flags16_ = replaced_keyword("flags16", 16)
    flags8_ = replaced_keyword("flags8", 8)
    channel_ = Keyword("channel")
    server_ = Keyword("server")
    client_ = Keyword("client")
    protocol_ = Keyword("protocol")
    typedef_ = Keyword("typedef")
    struct_ = Keyword("struct")
    message_ = Keyword("message")
    image_size_ = Keyword("image_size")
    bytes_ = Keyword("bytes")
    cstring_ = Keyword("cstring")
    switch_ = Keyword("switch")
    default_ = Keyword("default")
    case_ = Keyword("case")

    identifier = Word(alphas, alphanums + "_")
    enumname = Word(alphanums + "_")

    # hexadecimal ("0x...") or optionally signed decimal, converted by cvtInt
    hex_integer = Combine(CaselessLiteral("0x") + Word(nums + "abcdefABCDEF"))
    integer = (hex_integer | Word(nums + "+-", nums)).setName("int").setParseAction(cvtInt)

    typename = identifier.copy().setParseAction(lambda toks: ptypes.TypeRef(str(toks[0])))

    # This is just normal "types", i.e. not channels or messages
    typeSpec = Forward()

    # @name or @name(value, ...)
    attributeValue = integer ^ identifier
    attribute = Group(
        Combine("@" + identifier)
        + Optional(lparen + delimitedList(attributeValue) + rparen)
    )
    attributes = Group(ZeroOrMore(attribute))

    # array size: [], [identifier], [integer], [image_size(...)],
    # [bytes(...)] or [cstring()]
    arraySizeSpecImage = Group(
        image_size_ + lparen + integer + comma + identifier + comma + identifier + rparen
    )
    arraySizeSpecBytes = Group(bytes_ + lparen + identifier + comma + identifier + rparen)
    arraySizeSpecCString = Group(cstring_ + lparen + rparen)
    arraySizeSpec = (
        lbrack
        + Optional(
            identifier ^ integer ^ arraySizeSpecImage ^ arraySizeSpecBytes ^ arraySizeSpecCString,
            default="",
        )
        + rbrack
    )

    variableDef = Group(
        typeSpec
        + Optional("*", default=None)
        + identifier
        + Optional(arraySizeSpec, default=None)
        + attributes
        - semi
    ).setParseAction(parseVariableDef)

    # one or more "default:" / "case [!]identifier:" labels, then a variable
    switchCase = Group(
        Group(
            OneOrMore(
                default_.setParseAction(replaceWith(None)) + colon
                | Group(case_.suppress() + Optional("!", default="") + identifier) + colon
            )
        )
        + variableDef
    ).setParseAction(lambda toks: ptypes.SwitchCase(toks[0][0], toks[0][1]))
    switchBody = Group(
        switch_
        + lparen
        + delimitedList(identifier, delim=".", combine=True)
        + rparen
        + lbrace
        + Group(OneOrMore(switchCase))
        + rbrace
        + identifier
        + attributes
        - semi
    ).setParseAction(
        lambda toks: ptypes.Switch(toks[0][1], toks[0][2], toks[0][3], toks[0][4])
    )
    messageBody = structBody = Group(lbrace + ZeroOrMore(variableDef | switchBody) + rbrace)
    structSpec = Group(struct_ + identifier + structBody + attributes).setParseAction(
        lambda toks: ptypes.StructType(toks[0][1], toks[0][2], toks[0][3])
    )

    # have to use longest match for type, in case a user-defined type name
    # starts with a keyword type, like "channel_type"
    typeSpec << (
        structSpec
        ^ int8_ ^ uint8_
        ^ int16_ ^ uint16_
        ^ int32_ ^ uint32_
        ^ int64_ ^ uint64_
        ^ typename
    ).setName("type")

    # { name [= integer], ... } with an optional trailing comma
    flagsBody = enumBody = Group(
        lbrace
        + delimitedList(Group(enumname + Optional(equals + integer)))
        + Optional(comma)
        + rbrace
    )

    # inline "message { ... }" (built with name None) or a type name
    messageSpec = (
        Group(message_ + messageBody + attributes).setParseAction(
            lambda toks: ptypes.MessageType(None, toks[0][1], toks[0][2])
        )
        | typename
    )

    channelParent = Optional(colon + typename, default=None)
    channelMessage = Group(
        messageSpec + identifier + Optional(equals + integer, default=None) + semi
    ).setParseAction(lambda toks: ptypes.ChannelMember(toks[0][1], toks[0][0], toks[0][2]))
    channelBody = channelParent + Group(
        lbrace + ZeroOrMore(server_ + colon | client_ + colon | channelMessage) + rbrace
    )

    enum_ = enum32_ | enum16_ | enum8_
    flags_ = flags32_ | flags16_ | flags8_

    # top-level definitions
    enumDef = Group(enum_ + identifier + enumBody + attributes - semi).setParseAction(
        lambda toks: ptypes.EnumType(toks[0][0], toks[0][1], toks[0][2], toks[0][3])
    )
    flagsDef = Group(flags_ + identifier + flagsBody + attributes - semi).setParseAction(
        lambda toks: ptypes.FlagsType(toks[0][0], toks[0][1], toks[0][2], toks[0][3])
    )
    messageDef = Group(message_ + identifier + messageBody + attributes - semi).setParseAction(
        lambda toks: ptypes.MessageType(toks[0][1], toks[0][2], toks[0][3])
    )
    channelDef = Group(channel_ + identifier + channelBody + attributes - semi).setParseAction(
        lambda toks: ptypes.ChannelType(toks[0][1], toks[0][2], toks[0][3], toks[0][4])
    )
    structDef = Group(struct_ + identifier + structBody + attributes - semi).setParseAction(
        lambda toks: ptypes.StructType(toks[0][1], toks[0][2], toks[0][3])
    )
    typedefDef = Group(typedef_ + identifier + typeSpec + attributes - semi).setParseAction(
        lambda toks: ptypes.TypeAlias(toks[0][1], toks[0][2], toks[0][3])
    )

    definitions = typedefDef | structDef | enumDef | flagsDef | messageDef | channelDef

    protocolChannel = Group(
        typename + identifier + Optional(equals + integer, default=None) + semi
    ).setParseAction(lambda toks: ptypes.ProtocolMember(toks[0][1], toks[0][0], toks[0][2]))
    protocolDef = Group(
        protocol_ + identifier + Group(lbrace + ZeroOrMore(protocolChannel) + rbrace) + semi
    ).setParseAction(lambda toks: ptypes.ProtocolType(toks[0][1], toks[0][2]))

    bnf = ZeroOrMore(definitions) + protocolDef + StringEnd()

    singleLineComment = "//" + restOfLine
    bnf.ignore(singleLineComment)
    bnf.ignore(cStyleComment)

    return bnf
def RawOutputSpecParser():
    '''Syntax of the OUTPUT statement (and nothing else).

    Builds and returns the pyparsing element that matches a complete
    ``OUTPUT { name = expr; ... }`` statement.  Parse actions attached here
    turn the matched tokens into ``o.*`` (output specification) and ``asp.*``
    (ASP query) objects; the top-level result is wrapped in ``o.OutputSpec``.

    All grammar elements are created while the default whitespace characters
    are set to ``DEFAULT_WHITESPACE_CHARS``.
    '''
    with PyParsingDefaultWhitespaceChars(DEFAULT_WHITESPACE_CHARS):
        # Keywords (case-insensitive, dropped from the parse results)
        OUTPUT = CaselessKeyword('OUTPUT').suppress()
        QUERY = CaselessKeyword('query').suppress()
        INDEX = CaselessKeyword('index').suppress()
        KEY = CaselessKeyword('key').suppress()
        CONTENT = CaselessKeyword('content').suppress()
        SET = CaselessKeyword('set').suppress()
        SEQUENCE = CaselessKeyword('sequence').suppress()
        DICTIONARY = CaselessKeyword('dictionary').suppress()
        NOT = CaselessKeyword('not').suppress()

        constant = integer | QuotedString('"', escChar='\\')
        # not strictly necessary to wrap this, but it simplifies working with the syntax tree
        constant.setParseAction(lambda t: o.Constant(t[0]))

        asp_variable_name = Word(alphas_uppercase, alphanums + '_')
        asp_variable_anonymous = Keyword('_')
        asp_variable = asp_variable_anonymous | asp_variable_name
        # The copy is taken *before* the parse action is attached to
        # asp_variable_name, so the same syntax can yield an o.Variable when
        # used inside an output expression and an asp.Variable inside a query.
        asp_variable_expr = asp_variable_name.copy()
        #
        asp_variable_name.setParseAction(lambda t: asp.Variable(t[0]))
        asp_variable_anonymous.setParseAction(
            lambda t: asp.AnonymousVariable())
        asp_variable_expr.setParseAction(lambda t: o.Variable(t[0]))

        # TODO:
        # Instead of explicitly marking references with '&', we might just define a convention as follows:
        # * Output names start with lowercase characters
        # * ASP variables start with uppercase characters (as they do in actual ASP code)
        reference = amp + py_identifier
        # to distinguish from literal string values
        reference.setParseAction(lambda t: o.Reference(t[0]))

        # Note: must be able to distinguish between unquoted and quoted constants
        asp_constant_symbol = Word(alphas_lowercase, alphanums + '_')
        asp_quoted_string = QuotedString('"', escChar='\\')
        asp_quoted_string.setParseAction(lambda t: asp.QuotedConstant(t[0]))

        # Every matched term is collected under the 'terms' results name.
        term = (asp_constant_symbol | asp_quoted_string | asp_variable
                | positive_integer).setResultsName('terms', listAllMatches=True)
        terms = Optional(term + ZeroOrMore(comma + term))
        classical_atom = predicate_name('predicate') + Optional(lpar + terms + rpar)

        # Builtin atoms
        # "|" (MatchFirst) commits to the first alternative that matches and
        # never backtracks into a later one, so every two-character operator
        # has to be listed before the one-character operator it starts with;
        # otherwise '==', '<=' and '>=' could never be recognised.
        builtin_op_binary = (Literal('==') | '!=' | '<>' | '<=' | '>='
                             | '=' | '<' | '>' | '#succ').setResultsName('predicate')
        builtin_atom_binary = term + builtin_op_binary + term
        builtin_atom_binary_prefix = builtin_op_binary + lpar + term + comma + term + rpar
        builtin_atom = builtin_atom_binary | builtin_atom_binary_prefix
        #
        body_atom = classical_atom | builtin_atom
        # Positive and negated atoms share the syntax of body_atom but need
        # different parse actions, hence the copy for the positive case.
        pos_body_atom = body_atom.copy()
        neg_body_atom = NOT + body_atom
        pos_body_atom.setParseAction(
            lambda t: asp.Literal(t.predicate, tuple(t.terms), False))
        neg_body_atom.setParseAction(
            lambda t: asp.Literal(t.predicate, tuple(t.terms), True))
        body_literal = neg_body_atom | pos_body_atom
        #
        asp_query = Group(body_literal + ZeroOrMore(comma + body_literal))
        asp_query.setParseAction(lambda t: asp.Query(tuple(t[0])))

        # Output expressions are recursive (collections contain expressions).
        expr = Forward()

        # TODO: Instead of semicolon, we could use (semicolon | FollowedBy(rbrace)) to make the last semicolon optional (but how would that work with asp_query...)
        query_clause = QUERY + colon + asp_query('query') + semicolon
        content_clause = CONTENT + colon + expr('content') + semicolon
        index_clause = INDEX + colon + asp_variable_expr('index') + semicolon
        key_clause = KEY + colon + expr('key') + semicolon
        #
        simple_set_spec = SET + lbrace + predicate_name(
            'predicate') + slash + positive_integer('arity') + Optional(
                rightarrow + py_qualified_identifier('constructor')) + rbrace
        # "&" (Each) accepts the clauses in any order.
        set_spec = SET + lbrace + (query_clause & content_clause) + rbrace
        # TODO: add clause like "at_missing_index: skip;", "at_missing_index: 0;", "at_missing_index: None;"
        sequence_spec = SEQUENCE + lbrace + (query_clause & content_clause & index_clause) + rbrace
        dictionary_spec = DICTIONARY + lbrace + (query_clause & content_clause & key_clause) + rbrace
        expr_collection = set_spec | simple_set_spec | sequence_spec | dictionary_spec
        #
        simple_set_spec.setParseAction(lambda t: o.ExprSimpleSet(
            t.predicate, t.arity, t.get('constructor')))
        set_spec.setParseAction(lambda t: o.ExprSet(t.query, t.content))
        sequence_spec.setParseAction(
            lambda t: o.ExprSequence(t.query, t.content, t.index))
        dictionary_spec.setParseAction(
            lambda t: o.ExprDictionary(t.query, t.content, t.key))

        # Argument list of an object expression; a trailing comma is allowed.
        expr_obj_args = Group(
            Optional(expr + ZeroOrMore(comma + expr) + Optional(comma)))
        expr_obj = Optional(
            py_qualified_identifier,
            default=None)('constructor') + lpar + expr_obj_args('args') + rpar
        #
        expr_obj.setParseAction(lambda t: o.ExprObject(t.constructor, t.args))

        # Note: "|" always takes the first match, that's why we have to parse variable names after obj (otherwise "variable name" might consume the identifier of expr_obj)
        expr << (constant | expr_collection | expr_obj | reference | asp_variable_expr)

        named_output_spec = py_identifier('name') + equals + expr(
            'expr') + semicolon
        output_statement = OUTPUT + lbrace + ZeroOrMore(
            named_output_spec) + rbrace
        #
        named_output_spec.setParseAction(lambda t: (t.name, t.expr))
        output_statement.setParseAction(lambda t: o.OutputSpec(t))
        return output_statement
def init_grammar(self):
    """Build the pyparsing grammar for the configuration file.

    The grammar has three consecutive parts: the top-level options, the
    ``[data_blocks]`` section and the ``[schemes]`` section.  Parse actions
    are bound to methods of this object (``set_alignment``,
    ``define_range``, ``define_scheme``, ...).  The assembled parser is
    stored in ``self.config_parser``.

    Any change to the syntax of the config file should be made here.
    """
    # Punctuation that must be present but is dropped from the results
    semi = (Suppress(";"))
    equals = Suppress("=")

    def simple_option(name):
        # Generic "<name> = <value>;" entry handled by set_simple_option
        option = Keyword(name) + equals + Word(alphas + nums + '-_') + semi
        return option.setParseAction(self.set_simple_option)

    # ---- Top section -------------------------------------------------
    file_name = Word(alphas + nums + '-_.')

    alignment_def = (
        Keyword('alignment') + equals + file_name + semi
    ).setParseAction(self.set_alignment)

    tree_def = (
        Keyword('user_tree_topology') + equals + file_name + semi
    ).setParseAction(self.set_user_tree)

    branch_def = simple_option('branchlengths')

    # Model names may contain '+', spaces and underscores
    model_name = Word(alphas + nums + '+' + ' ' + '_')
    model_def = (
        'models' + equals + delimitedList(model_name) + semi
    ).setParseAction(self.set_models)

    model_selection_def = simple_option("model_selection")

    top_section = (
        alignment_def + Optional(tree_def) + branch_def
        + model_def + model_selection_def
    )

    # ---- Data blocks -------------------------------------------------
    column = Word(nums)
    block_name = Word(alphas + '_-' + nums)

    # start[-end][\step]
    block_def = (
        column("start")
        + Optional(Suppress("-") + column("end"))
        + Optional(Suppress("\\") + column("step"))
    ).setParseAction(self.define_range)
    block_list_def = Group(OneOrMore(Group(block_def)))

    user_subset_def = (
        Optional("charset") + block_name("name")
        + equals + block_list_def("parts") + semi
    ).setParseAction(self.define_user_subset)

    block_def_list = OneOrMore(
        Group(user_subset_def)
    ).setParseAction(self.check_blocks)
    block_section = Suppress("[data_blocks]") + block_def_list

    # ---- Schemes -----------------------------------------------------
    scheme_name = Word(alphas + '_-' + nums)

    # Same syntax as a block name, but copied because references get
    # their own parse action
    user_subset_ref = block_name.copy().setParseAction(
        self.check_block_exists)

    subset = Group(
        Suppress("(") + delimitedList(user_subset_ref("name")) + Suppress(")")
    ).setParseAction(self.define_subset_grouping)
    scheme = Group(OneOrMore(subset))

    scheme_def = (
        scheme_name("name") + equals + scheme("scheme") + semi
    ).setParseAction(self.define_scheme)
    scheme_list = OneOrMore(Group(scheme_def))

    scheme_algo = simple_option("search")
    scheme_section = (
        Suppress("[schemes]") + scheme_algo + Optional(scheme_list)
    )

    # ---- Whole file: the three sections in order, then end of input ---
    self.config_parser = (
        top_section + block_section + scheme_section + stringEnd)
def __init__(self):
    """Build the pyparsing grammar for a simple SQL SELECT statement.

    Attributes set on the instance:

    * ``identExpr``, ``functionExpr`` -- sub-expressions exposed for debugging
    * ``tableAction``, ``whereExpAction`` -- initially empty lists handed to
      ``self.actionWrapper``; the wrapped callables are installed as parse
      actions on table names and on flat WHERE conditions respectively
    * ``selectPart``, ``fromPart`` -- the SELECT and FROM portions of the grammar
    * ``simpleSQL`` -- the complete statement (terminated by ``;``), with
      ``--`` comments ignored
    """
    # define SQL tokens
    selectStmt = Forward()
    selectToken = Keyword("select", caseless=True)
    fromToken = Keyword("from", caseless=True)
    asToken = Keyword("as", caseless=True)
    whereToken = Keyword("where", caseless=True)
    semicolon = Literal(";")

    ident = Word(alphas, alphanums + "_$").setName("identifier")
    columnName = delimitedList(ident, ".", combine=True)
    #columnName.setParseAction(upcaseTokens)
    columnNameList = Group(columnName + ZeroOrMore("," + columnName))
    # selectableList = Forward()
    # Forward declaration: function arguments are defined further down,
    # once the numeric / quoted-string values exist.
    columnRvalList = Forward()
    functionExpr = ident + Optional("." + ident) + Literal(
        '(') + columnRvalList + Literal(')')
    identExpr = functionExpr | ident
    self.identExpr = identExpr  # Debug
    self.functionExpr = functionExpr  # Debug
    alias = ident.copy()

    selectableName = identExpr | columnName
    selectableList = Group(selectableName + ZeroOrMore("," + selectableName))
    columnRef = columnName
    functionSpec = functionExpr
    valueExprPrimary = functionSpec | columnRef
    numPrimary = valueExprPrimary  ## | numericValFunc
    factor = Optional(Literal("+") | Literal("-")) + numPrimary
    muldiv = oneOf("* /")
    term = Forward()
    term << factor + Optional(muldiv + factor)
    numericExpr = Forward()
    addsub = oneOf("+ -")
    numericExpr << term + Optional(addsub + numericExpr)

    arithop = oneOf("+ - * /")
    columnNumericExpr = Forward()
    cTerm = valueExprPrimary
    testme = valueExprPrimary + arithop + valueExprPrimary
    columnNumericExpr << cTerm + Optional(arithop + columnNumericExpr)
    colNumExpList = Group(columnNumericExpr +
                          ZeroOrMore("," + columnNumericExpr))

    valueExpr = numericExpr  ## | stringExpr | dateExpr | intervalExpr
    derivedColumn = valueExpr + Optional(asToken + alias)
    selectSubList = derivedColumn + ZeroOrMore("," + derivedColumn)

    tableName = delimitedList(ident, ".", combine=True)
    # don't upcase table names anymore
    # tableName.setParseAction(upcaseTokens)
    self.tableAction = []
    tableName.addParseAction(self.actionWrapper(self.tableAction))
    # NOTE(review): the return value is discarded here and for tableAlias
    # below; on pyparsing versions where setResultsName returns a copy these
    # two calls have no effect.  Left unchanged so the result names seen by
    # existing callers do not change.
    tableName.setResultsName("table")
    tableAlias = tableName + asToken + ident.setResultsName("aliasName")
    tableAlias.setResultsName("alias")
    genericTableName = tableAlias | tableName
    genericTableName = genericTableName.setResultsName("tablename")
    tableNameList = Group(genericTableName +
                          ZeroOrMore("," + genericTableName))

    whereExpression = Forward()
    and_ = Keyword("and", caseless=True)
    or_ = Keyword("or", caseless=True)
    in_ = Keyword("in", caseless=True)
    between_ = Keyword("between", caseless=True)

    E = CaselessLiteral("E")
    binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
    arithSign = Word("+-", exact=1)
    realNum = Combine(
        Optional(arithSign) +
        (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) +
        Optional(E + Optional(arithSign) + Word(nums)))
    intNum = Combine(
        Optional(arithSign) + Word(nums) +
        Optional(E + Optional("+") + Word(nums)))

    # need to add support for alg expressions
    columnRval = realNum | intNum | quotedString | columnNumericExpr  # | numericExpr
    columnRvalList << Group(columnRval + ZeroOrMore("," + columnRval))

    self.whereExpAction = []
    namedRv = columnRval.setResultsName("column")
    whereConditionFlat = Group(
        (functionSpec + binop + columnRval) |
        (namedRv + binop + columnRval) |
        (namedRv + in_ + "(" + columnRval + ZeroOrMore("," + namedRv) + ")") |
        (namedRv + in_ + "(" + selectStmt + ")") |
        (namedRv + between_ + namedRv + and_ + namedRv))
    whereConditionFlat.addParseAction(
        self.actionWrapper(self.whereExpAction))
    whereCondition = Group(whereConditionFlat |
                           ("(" + whereExpression + ")"))

    # Test code to try to make an expression parse.
    # print whereConditionFlat.parseString("ABS(o1.ra - o2.ra) < 0.00083 / COS(RADIANS(o2.decl))")
    # goodFunction = ident + Literal('(') + columnNumericExpr + Literal(')')
    # print "ADFDSFDSF",testme.parseString("o1.ra - o2.ra", parseAll=True)
    # print "ADSFDSFAD", goodFunction.parseString("ABS(o1.ra - o2.ra)")

    #whereExpression << whereCondition.setResultsName("wherecond") #+ ZeroOrMore( ( and_ | or_ ) + whereExpression )

    def scAnd(tok):
        # "TRUE and X" / "X and TRUE" reduce to X.
        # (Not installed as a parse action anywhere in this method.)
        print("scAnd %s" % (tok,))
        if "TRUE" == tok[0][0]:
            tok = tok[2]
        elif "TRUE" == tok[2][0]:
            tok = tok[0]
        return tok

    def scOr(tok):
        # "TRUE or X" / "X or TRUE" reduce to TRUE.
        # (Not installed as a parse action anywhere in this method.)
        print("scOr %s" % (tok,))
        if ("TRUE" == tok[0][0]) or ("TRUE" == tok[2][0]):
            tok = [["TRUE"]]
        return tok

    def scWhere(tok):
        # Simplify a flat WHERE token list containing TRUE placeholders:
        #   TRUE AND <rest>  -> drop the TRUE and the AND, keep <rest>
        #   TRUE OR  <rest>  -> stop; the TRUE and everything after it
        #                       are discarded
        newtok = []
        i = 0
        while i < len(tok):
            if str(tok[i]) in ["TRUE", str(["TRUE"])] and (i + 1) < len(tok):
                # Look at the connective that follows the TRUE placeholder.
                # (The OR test previously indexed tok[i + i], which read the
                # wrong token and could run past the end of the list.)
                following = str(tok[i + 1]).upper()
                if following == "AND":
                    i += 2
                    continue
                elif following == "OR":
                    break
            newtok.append(tok[i])
            i += 1
        return newtok

    def collapseWhere(tok):
        # Drop the whole WHERE clause when its condition reduced to TRUE.
        #collapse.append(tok[0][1])
        if ["TRUE"] == tok.asList()[0][1]:
            tok = []
        return tok

    andExpr = and_ + whereExpression
    orExpr = or_ + whereExpression
    whereExpression << whereCondition + ZeroOrMore(andExpr | orExpr)
    whereExpression.addParseAction(scWhere)

    self.selectPart = selectToken + (
        '*' | selectSubList).setResultsName("columns")
    whereClause = Group(whereToken +
                        whereExpression).setResultsName("where")
    whereClause.addParseAction(collapseWhere)
    self.fromPart = fromToken + tableNameList.setResultsName("tables")

    # define the grammar
    selectStmt << (self.selectPart + fromToken +
                   tableNameList.setResultsName("tables") + whereClause)

    self.simpleSQL = selectStmt + semicolon

    # define Oracle comment format, and ignore them
    oracleSqlComment = "--" + restOfLine
    self.simpleSQL.ignore(oracleSqlComment)