def getkw_bnf(self):
    """Build and return the pyparsing grammar for getkw-style input.

    The grammar accepts any number of entries followed by end of input.
    An entry is one of:

    * a section: ``name {`` or ``name<tag> {``, nested entries, ``}``
    * a data block: ``$name ... $end``
    * a vector: ``name = [item, item, ...]``
    * a keyword: ``name = value``

    Matched tokens are forwarded to the ``conv_*``, ``store_*``,
    ``add_sect`` and ``pop_sect`` methods of this object through parse
    actions.  Python-style ``#`` comments are ignored.

    Returns:
        The top-level pyparsing expression.
    """
    sect_begin = Literal("{").suppress()
    sect_end = Literal("}").suppress()
    array_begin = Literal("[").suppress()
    array_end = Literal("]").suppress()
    tag_begin = Literal("<").suppress()
    tag_end = Literal(">").suppress()
    eql = Literal("=").suppress()
    dmark = Literal('$').suppress()
    end_data = Literal('$end').suppress()
    prtable = alphanums + r'!$%&*+-./<>?@^_|~'

    # Raw strings keep the regex text identical while avoiding Python's
    # "invalid escape sequence" warning for "\d".
    ival = Regex(r'[-]?\d+')
    dval = Regex(r'-?\d+\.\d*([eE]?[+-]?\d+)?')
    lval = Regex('([Yy]es|[Nn]o|[Tt]rue|[Ff]alse|[Oo]n|[Oo]ff)')

    # Work on a private copy: setParseAction() modifies the expression in
    # place, and quotedString is a module-level object shared by every
    # user of pyparsing in the process.
    quoted = quotedString.copy().setParseAction(removeQuotes)

    # Helper definitions
    kstr = quoted ^ dval ^ ival ^ lval ^ Word(prtable)
    name = Word(alphas + "_", alphanums + "_")
    vec = (array_begin
           + delimitedList(dval ^ ival ^ lval ^ Word(prtable)
                           ^ Literal("\n").suppress()
                           ^ quoted)
           + array_end)
    sect = name + sect_begin
    tag_sect = name + Group(tag_begin + name + tag_end) + sect_begin

    # Grammar
    keyword = name + eql + kstr
    vector = name + eql + vec
    data = Combine(dmark + name) + SkipTo(end_data) + end_data
    section = Forward()
    sect_def = (sect | tag_sect)  # | vec_sect)
    entry = section | data | vector | keyword
    section << sect_def + ZeroOrMore(entry) + sect_end

    # Parsing actions
    ival.setParseAction(self.conv_ival)
    dval.setParseAction(self.conv_dval)
    lval.setParseAction(self.conv_lval)
    keyword.setParseAction(self.store_key)
    vector.setParseAction(self.store_vector)
    data.setParseAction(self.store_data)
    sect.setParseAction(self.add_sect)
    tag_sect.setParseAction(self.add_sect)
    sect_end.setParseAction(self.pop_sect)

    bnf = ZeroOrMore(entry) + StringEnd().setFailAction(parse_error)
    bnf.ignore(pythonStyleComment)
    return bnf
class SDKConfig:
    """
    Encapsulates an sdkconfig file. Defines grammar of a configuration entry, and enables
    evaluation of logical expressions involving those entries.
    """

    # A configuration entry is in the form CONFIG=VALUE. Definitions of components of that grammar
    IDENTIFIER = Word(printables.upper())

    HEX = Combine("0x" + Word(hexnums)).setParseAction(lambda t: int(t[0], 16))
    DECIMAL = Combine(Optional(Literal("+") | Literal("-")) + Word(nums)).setParseAction(lambda t: int(t[0]))
    LITERAL = Word(printables)
    # Copy before attaching the action: setParseAction() modifies the
    # expression in place and quotedString is shared process-wide.
    QUOTED_LITERAL = quotedString.copy().setParseAction(removeQuotes)

    # NOTE(review): LITERAL is tried before QUOTED_LITERAL and `printables`
    # includes the quote characters, so QUOTED_LITERAL looks unreachable here.
    # Order kept as-is; confirm whether that is intended.
    VALUE = HEX | DECIMAL | LITERAL | QUOTED_LITERAL

    # Operators supported by the expression evaluation
    OPERATOR = oneOf(["=", "!=", ">", "<", "<=", ">="])

    def __init__(self, kconfig_file, sdkconfig_file, env=None):
        """Export ``env`` into ``os.environ`` and load the configuration.

        :param kconfig_file: object whose ``.name`` is passed to ``kconfiglib.Kconfig``
        :param sdkconfig_file: object whose ``.name`` is passed to ``load_config``
        :param env: optional iterable of ``"NAME=VALUE"`` strings; runs of
            whitespace inside each value are collapsed to single spaces
            before the value is stored in ``os.environ``
        :raises ValueError: if an ``env`` entry contains no ``=``
        """
        # Split every entry first so a malformed one raises before any
        # environment variable has been modified.
        pairs = [(name, value) for (name, value) in (e.split("=", 1) for e in (env or ()))]

        for name, value in pairs:
            os.environ[name] = " ".join(value.split())

        self.config = kconfiglib.Kconfig(kconfig_file.name)
        self.config.load_config(sdkconfig_file.name)

    def evaluate_expression(self, expression):
        """Evaluate ``expression`` with kconfiglib's ``eval_string``.

        :returns: ``False`` when the result is 0 (n), ``True`` when it is 2 (y)
        :raises Exception: for any other result (m)
        """
        result = self.config.eval_string(expression)

        if result == 0:  # n
            return False
        elif result == 2:  # y
            return True
        else:  # m
            raise Exception("Unsupported config expression result.")

    @staticmethod
    def get_expression_grammar():
        """Return the pyparsing grammar for conditional expressions.

        A condition is ``IDENTIFIER`` or ``IDENTIFIER OPERATOR VALUE``,
        optionally wrapped in parentheses; conditions combine with ``!``,
        ``&&`` and ``||``.
        """
        identifier = SDKConfig.IDENTIFIER.setResultsName("identifier")
        operator = SDKConfig.OPERATOR.setResultsName("operator")
        value = SDKConfig.VALUE.setResultsName("value")

        test_binary = identifier + operator + value
        test_single = identifier

        test = test_binary | test_single

        condition = Group(Optional("(").suppress() + test + Optional(")").suppress())

        grammar = infixNotation(condition, [
            ("!", 1, opAssoc.RIGHT),
            ("&&", 2, opAssoc.LEFT),
            ("||", 2, opAssoc.LEFT)])

        return grammar
def get_standard_type_defs(word):
    """Return pyparsing expressions for tuples, lists and dicts.

    The compound expressions are mutually recursive: an item of any of
    them may itself be a tuple, list or dict, or one of the simple
    alternatives (``none``, ``boolean``, ``real``, ``integer``, a quoted
    string with its quotes removed, or ``word``).

    Parameters
    ----------
    word : pyparsing expression
        Expression used as the last alternative for a bare word.

    Returns
    -------
    dict
        Expressions under the keys 'tuple', 'list', 'dict' and
        'list_item'.
    """
    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()

    # Copy before attaching the action: setParseAction() modifies the
    # expression in place and quotedString is shared process-wide.
    quoted = quotedString.copy().setParseAction(removeQuotes)

    list_item = (none | boolean | real | integer | list_str | tuple_str
                 | dict_str | quoted | word)
    # A dict value may be missing, in which case None is produced.
    list_item2 = list_item | Empty().setParseAction(lambda: [None])

    tuple_str.inner = list_of(list_item)
    tuple_str.inner.setParseAction(lambda toks: tuple(toks.asList()))
    tuple_str << (lparen + tuple_str.inner + rparen)

    # Same item grammar as the tuple, with a different conversion.
    # NOTE(review): the action returns a bare list, which pyparsing
    # presumably splices into the result instead of keeping it as one
    # nested token -- confirm this is the intended shape.
    list_str.inner = tuple_str.inner.copy()
    list_str.inner.setParseAction(lambda toks: list(toks))
    list_str << (lbrack + list_str.inner + rbrack)

    dict_entry = Group(list_item + colon + list_item2)
    dict_str.inner = list_of(dict_entry)
    dict_str.inner.setParseAction(lambda toks: dict(toks.asList()))
    dict_str << (lbrace + Optional(dict_str.inner) + rbrace)

    return {
        'tuple': tuple_str,
        'list': list_str,
        'dict': dict_str,
        'list_item': list_item,
    }
def _query_expression():
    """Return the grammar for boolean queries over quoted-string operands.

    Operands are quoted strings (quotes removed).  They combine with the
    case-insensitive operators ``not`` (unary, right-associative), ``and``
    and ``or`` (binary, left-associative), listed from highest to lowest
    precedence.  Each operator level is handled by the matching
    ``_not_expression`` / ``_and_expression`` / ``_or_expression`` action.
    """
    # Copy before attaching the action: setParseAction() modifies the
    # expression in place and quotedString is shared process-wide.
    operand = quotedString.copy().setParseAction(removeQuotes)

    operators = [
        (CaselessLiteral('not'), 1, opAssoc.RIGHT, _not_expression),
        (CaselessLiteral('and'), 2, opAssoc.LEFT, _and_expression),
        (CaselessLiteral('or'), 2, opAssoc.LEFT, _or_expression),
    ]
    return operatorPrecedence(operand, operators)
def transform(txt):
    """Trim ``txt`` to its bracketed part and parse it if it is not JSON.

    When both ``[`` and ``{`` occur and the earlier one is not at index 0,
    ``txt`` is cut down to the span from that opening bracket to the last
    matching closing bracket.  If the result is valid JSON the (trimmed)
    text is returned unchanged; otherwise it is parsed with a relaxed
    dict/list grammar (bare-word keys and strings, ``true``/``false``) and
    the parsed value is printed and returned.

    Raises whatever ``parseString`` raises when the relaxed grammar does
    not match the whole text.
    """
    idx1 = txt.find('[')
    idx2 = txt.find('{')
    # NOTE(review): find() returns -1 when a bracket is absent, so no
    # trimming happens when only one kind of bracket is present or when the
    # first bracket sits at index 0 -- confirm that is intended.
    if idx1 < idx2 and idx1 > 0:
        txt = txt[idx1:txt.rfind(']') + 1]
    elif idx2 < idx1 and idx2 > 0:
        txt = txt[idx2:txt.rfind('}') + 1]

    try:
        json.loads(txt)
    except ValueError:
        # Not valid JSON: parse dict-like syntax instead.
        LBRACK, RBRACK, LBRACE, RBRACE, COLON, COMMA = map(Suppress, "[]{}:,")
        integer = Regex(r"[+-]?\d+").setParseAction(lambda t: int(t[0]))
        real = Regex(r"[+-]?\d+\.\d*").setParseAction(lambda t: float(t[0]))
        # Copy before attaching the action: setParseAction() modifies the
        # expression in place and quotedString is shared process-wide.
        quoted = quotedString.copy().setParseAction(removeQuotes)
        string_ = Word(alphas, alphanums + "_") | quoted
        bool_ = oneOf("true false").setParseAction(lambda t: t[0] == "true")
        item = Forward()
        key = string_
        dict_ = LBRACE - Optional(dictOf(key + COLON, item + Optional(COMMA))) + RBRACE
        list_ = LBRACK - Optional(delimitedList(item)) + RBRACK
        item << (real | integer | string_ | bool_ | Group(list_ | dict_))
        result = item.parseString(txt, parseAll=True)[0]
        # Single-argument call form prints identically on Python 2 and 3.
        print(result)
        txt = result
    return txt
def get_standard_type_defs(word):
    """Return pyparsing expressions for tuples, lists and dicts.

    Items of a compound expression may be another compound expression or
    one of the simple alternatives (``none``, ``boolean``, ``real``,
    ``integer``, a quoted string with its quotes removed, or ``word``).

    Parameters
    ----------
    word : pyparsing expression
        Expression used as the last alternative for a bare word.

    Returns
    -------
    dict
        Expressions under the keys "tuple", "list", "dict" and
        "list_item".
    """
    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()

    # Attach removeQuotes to a private copy; the module-level quotedString
    # is shared and setParseAction() would modify it in place.
    unquoted = quotedString.copy().setParseAction(removeQuotes)

    list_item = (
        none
        | boolean
        | real
        | integer
        | list_str
        | tuple_str
        | dict_str
        | unquoted
        | word
    )
    # A dict value may be missing, in which case None is produced.
    list_item2 = list_item | Empty().setParseAction(lambda: [None])

    tuple_str.inner = list_of(list_item)
    tuple_str.inner.setParseAction(lambda toks: tuple(toks.asList()))
    tuple_str << (lparen + tuple_str.inner + rparen)

    # The list body reuses the tuple body grammar with its own conversion.
    list_str.inner = tuple_str.inner.copy()
    list_str.inner.setParseAction(lambda toks: list(toks))
    list_str << (lbrack + list_str.inner + rbrack)

    dict_entry = Group(list_item + colon + list_item2)
    dict_str.inner = list_of(dict_entry)
    dict_str.inner.setParseAction(lambda toks: dict(toks.asList()))
    dict_str << (lbrace + Optional(dict_str.inner) + rbrace)

    return {"tuple": tuple_str, "list": list_str, "dict": dict_str, "list_item": list_item}
def getEbnfParser(symbols):
    """
    Returns an EBNF parser for the command language.

    While parsing, every recognised token is appended to ``symbols`` as a
    ``(value, TokenType)`` pair: string literals (without their quotes),
    integer literals, ``$variables``, function calls, ``let`` definitions
    and the ``;`` terminator.

    :param symbols: list that receives the token pairs
    :returns: the pyparsing expression for one or more commands
    """
    identifier = Word(alphas + '_', alphanums + '_')

    # The action closes over ``symbols``, so it must live on a private copy.
    # Attaching it to the shared quotedString would make every parser built
    # by this function append to the list of the most recent call.
    str_lit = quotedString.copy().setParseAction(
        lambda t: symbols.append((t[0][1:-1], TokenType.StrLit)))
    integer = Word(nums).setParseAction(
        lambda t: symbols.append((int(t[0]), TokenType.NumLit)))

    var = Suppress("$") + identifier
    var.setParseAction(lambda t: symbols.append((t[0], TokenType.Var)))

    literal = var | str_lit | integer

    # A function name may carry an optional leading dot.
    fnid = Suppress(Optional(".")) + identifier
    fnid.setParseAction(lambda t: symbols.append((t[0], TokenType.Call)))

    # A call is a function name followed by arguments, optionally
    # parenthesised; arguments are literals or nested calls.
    call = Forward()
    callb = fnid + ZeroOrMore(call | literal)
    call << ((Suppress("(") + callb + Suppress(")")) | callb)

    fndef_head = Suppress("let") + identifier
    fndef_head.setParseAction(lambda t: symbols.append((t[0], TokenType.Def)))

    definition = fndef_head + ZeroOrMore(var) + Suppress("=") + call

    terminator = Word(";").setParseAction(
        lambda t: symbols.append((t[0], TokenType.End)))
    cmd = OneOrMore((definition | call) + terminator)

    msg = OneOrMore(cmd)
    return msg
def __grammar(self):
    """
    Pyparsing grammar to parse the filter string.

    A filter is one or more statements joined by ``and`` / ``or``.  A
    statement is ``lhs comp rhs`` where ``rhs`` is a comma-separated list
    of elements (scientific-notation, float or int numbers, quoted strings
    with quotes removed, or bare alphanumeric words).  The result is stored
    in ``self.grammar``; nothing is returned.
    """
    float_ = Combine(Word(nums) + Literal(".") + Word(nums)).setParseAction(
        lambda x, y, z: float(z[0]))
    sci = Combine(
        Word(nums) + Optional(".") + Optional(Word(nums))
        + oneOf("e E") + Optional("-") + Word(nums)
    ).setParseAction(lambda x, y, z: float(z[0]))
    int_ = Word(nums).setParseAction(lambda x, y, z: int(z[0]))

    comp = oneOf("in eq gt lt ge le < > = like",
                 caseless=True).setResultsName("comp")
    op = oneOf("and or", caseless=True).setResultsName("op")
    lhs = Word(alphanums + "_").setResultsName("lhs")

    # Copy before attaching the action: setParseAction() modifies the
    # expression in place and quotedString is shared process-wide.
    quoted = quotedString.copy().setParseAction(removeQuotes)

    # Most specific numeric form first, since '|' takes the first match.
    element = sci | float_ | int_ | quoted | Word(alphanums)
    rhs = (element + ZeroOrMore(Suppress(",") + element)).setResultsName("rhs")

    stmt = Group(lhs + comp + rhs).setResultsName("statement")
    expr = stmt + ZeroOrMore(op + stmt)

    self.grammar = expr
class NginxParser(object):
    '''A class for parsing nginx config files.

    Based on https://github.com/fatiherikli/nginxparser

    The grammar is built once, at class-definition time, and stored in
    ``script``.  Tokens are converted through the ``_nodeify*`` helpers.
    '''

    # constants
    left_bracket = Literal("{").setParseAction(_nodeify('punctuation'))
    right_bracket = Literal("}").setParseAction(_nodeify('punctuation'))
    semicolon = Literal(";").setParseAction(_nodeify('punctuation'))
    space = White().suppress()
    key = Word(alphanums + "_/").setParseAction(_nodeify('key'))
    value = CharsNotIn("{};, ").setParseAction(_nodeify('value'))
    value2 = CharsNotIn(";" + string.whitespace).setParseAction(
        _nodeify('value'))
    # Copy before attaching the action: setParseAction() modifies the
    # expression in place, and quotedString is shared by every pyparsing
    # user in the process.
    quotedstring = quotedString.copy().setParseAction(_nodeify('value'))
    location = CharsNotIn("{};," + string.whitespace).setParseAction(
        _nodeify('location'))
    ifword = Literal("if").setParseAction(_nodeify('keyword'))
    setword = Literal("set").setParseAction(_nodeify('keyword'))

    # modifier for location uri [ = | ~ | ~* | ^~ ]
    modifier = (Literal("=") | Literal("~*") | Literal("~")
                | Literal("^~")).setParseAction(_nodeify('modifier'))

    # rules
    directive = (key + ZeroOrMore(space + Or([value, quotedstring]))
                 + semicolon).setParseAction(_nodeifydirective)
    setblock = (setword + OneOrMore(space + value2)
                + semicolon).setParseAction(_nodeifyset)
    block = Forward()
    ifblock = Forward()
    subblock = Group(ZeroOrMore(setblock | directive | block | ifblock))

    # TODO: parse if condition
    # NOTE(review): this assignment rebinds the name ``ifblock``; the Forward
    # referenced by ``subblock`` above is never given a definition with
    # ``<<`` in this class body.  Left unchanged -- confirm whether an
    # ``if`` nested inside an ``if`` body is meant to be accepted.
    ifblock = (ifword + SkipTo('{') + left_bracket + subblock
               + right_bracket).setParseAction(_nodeifyif)

    block << Group(
        Group(key + Optional(space + modifier) + Optional(space + location))
        + left_bracket
        + Group(ZeroOrMore(directive | block | ifblock | setblock))
        + right_bracket).setParseAction(_nodeifycontext)

    script = OneOrMore(directive | block).ignore(
        pythonStyleComment).setParseAction(_nodeifymain).parseWithTabs()

    def __init__(self, source):
        """Store the configuration text to be parsed."""
        self.source = source

    def parse(self):
        """
        Returns the parsed tree.
        """
        return self.script.parseString(self.source)

    def as_list(self):
        """
        Returns the list of tree.
        """
        return self.parse().asList()
def pyparse_gml():
    """A pyparsing tokenizer for GML graph format.

    This is not intended to be called directly.

    The grammar is built on the first call and cached in the module-level
    ``graph`` variable; later calls return the cached expression.

    See Also
    --------
    write_gml, read_gml, parse_gml

    Notes
    -----
    This doesn't implement the complete GML specification for
    nested attributes for graphs, edges, and nodes.
    """
    global graph

    try:
        from pyparsing import \
            Literal, CaselessLiteral, Word, Forward,\
            ZeroOrMore, Group, Dict, Optional, Combine,\
            ParseException, restOfLine, White, alphas, alphanums, nums,\
            OneOrMore, quotedString, removeQuotes, dblQuotedString
    except ImportError:
        # Call form of raise is valid on both Python 2 and Python 3.
        raise ImportError(
            "Import Error: not able to import pyparsing: http://pyparsing.wikispaces.com/")

    if not graph:
        lbrack = Literal("[").suppress()
        rbrack = Literal("]").suppress()
        pound = ("#")
        comment = pound + Optional(restOfLine)
        point = Literal(".")
        e = CaselessLiteral("E")
        integer = Word(nums).setParseAction(lambda s, l, t: [int(t[0])])
        real = Combine(
            Word("+-" + nums, nums)
            + Optional(point + Optional(Word(nums)))
            + Optional(e + Word("+-" + nums, nums))
        ).setParseAction(lambda s, l, t: [float(t[0])])
        key = Word(alphas, alphanums + '_')
        # Copy before attaching the action: setParseAction() modifies the
        # expression in place and quotedString is shared process-wide.
        quoted = quotedString.copy().setParseAction(removeQuotes)
        value_atom = integer ^ real ^ Word(alphanums) ^ quoted

        value = Forward()   # to be defined later with << operator
        keyvalue = Group(key + value)
        # A value is an atom or a bracketed list of nested key/value pairs.
        value << (value_atom | Group(lbrack + ZeroOrMore(keyvalue) + rbrack))
        node = Group(Literal("node") + lbrack + Group(OneOrMore(keyvalue)) + rbrack)
        edge = Group(Literal("edge") + lbrack + Group(OneOrMore(keyvalue)) + rbrack)

        creator = Group(Literal("Creator") + Optional(restOfLine))
        version = Group(Literal("Version") + Optional(restOfLine))
        graphkey = Literal("graph").suppress()

        graph = Optional(creator) + Optional(version) +\
            graphkey + lbrack + ZeroOrMore((node | edge | keyvalue)) + rbrack
        graph.ignore(comment)

    return graph
def getkw_bnf(self):
    """Build and return the pyparsing grammar for getkw-style input.

    An entry is a section (``name {``, ``name(key) {`` or
    ``name([vector]) {`` ... ``}``), a data block (``$name ... $end``), a
    keyword (``name = value``) or a vector (``name = [a, b, ...]``).
    Alternatives are combined with ``^``.  Matched tokens are forwarded to
    the ``store_*``, ``add_sect``, ``add_vecsect`` and ``pop_sect``
    methods of this object.  Python-style ``#`` comments are ignored.

    Returns:
        ``ZeroOrMore`` of the entry expression.
    """
    lcb = Literal("{").suppress()
    rcb = Literal("}").suppress()
    lsb = Literal("[").suppress()
    rsb = Literal("]").suppress()
    lps = Literal("(").suppress()
    rps = Literal(")").suppress()
    eql = Literal("=").suppress()
    dmark = Literal('$').suppress()
    end_sect = rcb  # same object as rcb: the action below fires on every "}"
    end_data = Literal('$end').suppress()
    prtable = srange("[0-9a-zA-Z]") + '!$%&*+-./<>?@^_|~:'

    # Copy before attaching the action: setParseAction() modifies the
    # expression in place and quotedString is shared process-wide.
    quoted = quotedString.copy().setParseAction(removeQuotes)

    kstr = Word(prtable) ^ quoted
    name = Word(alphas + "_", alphanums + "_")

    vec = lsb + delimitedList(Word(prtable) ^ Literal("\n").suppress()
                              ^ quoted) + rsb
    keyword = name + eql + kstr
    vector = name + eql + vec
    data = Combine(dmark + name) + SkipTo(end_data) + end_data
    data.setParseAction(self.store_data)

    sect = name + lcb
    sect.setParseAction(self.add_sect)
    key_sect = name + Group(lps + kstr + rps) + lcb
    key_sect.setParseAction(self.add_sect)
    vec_sect = name + Group(lps + vec + rps) + lcb
    vec_sect.setParseAction(self.add_vecsect)
    end_sect.setParseAction(self.pop_sect)

    keyword.setParseAction(self.store_key)
    vector.setParseAction(self.store_vector)

    section = Forward()
    entry = section ^ data ^ keyword ^ vector

    sectdef = sect ^ key_sect ^ vec_sect
    section << sectdef + ZeroOrMore(entry) + rcb

    bnf = ZeroOrMore(entry)
    bnf.ignore(pythonStyleComment)

    return bnf
def string_literal(self):
    """
    string_literal ::= "'" string "'" | "\"" string "\""

    Any successful match is converted to a single quoted string to simplify
    post-parsed operations.

    Returns a fresh pyparsing expression on every call.
    """
    # Attach the action to a private copy: setParseAction() modifies the
    # expression in place, and quotedString is a module-level object shared
    # by every pyparsing user in the process.
    literal = quotedString.copy()
    literal.setParseAction(
        lambda s, l, t: "'{string}'".format(string=removeQuotes(s, l, t)))
    return literal
def getkw_bnf(self):
    """Build and return the pyparsing grammar for getkw-style input.

    The grammar accepts any number of entries followed by end of input.
    An entry is one of:

    * a section: ``name {``, ``name(key) {`` or ``name([vector]) {``,
      nested entries, ``}``
    * a data block: ``$name ... $end``
    * a vector: ``name = [item, item, ...]``
    * a keyword: ``name = value``

    Matched tokens are forwarded to the ``store_*``, ``add_sect``,
    ``add_vecsect`` and ``pop_sect`` methods of this object through parse
    actions.  Python-style ``#`` comments are ignored.

    Returns:
        The top-level pyparsing expression.
    """
    sect_begin = Literal("{").suppress()
    sect_end = Literal("}").suppress()
    array_begin = Literal("[").suppress()
    array_end = Literal("]").suppress()
    arg_begin = Literal("(").suppress()
    arg_end = Literal(")").suppress()
    eql = Literal("=").suppress()
    dmark = Literal('$').suppress()
    end_data = Literal('$end').suppress()
    prtable = alphanums + r'!$%&*+-./<>?@^_|~'

    # Copy before attaching the action: setParseAction() modifies the
    # expression in place and quotedString is shared process-wide.
    quoted = quotedString.copy().setParseAction(removeQuotes)

    # Helper definitions
    kstr = Word(prtable) ^ quoted
    name = Word(alphas + "_", alphanums + "_")
    vec = (array_begin
           + delimitedList(Word(prtable)
                           ^ Literal("\n").suppress()
                           ^ quoted)
           + array_end)
    sect = name + sect_begin
    key_sect = name + Group(arg_begin + kstr + arg_end) + sect_begin
    vec_sect = name + Group(arg_begin + vec + arg_end) + sect_begin

    # Grammar
    keyword = name + eql + kstr
    vector = name + eql + vec
    data = Combine(dmark + name) + SkipTo(end_data) + end_data
    section = Forward()
    sect_def = (sect | key_sect | vec_sect)
    entry = section | data | vector | keyword
    section << sect_def + ZeroOrMore(entry) + sect_end

    # Parsing actions
    keyword.setParseAction(self.store_key)
    vector.setParseAction(self.store_vector)
    data.setParseAction(self.store_data)
    sect.setParseAction(self.add_sect)
    key_sect.setParseAction(self.add_sect)
    vec_sect.setParseAction(self.add_vecsect)
    sect_end.setParseAction(self.pop_sect)

    bnf = ZeroOrMore(entry) + StringEnd().setFailAction(parse_error)
    bnf.ignore(pythonStyleComment)
    return bnf
def _get_bnf(self):
    """
    Returns the `Backus-Naur Form` for the parser

    The grammar is built on first use and cached in ``self.bnf``.  Operands
    and operators are reported through ``self.push_stack`` and
    ``self.push_unary_stack`` parse actions.  As layered here, ``^`` binds
    tightest, then the comparison operators, then ``* / %``, then ``+ -``.
    """
    if not self.bnf:
        # Operators
        exponent_operator = Literal("^")
        # negate_operator = Literal("!")  # TODO: Implement this so we can write `!True`
        multiply_operator = oneOf("* / %")
        add_operator = oneOf("+ -")
        comparison_operator = oneOf("== != < <= > >= & |") ^ Keyword("in")

        # Functions
        e = CaselessLiteral("E")
        pi = CaselessLiteral("PI")

        lparen, rparen, lbrack, rbrack = map(Suppress, "()[]")
        ident = Word(alphas, alphas + nums + "_$")
        variable = Combine(Literal("$") + Word(alphanums + "_"))
        boolean = Keyword("True") ^ Keyword("False")
        # Copy before attaching the action: setParseAction() modifies the
        # expression in place and quotedString is shared process-wide.
        string = quotedString.copy().setParseAction(removeQuotes)
        numeric = Combine(
            Word("+-" + nums, nums)
            + Optional(Literal(".") + Optional(Word(nums)))
            + Optional(e + Word("+-" + nums, nums)))
        none = Keyword("None")

        expression = Forward()

        lists = Forward()
        lists << (lbrack
                  + Optional(delimitedList(numeric ^ variable ^ boolean ^ string))
                  + rbrack)

        atom = (
            Optional("-")
            + (pi | e | numeric
               | ident + lparen + expression + rparen).setParseAction(self.push_stack)
            | (variable | none | boolean | string
               | Group(lists)).setParseAction(self.push_stack)
            | (lparen + expression.suppress() + rparen)
        ).setParseAction(self.push_unary_stack)

        # By defining exponentiation as "atom [^factor]" instead of "atom [^atom],
        # we get left to right exponents. 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor << (atom + ZeroOrMore(
            (exponent_operator + factor).setParseAction(self.push_stack)))

        # Named separately from the True/False keyword expression above,
        # which the original rebound at this point.
        comparison = factor + ZeroOrMore(
            (comparison_operator + factor).setParseAction(self.push_stack))
        term = comparison + ZeroOrMore(
            (multiply_operator + comparison).setParseAction(self.push_stack))
        self.bnf = expression << (term + ZeroOrMore(
            (add_operator + term).setParseAction(self.push_stack)))

    return self.bnf
def get_standard_type_defs(word=word_free):
    """
    Return dict of the pyparsing base lexical elements.

    The compound types (tuple, list, dict) can contain compound types or
    simple types such as integers, floats and words.

    Parameters
    ----------
    word : lexical element
        A custom lexical element for word.

    Returns
    -------
    defs : dict
        The dictionary with the following items:

        - tuple: (..., ..., ...)
        - list: [..., ...., ...]
        - dict: {...:..., ...:..., ....} or {...=..., ...=..., ....}
        - list_item: any of preceding compound types or simple types
    """
    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()

    # Copy before attaching the action: setParseAction() modifies the
    # expression in place and quotedString is shared process-wide.
    quoted = quotedString.copy().setParseAction(removeQuotes)

    list_item = (none | boolean | real | integer | list_str | tuple_str
                 | dict_str | quoted | word)
    # A dict value may be missing, in which case None is produced.
    list_item2 = list_item | Empty().setParseAction(lambda: [None])

    tuple_str.inner = list_of(list_item)
    tuple_str.inner.setParseAction(lambda toks: tuple(toks.asList()))
    tuple_str << (lparen + tuple_str.inner + rparen)

    # The list body reuses the tuple body grammar; the action wraps the
    # items in an outer list.
    list_str.inner = tuple_str.inner.copy()
    list_str.inner.setParseAction(lambda toks: [list(toks)])
    list_str << (lbrack + list_str.inner + rbrack)

    dict_entry = Group(list_item + (colon | equal_sign) + list_item2)
    dict_str.inner = list_of(dict_entry)
    dict_str.inner.setParseAction(lambda toks: dict(toks.asList()))
    dict_str << (lbrace + Optional(dict_str.inner) + rbrace)

    defs = {
        'tuple': tuple_str,
        'list': list_str,
        'dict': dict_str,
        'list_item': list_item,
    }

    return defs
def _getPattern(self):
    """Build and return the pyparsing grammar for expressions.

    Operands are function calls (names taken from ``self.FUNCTIONS``),
    ``{...}`` aggregate columns, ``[...]`` single columns, numbers, quoted
    strings, the current-value marker ``.`` and bare identifiers.  They
    are combined by three operator layers: arithmetic (``^``, ``* /``,
    ``+ -``), comparison, and ``AND`` / ``OR``.  Evaluation is done by the
    private ``__eval*`` / ``__*Op`` methods attached as parse actions.

    Returns:
        The logic-expression grammar followed by end of input.
    """
    arith_expr = Forward()
    comp_expr = Forward()
    logic_expr = Forward()
    LPAR, RPAR = map(Suppress, "()")
    identifier = Word(alphas + "_", alphanums + "_")
    multop = oneOf('* /')
    plusop = oneOf('+ -')
    expop = Literal("^")
    compop = oneOf('> < >= <= != ==')
    andop = Literal("AND")
    orop = Literal("OR")
    current_value = Literal(".")
    # notop = Literal('NOT')
    function = oneOf(' '.join(self.FUNCTIONS))
    function_call = Group(function.setResultsName('fn') + LPAR
                          + Optional(delimitedList(arith_expr)) + RPAR)
    aggregate_column = QuotedString(quoteChar='{', endQuoteChar='}')
    single_column = QuotedString(quoteChar='[', endQuoteChar=']')
    integer = Regex(r"-?\d+")
    real = Regex(r"-?\d+\.\d*")

    # The actions are bound to this instance, so they go on a private copy:
    # setParseAction() modifies the expression in place and quotedString is
    # shared by every pyparsing user in the process.
    quoted = quotedString.copy()
    quoted.setParseAction(self.__evalString).addParseAction(removeQuotes)

    # quotedString enables strings without quotes to pass
    operand = \
        function_call.setParseAction(self.__evalFunction) | \
        aggregate_column.setParseAction(self.__evalAggregateColumn) | \
        single_column.setParseAction(self.__evalSingleColumn) | \
        ((real | integer).setParseAction(self.__evalConstant)) | \
        quoted | \
        current_value.setParseAction(self.__evalCurrentValue) | \
        identifier.setParseAction(self.__evalString)

    arith_expr << operatorPrecedence(operand, [
        (expop, 2, opAssoc.LEFT, self.__expOp),
        (multop, 2, opAssoc.LEFT, self.__multOp),
        (plusop, 2, opAssoc.LEFT, self.__addOp),
    ])

    # comp_expr = Group(arith_expr + compop + arith_expr)
    comp_expr << operatorPrecedence(arith_expr, [
        (compop, 2, opAssoc.LEFT, self.__evalComparisonOp),
    ])

    logic_expr << operatorPrecedence(comp_expr, [
        (andop, 2, opAssoc.LEFT, self.__evalLogicOp),
        (orop, 2, opAssoc.LEFT, self.__evalLogicOp),
    ])

    pattern = logic_expr + StringEnd()
    return pattern
def pyparse_gml():
    """A pyparsing tokenizer for GML graph format.

    This is not intended to be called directly.

    The grammar is built on the first call and cached in the module-level
    ``graph`` variable; later calls return the cached expression.

    See Also
    --------
    write_gml, read_gml, parse_gml

    Notes
    -----
    This doesn't implement the complete GML specification for
    nested attributes for graphs, edges, and nodes.
    """
    global graph

    try:
        from pyparsing import \
            Literal, CaselessLiteral, Word,\
            ZeroOrMore, Group, Dict, Optional, Combine,\
            ParseException, restOfLine, White, alphanums, nums,\
            OneOrMore, quotedString, removeQuotes, dblQuotedString
    except ImportError:
        # Call form of raise is valid on both Python 2 and Python 3.
        raise ImportError(
            "Import Error: not able to import pyparsing: http://pyparsing.wikispaces.com/")

    if not graph:
        creator = Literal("Creator") + Optional(restOfLine)
        graphkey = Literal("graph").suppress()
        lbrack = Literal("[").suppress()
        rbrack = Literal("]").suppress()
        pound = ("#")
        comment = pound + Optional(restOfLine)
        white = White(" \t\n")
        point = Literal(".")
        e = CaselessLiteral("E")
        integer = Word(nums).setParseAction(lambda s, l, t: [int(t[0])])
        real = Combine(
            Word("+-" + nums, nums)
            + Optional(point + Optional(Word(nums)))
            + Optional(e + Word("+-" + nums, nums))
        ).setParseAction(lambda s, l, t: [float(t[0])])
        key = Word(alphanums)
        # Copy before attaching the action: setParseAction() modifies the
        # expression in place and quotedString is shared process-wide.
        quoted = quotedString.copy().setParseAction(removeQuotes)
        value = integer ^ real ^ Word(alphanums) ^ quoted
        # Key and value must each be followed by explicit whitespace.
        keyvalue = Dict(Group(key + OneOrMore(white).suppress()
                              + value + OneOrMore(white).suppress()))
        node = Group(Literal("node") + lbrack + OneOrMore(keyvalue) + rbrack)
        edge = Group(Literal("edge") + lbrack + OneOrMore(keyvalue) + rbrack)
        graph = Optional(creator) +\
            graphkey + lbrack + ZeroOrMore(edge | node | keyvalue) + rbrack
        graph.ignore(comment)

    return graph
class SDKConfig:
    """
    Evaluates conditional expressions based on the build's sdkconfig and Kconfig files.
    This also defines the grammar of conditional expressions.
    """

    # A configuration entry is in the form CONFIG=VALUE. Definitions of components of that grammar
    IDENTIFIER = Word(alphanums.upper() + '_')

    HEX = Combine('0x' + Word(hexnums)).setParseAction(lambda t: int(t[0], 16))
    DECIMAL = Combine(Optional(Literal('+') | Literal('-')) + Word(nums)).setParseAction(lambda t: int(t[0]))
    LITERAL = Word(printables.replace(':', ''))
    # Copy before attaching the action: setParseAction() modifies the
    # expression in place and quotedString is shared process-wide.
    QUOTED_LITERAL = quotedString.copy().setParseAction(removeQuotes)

    # NOTE(review): LITERAL is tried before QUOTED_LITERAL and its character
    # set still includes the quote characters, so QUOTED_LITERAL looks
    # unreachable here.  Order kept as-is; confirm whether that is intended.
    VALUE = HEX | DECIMAL | LITERAL | QUOTED_LITERAL

    # Operators supported by the expression evaluation
    OPERATOR = oneOf(['=', '!=', '>', '<', '<=', '>='])

    def __init__(self, kconfig_file, sdkconfig_file):
        """Load the Kconfig definition and the sdkconfig values.

        :param kconfig_file: passed to ``kconfiglib.Kconfig``
        :param sdkconfig_file: passed to ``Kconfig.load_config``
        """
        self.config = kconfiglib.Kconfig(kconfig_file)
        self.config.load_config(sdkconfig_file)

    def evaluate_expression(self, expression):
        """Evaluate ``expression`` with kconfiglib's ``eval_string``.

        :returns: ``False`` when the result is 0 (n), ``True`` when it is 2 (y)
        :raises Exception: for any other result (m)
        """
        result = self.config.eval_string(expression)

        if result == 0:  # n
            return False
        elif result == 2:  # y
            return True
        else:  # m
            raise Exception('unsupported config expression result')

    @staticmethod
    def get_expression_grammar():
        """Return the pyparsing grammar for conditional expressions.

        A condition is ``IDENTIFIER`` or ``IDENTIFIER OPERATOR VALUE``,
        optionally wrapped in parentheses; conditions combine with ``!``,
        ``&&`` and ``||``.
        """
        identifier = SDKConfig.IDENTIFIER.setResultsName('identifier')
        operator = SDKConfig.OPERATOR.setResultsName('operator')
        value = SDKConfig.VALUE.setResultsName('value')

        test_binary = identifier + operator + value
        test_single = identifier

        test = test_binary | test_single

        condition = Group(Optional('(').suppress() + test + Optional(')').suppress())

        grammar = infixNotation(condition, [
            ('!', 1, opAssoc.RIGHT),
            ('&&', 2, opAssoc.LEFT),
            ('||', 2, opAssoc.LEFT)])

        return grammar
def get_standard_type_defs(word=word_free):
    """
    Return dict of the pyparsing base lexical elements.

    The compound types (tuple, list, dict) can contain compound types or
    simple types such as integers, floats and words.

    Parameters
    ----------
    word : lexical element
        A custom lexical element for word.

    Returns
    -------
    defs : dict
        The dictionary with the following items:

        - tuple: (..., ..., ...)
        - list: [..., ...., ...]
        - dict: {...:..., ...:..., ....} or {...=..., ...=..., ....}
        - list_item: any of preceding compound types or simple types
    """
    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()

    # Attach removeQuotes to a private copy; the module-level quotedString
    # is shared and setParseAction() would modify it in place.
    unquoted = quotedString.copy().setParseAction(removeQuotes)

    list_item = (none | boolean | cmplx | real | integer | list_str
                 | tuple_str | dict_str | unquoted | word)
    # A dict value may be missing, in which case None is produced.
    list_item2 = list_item | Empty().setParseAction(lambda: [None])

    tuple_str.inner = list_of(list_item)
    tuple_str.inner.setParseAction(lambda toks: tuple(toks.asList()))
    tuple_str << (lparen + tuple_str.inner + rparen)

    # The list body reuses the tuple body grammar; the action wraps the
    # items in an outer list.
    list_str.inner = tuple_str.inner.copy()
    list_str.inner.setParseAction(lambda toks: [list(toks)])
    list_str << (lbrack + list_str.inner + rbrack)

    dict_entry = Group(list_item + (colon | equal_sign) + list_item2)
    dict_str.inner = list_of(dict_entry)
    dict_str.inner.setParseAction(lambda toks: dict(toks.asList()))
    dict_str << (lbrace + Optional(dict_str.inner) + rbrace)

    defs = {'tuple': tuple_str,
            'list': list_str,
            'dict': dict_str,
            'list_item': list_item}

    return defs
def define_string(self):
    """
    Return the syntax definition for a string.

    **Do not override this method**, it's not necessary: it already
    supports unicode strings. If you want to override the delimiters,
    check :attr:`T_QUOTES`.

    The returned expression strips the surrounding quotes and then passes
    the tokens to :meth:`make_string`.
    """
    # Work on a private copy: both setParseAction() and setName() modify
    # the expression in place, and quotedString is a module-level object
    # shared by every pyparsing user in the process.
    string = quotedString.copy()
    string.setParseAction(removeQuotes, self.make_string)
    string.setName("string")
    return string
def _getPattern(self):
    """Build and return the pyparsing grammar for expressions.

    Operands are function calls (names taken from ``self.FUNCTIONS``),
    ``{...}`` aggregate columns, ``[...]`` single columns, numbers, quoted
    strings, the current-value marker ``.`` and bare identifiers.  They
    are combined by three operator layers: arithmetic (``^``, ``* /``,
    ``+ -``), comparison, and ``AND`` / ``OR``.  Evaluation is done by the
    private ``__eval*`` / ``__*Op`` methods attached as parse actions.

    Returns:
        The logic-expression grammar followed by end of input.
    """
    arith_expr = Forward()
    comp_expr = Forward()
    logic_expr = Forward()

    LPAR, RPAR = map(Suppress, "()")
    identifier = Word(alphas + "_", alphanums + "_")

    multop = oneOf('* /')
    plusop = oneOf('+ -')
    expop = Literal("^")
    compop = oneOf('> < >= <= != ==')
    andop = Literal("AND")
    orop = Literal("OR")
    current_value = Literal(".")
    # notop = Literal('NOT')

    function = oneOf(' '.join(self.FUNCTIONS))
    function_call = Group(
        function.setResultsName('fn') + LPAR
        + Optional(delimitedList(arith_expr)) + RPAR)
    aggregate_column = QuotedString(quoteChar='{', endQuoteChar='}')
    single_column = QuotedString(quoteChar='[', endQuoteChar=']')
    integer = Regex(r"-?\d+")
    real = Regex(r"-?\d+\.\d*")

    # Instance-bound actions must not be attached to the module-level
    # quotedString (setParseAction() modifies it in place for the whole
    # process), so a private copy is used.
    quoted_operand = quotedString.copy()
    quoted_operand.setParseAction(self.__evalString)
    quoted_operand.addParseAction(removeQuotes)

    # quotedString enables strings without quotes to pass
    operand = \
        function_call.setParseAction(self.__evalFunction) | \
        aggregate_column.setParseAction(self.__evalAggregateColumn) | \
        single_column.setParseAction(self.__evalSingleColumn) | \
        ((real | integer).setParseAction(self.__evalConstant)) | \
        quoted_operand | \
        current_value.setParseAction(self.__evalCurrentValue) | \
        identifier.setParseAction(self.__evalString)

    arith_expr << operatorPrecedence(operand, [
        (expop, 2, opAssoc.LEFT, self.__expOp),
        (multop, 2, opAssoc.LEFT, self.__multOp),
        (plusop, 2, opAssoc.LEFT, self.__addOp),
    ])

    # comp_expr = Group(arith_expr + compop + arith_expr)
    comp_expr << operatorPrecedence(arith_expr, [
        (compop, 2, opAssoc.LEFT, self.__evalComparisonOp),
    ])

    logic_expr << operatorPrecedence(
        comp_expr, [(andop, 2, opAssoc.LEFT, self.__evalLogicOp),
                    (orop, 2, opAssoc.LEFT, self.__evalLogicOp)])

    pattern = logic_expr + StringEnd()
    return pattern
def parse_poi_file(self):
    """Scan ``self.poi_file`` for POI entries and collect them.

    An entry is two numeric values followed by a quoted string.  Every
    match found anywhere in the file is appended to ``self.latlon`` as a
    list ``[number, number, text]`` (quotes removed) and printed.

    Raises:
        TypeError: re-raised after printing a message.
    """
    floatNumber = Regex(r'-?\d+(\.\d*)?([eE][\+-]\d+)?').setParseAction(
        lambda s, l, t: [float(t[0])])
    # NOTE(review): the fraction and exponent of floatNumber are optional,
    # so it appears to match everything `integer` would and is tried first.
    integer = Word(nums).setParseAction(lambda s, l, t: [long(t[0])])
    numericValue = floatNumber | integer
    # Copy before attaching the action: setParseAction() modifies the
    # expression in place and quotedString is shared process-wide.
    label = quotedString.copy().setParseAction(removeQuotes)
    poiline = numericValue + numericValue + label

    try:
        # The context manager closes the file even if reading fails.
        with open(self.poi_file) as poi:
            contents = poi.read()
        for a in poiline.searchString(contents):
            self.latlon.append(a.asList())
            print(a.asList())
    except TypeError:
        print("failed to open poi file")
        raise
def __init__(self):
    """Build the command-line grammar and store it in ``self.parser``.

    A command is a word followed by any number of arguments.  An argument
    is a positional value (quoted string or a run of letters, digits,
    ``_``, ``.`` and ``/``), a ``-name:value`` / ``-name=value`` pair, or
    a bare ``-name`` flag; the dash may be single or double.  The match is
    grouped under the results name "command".
    """
    dash = Word("-", max=2)
    operator = oneOf(": =")

    # Copy before attaching the action: setParseAction() modifies the
    # expression in place and quotedString is shared process-wide.
    argValueType1 = quotedString.copy().setParseAction(removeQuotes)
    # Raw string: same pattern text, without the invalid "\." escape warning.
    argValueType2 = Regex(r"[a-zA-Z0-9_\./]+")

    positionalArgument = (argValueType1 | argValueType2)
    regularArgument = Combine(dash + Word(alphas) + operator
                              + (argValueType1 | argValueType2))
    novalueArgument = Combine(dash + Word(alphas))

    arguments = ZeroOrMore(positionalArgument | regularArgument
                           | novalueArgument)

    self.parser = Group(Word(alphas) + arguments).setResultsName("command")
def parse_poi_file(self):
    """Scan ``self.poi_file`` for POI entries and collect them.

    An entry is two numeric values followed by a quoted string.  Every
    match found anywhere in the file is appended to ``self.latlon`` as a
    list ``[number, number, text]`` (quotes removed) and printed.

    Raises:
        TypeError: re-raised after printing a message.
    """
    floatNumber = Regex(r'-?\d+(\.\d*)?([eE][\+-]\d+)?').setParseAction(
        lambda s, l, t: [float(t[0])])
    # NOTE(review): floatNumber's fraction and exponent are optional, so it
    # appears to shadow `integer` in the alternation below.
    integer = Word(nums).setParseAction(lambda s, l, t: [long(t[0])])
    numericValue = floatNumber | integer

    # Attach removeQuotes to a private copy; the module-level quotedString
    # is shared and setParseAction() would modify it in place.
    description = quotedString.copy().setParseAction(removeQuotes)
    poiline = numericValue + numericValue + description

    try:
        # Read inside a context manager so the handle is always closed.
        with open(self.poi_file) as handle:
            text = handle.read()
        for a in poiline.searchString(text):
            self.latlon.append(a.asList())
            print(a.asList())
    except TypeError:
        print("failed to open poi file")
        raise
def pyparse_gml():
    """pyparser tokenizer for GML graph format

    This doesn't implement the complete GML specification for
    nested attributes for graphs, edges, and nodes.

    The grammar is built on the first call and cached in the module-level
    ``graph`` variable; later calls return the cached expression.
    """
    global graph

    try:
        from pyparsing import \
            Literal, CaselessLiteral, Word,\
            ZeroOrMore, Group, Dict, Optional, Combine,\
            ParseException, restOfLine, White, alphanums, nums,\
            OneOrMore, quotedString, removeQuotes, dblQuotedString
    except ImportError:
        # Call form of raise is valid on both Python 2 and Python 3.
        raise ImportError(
            "Import Error: not able to import pyparsing: http://pyparsing.wikispaces.com/")

    if not graph:
        creator = Literal("Creator") + Optional(restOfLine)
        graphkey = Literal("graph").suppress()
        lbrack = Literal("[").suppress()
        rbrack = Literal("]").suppress()
        pound = ("#")
        comment = pound + Optional(restOfLine)
        white = White(" \t\n")
        point = Literal(".")
        e = CaselessLiteral("E")
        integer = Word(nums).setParseAction(lambda s, l, t: [int(t[0])])
        real = Combine(
            Word("+-" + nums, nums)
            + Optional(point + Optional(Word(nums)))
            + Optional(e + Word("+-" + nums, nums))
        ).setParseAction(lambda s, l, t: [float(t[0])])
        key = Word(alphanums)
        # Copy before attaching the action: setParseAction() modifies the
        # expression in place and quotedString is shared process-wide.
        quoted = quotedString.copy().setParseAction(removeQuotes)
        value = integer ^ real ^ Word(alphanums) ^ quoted
        # Key and value must each be followed by explicit whitespace.
        keyvalue = Dict(Group(key + OneOrMore(white).suppress()
                              + value + OneOrMore(white).suppress()))
        node = Group(Literal("node") + lbrack + OneOrMore(keyvalue) + rbrack)
        edge = Group(Literal("edge") + lbrack + OneOrMore(keyvalue) + rbrack)
        graph = Optional(creator) +\
            graphkey + lbrack + OneOrMore(edge | node | keyvalue) + rbrack
        graph.ignore(comment)

    return graph
def load_js_obj_literal(j):
    """Terrible hack.

    Parse the JavaScript object literal contained in the text ``j``.

    Everything before the first ``{`` is dropped, newlines, tabs and
    semicolons are removed, and a ``//`` comment running up to the next
    ``{`` is stripped.  The remainder is parsed with a relaxed dict/list
    grammar (bare-word or quoted keys and strings, ints, reals,
    ``true``/``false``) that must match the whole text.

    Raises ``ValueError`` (from ``str.index``) when ``j`` has no ``{``.
    """
    j = j[j.index('{'):]
    j = j.replace('\n', '').replace('\t', '')
    j = j.replace(';', '')
    j = re.sub(r'//.*?{', r'{', j)

    LBRACK, RBRACK, LBRACE, RBRACE, COLON, COMMA = map(Suppress, "[]{}:,")
    integer = Regex(r"[+-]?\d+").setParseAction(lambda t: int(t[0]))
    real = Regex(r"[+-]?\d+\.\d*").setParseAction(lambda t: float(t[0]))
    # Copy before attaching the action: setParseAction() modifies the
    # expression in place and quotedString is shared process-wide.
    quoted = quotedString.copy().setParseAction(removeQuotes)
    string_ = Word(alphas, alphanums + "_") | quoted
    bool_ = oneOf("true false").setParseAction(lambda t: t[0] == "true")

    item = Forward()
    key = string_
    dict_ = LBRACE - Optional(dictOf(key + COLON, item + Optional(COMMA))) + RBRACE
    list_ = LBRACK - Optional(delimitedList(item)) + RBRACK
    # `real` is tried before `integer` so "1.5" is not cut short at "1".
    item << (real | integer | string_ | bool_ | Group(list_ | dict_))

    result = item.parseString(j, parseAll=True)[0]
    return result
def create_bnf():
    """Assemble the pyparsing grammar for the inside of a dict literal.

    The grammar understands signed integers and reals, quoted strings, bare
    words, and nested lists, tuples and dicts.  A dict value may be left
    empty, in which case it is reported as ``None``.

    Returns the ``dict_inner`` expression: comma-separated ``key: value``
    entries with an optional trailing comma and without surrounding braces,
    whose tokens are converted to a ``dict``.
    """
    def to_int(toks):
        return int(toks[0])

    def to_float(toks):
        return float(toks[0])

    def to_tuple(toks):
        return tuple(toks.asList())

    def to_dict(toks):
        return dict(toks.asList())

    def none_token():
        return [None]

    def sign():
        # fresh optional "+"/"-" prefix for each number expression
        return Optional(oneOf("+ -"))

    # punctuation is matched but never shows up in the results
    lparen, rparen, lbrack, rbrack, lbrace, rbrace, colon = [
        Suppress(ch) for ch in "()[]{}:"]

    integer = Combine(sign() + Word(nums)).setName("integer")
    integer.setParseAction(to_int)

    exponent = Optional("e" + sign() + Word(nums))
    real = Combine(
        sign() + Word(nums) + "." + Optional(Word(nums)) + exponent
    ).setName("real")
    real.setParseAction(to_float)

    # containers can nest, so they are declared first and filled in below
    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()

    quoted = quotedString.setParseAction(removeQuotes)
    bare_word = Word(alphas8bit + alphas, alphas8bit + alphanums + "_")
    list_item = (real | integer | Group(list_str) | tuple_str | dict_str
                 | quoted | bare_word)
    # a dict value may be missing altogether
    list_item2 = list_item | Empty().setParseAction(none_token)

    trailing_comma = Optional(Suppress(","))

    tuple_str << (Suppress("(")
                  + Optional(delimitedList(list_item))
                  + trailing_comma
                  + Suppress(")"))
    tuple_str.setParseAction(to_tuple)

    list_str << (lbrack
                 + Optional(delimitedList(list_item) + Optional(Suppress(",")))
                 + rbrack)

    dict_entry = Group(list_item + colon + list_item2)
    dict_inner = delimitedList(dict_entry) + Optional(Suppress(","))
    dict_inner.setParseAction(to_dict)
    dict_str << (lbrace + Optional(dict_inner) + rbrace)

    return dict_inner
# Punctuation and quoting tokens (matched, then dropped from the results)
rpar = Literal(")").suppress()
backtick = Literal("`").suppress()
singlequote = Literal("'").suppress()
doublequote = Literal("\"").suppress()

# Column and table names, optionally wrapped in backticks
identifier = Combine(Word(alphas + "_", alphanums + "_$"))
columnName = identifier | (backtick + identifier + backtick)
tableName = identifier | (backtick + identifier + backtick)

# Numeric literals, converted to int / float
arithSign = Word("+-", exact=1)
intNum = Combine(Optional(arithSign) + Word(nums)).setParseAction(
    lambda toks: int(toks[0]))
realNum = Combine(
    Optional(arithSign) + Word(nums) + "." + Word(nums)
).setParseAction(lambda toks: float(toks[0]))
# TODO Support scientific notation with a mantissa? e.g. 1e10

# Right-hand-side values.  ``|`` takes the first alternative that matches,
# so realNum must be tried before intNum: otherwise "1.5" is read as the
# integer 1 and the ".5" is left behind unparsed.
columnRval = realNum | intNum | quotedString.setParseAction(
    lambda toks: toks[0][1:-1])  # Removes quotes

# Comparison operators, each replaced by a symbolic name in the results
eq_ = Literal("=").setParseAction(replaceWith('eq_'))
neq_ = oneOf("!= <>").setParseAction(replaceWith('neq_'))
gt_ = oneOf(">").setParseAction(replaceWith('gt_'))
ge_ = oneOf(">=").setParseAction(replaceWith('ge_'))
lt_ = oneOf("<").setParseAction(replaceWith('lt_'))
le_ = oneOf("<=").setParseAction(replaceWith('le_'))
in_ = CaselessKeyword("in").setParseAction(replaceWith('in_'))
nin_ = CaselessKeyword("not in").setParseAction(replaceWith('nin_'))
# TODO Add support for LIKE

# Boolean connectives
and_ = CaselessKeyword(AND_KEYWORD)
or_ = CaselessKeyword(OR_KEYWORD)
# TODO Add support for NOT
def parse_element(cls, indent_stack):
    """Return the grammar for the ``@parse_first(...)`` decorator.

    The expression matches the keyword ``@parse_first`` followed by a
    parenthesised, comma-separated list of quoted strings.  The keyword and
    the parentheses are suppressed, the quotes are removed, and the match is
    stored under the results name ``parse_first``.

    ``indent_stack`` is accepted but not used by this grammar.
    """
    keyword = Keyword("@parse_first").suppress()
    opening = Literal('(').suppress()
    quoted_items = delimitedList(quotedString.setParseAction(removeQuotes))
    closing = Literal(')').suppress()

    decorator = keyword + opening + quoted_items + closing
    return decorator.setResultsName("parse_first")
def _create_field_parser():
    """Build the pyparsing grammar for bibfield rule definitions.

    Grammar as implemented below (INDENT / UNDENT are the indentation
    markers tracked through ``indent_stack``; ``(...)``, ``[...]`` and
    ``{...}`` stand for balanced nested groups)::

        config       ::= (rule | include | comment)+
        rule         ::= ["@persistent_identifier" (...)] ["@inherit_from" (...)]
                         ["@override"] ["@extend"]
                         json_id ["," aliases] ":" INDENT body UNDENT
        include      ::= "include" QUOTED_STRING
        comment      ::= "#" rest-of-line
        json_id      ::= identifier ["[0]" | "[n]"]
        aliases      ::= alias ("," alias)*
        body         ::= field_def, checker, json, description, producer and
                         schema, each optional, in any order
        field_def    ::= creator | derived | calculated
        creator      ::= "creator" ":" INDENT creator_body+ UNDENT
        creator_body ::= decorator* source_format "," source_tag "," expr
        derived      ::= "derived" ":" INDENT decorator* expr UNDENT
        calculated   ::= "calculated" ":" INDENT decorator* expr UNDENT
        decorator    ::= ("@parse_first" | "@depends_on" | "@only_if"
                          | "@only_if_value" | "@memoize" | "@legacy") (...)
        checker      ::= "checker" ":" INDENT checker_function+ UNDENT
        json         ::= "json:" INDENT dumps and loads, any order UNDENT
        description  ::= "description:" INDENT DOC_STRING UNDENT | DOC_STRING
        producer     ::= "producer:" INDENT (ident (...) "," expr)+ UNDENT
        schema       ::= "schema:" INDENT {...} UNDENT
        expr         ::= {...} | [...] | ident [...] | call | rest-of-line

    Returns the ``config`` expression.
    """
    indent_stack = [1]

    def check_sub_indent(text, location, tokens):
        # accept only a column deeper than the current level, and record it
        column = col(location, text)
        if column <= indent_stack[-1]:
            raise ParseException(text, location, "not a subentry")
        indent_stack.append(column)

    def check_unindent(text, location, tokens):
        if location >= len(text):
            return
        column = col(location, text)
        if column >= indent_stack[-1] or column > indent_stack[-2]:
            raise ParseException(text, location, "not an unindent")

    def do_unindent():
        indent_stack.pop()

    INDENT = (lineEnd.suppress() + empty
              + empty.copy().setParseAction(check_sub_indent))
    UNDENT = FollowedBy(empty).setParseAction(check_unindent)
    # NOTE(review): setParseAction is called a second time on the same
    # element; confirm whether it is meant to replace check_unindent.
    UNDENT.setParseAction(do_unindent)

    # --- small local helpers -------------------------------------------
    def join_tokens(tokens):
        return "".join(tokens)

    def first_token(toks):
        return toks[0]

    def first_value(toks):
        return toks.value[0]

    def indexed(word):
        # a word optionally followed by "[0]" or "[n]"
        return word + Optional(oneOf("[0] [n]"))

    def parenthesised():
        # a balanced "( ... )" group kept as its original source text
        return originalTextFor(nestedExpr("(", ")"))

    def text_decorator(keyword, results_name, list_all=False):
        expr = Suppress(keyword) + parenthesised()
        return expr.setResultsName(results_name, listAllMatches=list_all)

    def section(label, content):
        # label ":" followed by an indented block holding ``content``
        return label + Suppress(":") + INDENT + content + UNDENT

    # --- identifiers ---------------------------------------------------
    json_id = (
        indexed(Word(alphas + "_", alphanums + "_"))
        .setResultsName("json_id", listAllMatches=True)
        .setParseAction(join_tokens)
    )
    aliases = delimitedList(
        indexed(Word(alphanums + "_")).setParseAction(join_tokens)
    ).setResultsName("aliases")
    ident = Word(alphas + "_", alphanums + "_")

    # --- python expressions allowed as rule values -----------------------
    dict_def = originalTextFor(nestedExpr("{", "}"))
    list_def = originalTextFor(nestedExpr("[", "]"))
    # both kinds of subscript access share one expression object
    dict_access = list_access = originalTextFor(ident + nestedExpr("[", "]"))
    function_call = originalTextFor(
        ZeroOrMore(ident + ".") + ident + nestedExpr("(", ")"))

    any_expr = (dict_def ^ list_def ^ dict_access ^ list_access
                ^ function_call ^ restOfLine)
    python_allowed_expr = any_expr.setResultsName("value", listAllMatches=True)

    # --- field decorators --------------------------------------------------
    persistent_identifier = (
        Suppress("@persistent_identifier") + nestedExpr("(", ")")
    ).setResultsName("persistent_identifier")
    legacy = text_decorator("@legacy", "legacy", list_all=True)
    only_if = text_decorator("@only_if", "only_if")
    only_if_master_value = text_decorator("@only_if_value",
                                          "only_if_master_value")
    depends_on = text_decorator("@depends_on", "depends_on")
    parse_first = text_decorator("@parse_first", "parse_first")
    memoize = (Suppress("@memoize")
               + nestedExpr("(", ")")).setResultsName("memoize")
    field_decorator = (parse_first ^ depends_on ^ only_if
                       ^ only_if_master_value ^ memoize ^ legacy)

    # --- independent decorators ----------------------------------------------
    inherit_from = text_decorator("@inherit_from", "inherit_from")
    override = (Suppress("@") + "override").setResultsName("override")
    extend = (Suppress("@") + "extend").setResultsName("extend")
    master_format = text_decorator(
        "@master_format", "master_format").setParseAction(first_token)

    # --- derived / calculated / creator ----------------------------------------
    derived_calculated_body = (
        ZeroOrMore(field_decorator) + python_allowed_expr
    ).setResultsName("derived_calculated_def")
    derived = section("derived", derived_calculated_body)
    calculated = section("calculated", derived_calculated_body)

    source_tag = quotedString.setParseAction(removeQuotes).setResultsName(
        "source_tag", listAllMatches=True)
    source_format = Word(alphas, alphanums + "_").setResultsName(
        "source_format", listAllMatches=True)
    creator_body = (
        ZeroOrMore(field_decorator)
        + source_format + Suppress(",")
        + source_tag + Suppress(",")
        + python_allowed_expr
    ).setResultsName("creator_def", listAllMatches=True)
    creator = section("creator", OneOrMore(creator_body))

    field_def = (creator | derived | calculated).setResultsName(
        "type_field", listAllMatches=True)

    # --- JsonExtra ---------------------------------------------------------------
    json_dumps = (
        (Suppress("dumps") + Suppress(",") + python_allowed_expr)
        .setResultsName("dumps")
        .setParseAction(first_value)
    )
    json_loads = (
        (Suppress("loads") + Suppress(",") + python_allowed_expr)
        .setResultsName("loads")
        .setParseAction(first_value)
    )
    json_extra = (
        Suppress("json:") + INDENT + Each((json_dumps, json_loads)) + UNDENT
    ).setResultsName("json_ext")

    # --- checker -----------------------------------------------------------------
    checker_function = (
        Optional(master_format)
        + ZeroOrMore(ident + ".") + ident
        + originalTextFor(nestedExpr("(", ")"))
    ).setResultsName("checker", listAllMatches=True)
    checker = section("checker", OneOrMore(checker_function))

    # --- description / documentation -----------------------------------------------
    doc_double = QuotedString(quoteChar='"""', multiline=True)
    doc_single = QuotedString(quoteChar="'''", multiline=True)
    doc_string = INDENT + (doc_double | doc_single) + UNDENT
    description_body = (Suppress("description:") + doc_string).setParseAction(
        lambda toks: toks[0][0])
    description = (description_body | doc_double | doc_single).setResultsName(
        "description")

    # --- producer ------------------------------------------------------------------
    producer_code = (
        Word(alphas, alphanums + "_") + originalTextFor(nestedExpr("(", ")"))
    ).setResultsName("producer_code", listAllMatches=True)
    producer_body = (
        producer_code + Suppress(",") + python_allowed_expr
    ).setResultsName("producer_rule", listAllMatches=True)
    producer = Suppress("producer:") + INDENT + OneOrMore(producer_body) + UNDENT

    # --- schema --------------------------------------------------------------------
    schema = (
        (Suppress("schema:") + INDENT + dict_def + UNDENT)
        .setParseAction(first_token)
        .setResultsName("schema")
    )

    # --- whole rule ------------------------------------------------------------------
    body = (
        Optional(field_def)
        & Optional(checker)
        & Optional(json_extra)
        & Optional(description)
        & Optional(producer)
        & Optional(schema)
    )
    comment = Literal("#") + restOfLine + LineEnd()
    include = (Suppress("include") + quotedString).setResultsName(
        "includes", listAllMatches=True)

    rule_prefix = (Optional(persistent_identifier)
                   + Optional(inherit_from)
                   + Optional(override)
                   + Optional(extend))
    rule = (
        rule_prefix
        + json_id
        + Optional(Suppress(",") + aliases)
        + Suppress(":")
        + INDENT
        + body
        + UNDENT
    ).setResultsName("rules", listAllMatches=True)

    return OneOrMore(rule | include | comment.suppress())
from utils import choose_one, error_exit REGEX_SPECIAL_CHARS = r'([\.\*\+\?\|\(\)\{\}\[\]])' REGEX_LOG_FORMAT_VARIABLE = r'\$([a-z0-9\_]+)' LOG_FORMAT_COMBINED = '$remote_addr - $remote_user [$time_local] ' \ '"$request" $status $body_bytes_sent ' \ '"$http_referer" "$http_user_agent"' # common parser element semicolon = Literal(';').suppress() # nginx string parameter can contain any character except: { ; " ' parameter = Word(''.join(c for c in printables if c not in set('{;"\''))) # which can also be quoted parameter = parameter | quotedString.setParseAction(removeQuotes) def detect_config_path(): """ Get nginx configuration file path based on `nginx -V` output :return: detected nginx configuration file path """ try: proc = subprocess.Popen(['nginx', '-V'], stderr=subprocess.PIPE) except OSError: error_exit('Access log file or format was not set and nginx config file cannot be detected. ' + 'Perhaps nginx is not in your PATH?') stdout, stderr = proc.communicate() version_output = stderr.decode('utf-8')
symbol=t[0][0] d={} addToTree(symbol,d) treestack.append(curtree) curtree=d def endBlock(s,l,t): global curtree,treestack debugToks("endBlock",s,l,t) curtree=treestack.pop() symbol=Word(alphas+'_',alphanums+'_') hexval=Combine(Literal('0x')+Word(nums+'abcdefABCDEF')).setParseAction(convertHex) decval=Word(nums).setParseAction(convertDec) enumval=Word(alphas+'_',alphanums+'_').setParseAction(convertEnum) stringval=quotedString.setParseAction(convertStr) value=hexval|decval|quotedString|enumval assignment=Group(symbol+'='+value).setParseAction(addAssignment) block=Forward() statement=assignment|block block<<Group(symbol+'{').setParseAction(startBlock)+ZeroOrMore(statement)+Literal('}').setParseAction(endBlock) comment=cStyleComment|(Literal('//')+restOfLine) config=ZeroOrMore(statement).ignore(comment) def parseFile(s): global config, tree, curtree tree={} curtree=tree treestack=[] config.parseFile(s) return tree
# NOTE: there may be a bug in IRIRef.*: those components appear to assume
# that the IRIref is always a URIRef, although it could also be a QName.
DatasetClause = FROM + (
    IRIref.copy().setParseAction(refer_component(components.RemoteGraph))
    | (NAMED
       + IRIref.copy().setParseAction(refer_component(components.NamedGraph)))
)

# String:
#
# TODO: extend this to cover multiline strings, and look into a possible
# bug in Expression.ParsedString, which does not seem to expand escaped
# characters properly.
String = quotedString.setParseAction(
    composition2([removeQuotes, components.ParsedString]))

# RDFLiteral
AT = Suppress('@')
LANGTAG = AT + Regex(
    PN_CHARS_BASE_re + '+' + regex_group('-[a-zA-Z0-9]+') + '*')
DOUBLE_HAT = Suppress('^^')

# Readable names for the expressions above, only when debugging
if DEBUG:
    DatasetClause.setName('DatasetClause')
    String.setName('String')

# (escaped form, replacement) pairs
unescape_dict = (
    (r'\t', '\t'),
    (r'\n', '\n'),
    (r'\r', '\r'),
    (r'\b', '\b'),
    (r'\f', '\f'),
    (r'\"', '"'),
    (r"\'", "'"),
    (r'\\', '\\'),  # must be done last!
)
pass ident = Word(alphas, alphanums + "_:") columnName = (ident | quotedString())("columnName") whereExpression = Forward() and_ = Keyword("and", caseless=True)('and') or_ = Keyword("or", caseless=True)('or') in_ = Keyword("in", caseless=True)("in") isnotnull = Keyword("is not null", caseless=True)('notnull') binop = oneOf("= != < > >= <=", caseless=True)('binop') intNum = Word(nums) columnRval = (intNum | quotedString.setParseAction(lambda x: x[0][1:-1]))('rval*') whereCondition = Group((columnName + isnotnull) | (columnName + binop + columnRval) | (columnName + in_ + "(" + delimitedList(columnRval) + ")") | ("(" + whereExpression + ")"))('condition') whereExpression << Group(whereCondition + ZeroOrMore( (and_ | or_) + whereExpression))('expression') class SQLValidator(object): """ Parses a subset of SQL to define feature selections. This validates the SQL to make sure the user can't do anything dangerous.""" def __init__(self, s): self._s = s self._errors = [] self._parse_result = None
def _int_literal_value(text):
    """Return the integer value of an integer literal with optional exponent.

    ``int()`` rejects strings such as ``"1e5"``, so the mantissa and the
    exponent are converted separately and combined with integer arithmetic.
    A literal without an exponent is converted by ``int()`` directly.
    """
    mantissa, _, exponent = text.lower().partition("e")
    return int(mantissa) * 10 ** int(exponent or "0")


# Real literal: digits with a decimal point (or a leading point), optional
# sign and optional exponent.
realNum = Combine(
    Optional(arithSign)
    + (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums)))  # noqa
    + Optional(E + Optional(arithSign) + Word(nums)))
realNum.setParseAction(lambda x: expression.NumericLiteral(float(x[0])))

# Integer literal: optional sign, digits, optional non-negative exponent.
intNum = Combine(Optional(arithSign) + Word(nums)
                 + Optional(E + Optional("+") + Word(nums)))
# The grammar above accepts an exponent, which a plain int() call on the
# matched text cannot handle, hence the helper.
intNum.setParseAction(
    lambda x: expression.NumericLiteral(_int_literal_value(x[0])))

number = realNum | intNum

variable = ident.copy()
variable.setParseAction(lambda x: model.Var(x[0]))

# Strip the surrounding quotes and wrap the text in a StringLiteral
quotedString.setParseAction(lambda x: expression.StringLiteral(x[0][1:-1]))

literal = quotedString | number
valueref = variable | literal


def mkterm(x):
    """Wrap the parsed tokens of a term in a ``model.Term``."""
    return model.Term(x)


# predicate "(" comma-separated value references ")"
term = (predicate + drop("(")
        + Group(delimitedList(valueref, ","))
        + drop(")")).setParseAction(mkterm)
def _create_field_parser():
    """Create a parser that can handle field definitions.

    BNF-like grammar::

        rule       ::= [pid | extend | override | hidden]
                       json_id ["," aliases] ":" body
        json_id    ::= IDENT
        aliases    ::= word ("," word)*
        pid        ::= @persistent_identifier( level )
        extend     ::= @extend
        override   ::= @override
        hidden     ::= @hidden
        body       ::= (creator | derived | calculated) and the field
                       extensions, each optional, in any order
        creator    ::= "creator:" ([decorators] format "," tag "," expr)+
        derived    ::= "derived:" [decorators] expr
        calculated ::= "calculated:" [decorators] expr

    Decorators and field extensions are contributed by the entries of
    ``parsers`` through their ``parse_element`` method.

    Returns an expression matching one or more rules, with comments
    suppressed.
    """
    indent_stack = [1]

    def flag_decorator(keyword, results_name):
        # a bare keyword decorator whose presence is reported as ``True``
        return (Keyword(keyword).suppress()
                .setResultsName(results_name)
                .setParseAction(lambda toks: True))

    def extension_elements(base_class):
        # optional grammar elements of every registered parser of this kind
        return [Optional(p.parser.parse_element(indent_stack))
                for p in parsers
                if issubclass(p.parser, base_class)]

    # Independent/special decorators
    def pid_level(toks):
        return int(toks[0][0])

    persistent_identifier = (
        Keyword('@persistent_identifier').suppress() + nestedExpr()
    ).setResultsName('pid').setParseAction(pid_level)
    override = flag_decorator('@override', 'override')
    extend = flag_decorator('@extend', 'extend')
    hidden = flag_decorator('@hidden', 'hidden')
    rule_decorators = (Optional(persistent_identifier) &
                       Optional(override) &
                       Optional(extend) &
                       Optional(hidden))

    # Field definition decorators
    field_decorators = Each(extension_elements(DecoratorBaseExtensionParser))

    # Creator rules
    def build_creator(toks):
        return {
            'source_format': toks[-3],
            'source_tags': toks[-2].split(' '),
            'function': compile(toks[-1].strip(), '', 'eval'),
            'type': 'creator',
            'decorators': toks.decorators.asDict()}

    comma = Literal(',').suppress
    creator_body = (
        Optional(field_decorators).setResultsName('decorators') +
        Word(alphas, alphanums + '_') +
        comma() +
        quotedString.setParseAction(removeQuotes) +
        comma() +
        PYTHON_ALLOWED_EXPR
    ).setParseAction(build_creator).setResultsName(
        'creator_def', listAllMatches=True)
    creator = (Keyword('creator:').suppress() +
               indentedBlock(OneOrMore(creator_body), indent_stack))

    # Derived and calculated rules share one body and differ only in name
    der_calc_body = (Optional(field_decorators).setResultsName('decorators') +
                     PYTHON_ALLOWED_EXPR)

    def computed_rule(kind):
        def build(toks):
            return {
                'source_format': kind,
                'source_tags': None,
                'function': compile(toks[-1].strip(), '', 'eval'),
                'type': kind,
                'decorators': toks.decorators.asDict()}

        block = (Keyword(kind + ':').suppress() +
                 indentedBlock(der_calc_body, indent_stack))
        return block.setParseAction(build).setResultsName(kind + '_def')

    derived = computed_rule('derived')
    calculated = computed_rule('calculated')

    # Sections allowed inside a rule body
    rule_sections = ([Optional(creator | derived | calculated)] +
                     extension_elements(FieldBaseExtensionParser))

    def build_field(toks):
        return {'json_id': toks[0], 'aliases': toks[1:]}

    alias_list = Optional(Suppress(',') + delimitedList(Word(alphanums + '_')))
    json_id = (
        IDENT + alias_list + Suppress(':')
    ).setResultsName('field').setParseAction(build_field)

    rule = Group(Optional(rule_decorators) +
                 json_id +
                 indentedBlock(Each(rule_sections), indent_stack))

    return OneOrMore(COMMENT.suppress() | rule)
def __init__(self, network):
    """Set up empty policy state and build the pyparsing grammars.

    Stores *network*, creates the empty containers used for business
    relationships, user-defined sets/functions, prefix lists and tags, and
    assembles the query grammars.  The following are kept on the instance:
    ``attribute``, ``wildcard``, ``nodeQuery``, ``edgeQuery``,
    ``bgpMatchQuery``, ``bgpSessionQuery``, ``bgpApplicationQuery``,
    ``set_definition``, ``library_def``, ``library_call``,
    ``library_edge_query``, ``library_entry`` and ``bgpPolicyLine``.
    """
    self.network = network
    self.g_business_relationship = nx.DiGraph()
    self.user_defined_sets = {}
    self.user_library_calls = []
    self.user_defined_functions = {}

    # Grammars
    #TODO: tidy this up
    attribute_unnamed = Word(alphanums+'_'+".")
    attribute = attribute_unnamed.setResultsName("attribute")
    self.attribute = attribute

    lt = Literal("<").setResultsName("<")
    le = Literal("<=").setResultsName("<=")
    eq = Literal("=").setResultsName("=")
    ne = Literal("!=").setResultsName("!=")
    ge = Literal(">=").setResultsName(">=")
    gt = Literal(">").setResultsName(">")
    wildcard = Literal("*").setResultsName("wildcard")
    self.wildcard = wildcard

    self.prefix_lists = {}
    self.tags_to_allocate = set()
    self.allocated_tags = {}

    # operator symbol -> function implementing it
    self._opn = {
        '<': operator.lt,
        '<=': operator.le,
        '=': operator.eq,
        '!=': operator.ne,
        '>=': operator.ge,
        '>': operator.gt,
        '&': set.intersection,
        '|': set.union,
    }

    # map alphanum chars to alphanum equivalents for use in tags
    self._opn_to_tag = {
        '<': "lt",
        '<=': "le",
        '=': "eq",
        '!=': "ne",
        '>=': "ge",
        '>': "gt",
        '&': "and",
        '|': "or",
    }

    # Both are of comparison to access in same manner when evaluating.
    # ``|`` takes the first alternative that matches, so each two-character
    # operator has to be tried before the one-character operator it starts
    # with: with ``lt`` ahead of ``le`` the "<" of "<=" was consumed on its
    # own and "<=" could never be parsed.
    comparison = (le | lt | eq | ne | ge | gt).setResultsName("comparison")
    stringComparison = (eq | ne).setResultsName("comparison")
    #
    #quoted string is already present
    float_string = Word(nums).setResultsName("value").setParseAction(
        lambda t: float(t[0]))
    integer_string = Word(nums).setResultsName("value").setParseAction(
        lambda t: int(t[0]))
    #TODO: use numString, and make integer if full stop

    #TODO: allow parentheses? - should be ok as pass to the python parser
    ipField = Word(nums, max=3)
    ipAddress = Combine(
        ipField + "." + ipField + "." + ipField + "." + ipField
    ).setResultsName("ipAddress")

    boolean_and = Literal("&").setResultsName("&")
    boolean_or = Literal("|").setResultsName("|")
    boolean = (boolean_and | boolean_or).setResultsName("boolean")
    self._boolean = boolean  # need to use in checking

    #TODO fix this matching 2a.ab when that should match a string
    numericQuery = Group(
        attribute + comparison + float_string
    ).setResultsName("numericQuery")

    stringValues = (
        attribute_unnamed | quotedString.setParseAction(removeQuotes)
    ).setResultsName("value")

    stringQuery = Group(
        attribute + stringComparison + stringValues
    ).setResultsName("stringQuery")
    wildcardQuery = wildcard.setResultsName("wildcardQuery")

    singleQuery = numericQuery | stringQuery | wildcardQuery
    singleQuery.setFailAction(parse_fail_action)
    self.nodeQuery = singleQuery + ZeroOrMore(boolean + singleQuery)

    # Edge direction markers between two node queries
    self.u_egress = Literal("egress->").setResultsName("u_egress")
    self.v_ingress = Literal("->ingress").setResultsName("v_ingress")
    self.u_ingress = Literal("ingress<-").setResultsName("u_ingress")
    self.v_egress = Literal("<-egress").setResultsName("v_egress")
    edgeType = (
        self.u_egress | self.u_ingress | self.v_egress | self.v_ingress
    ).setResultsName("edgeType").setFailAction(parse_fail_action)
    self.edgeQuery = (
        "(" + self.nodeQuery.setResultsName("query_a") + ")"
        + edgeType
        + "(" + self.nodeQuery.setResultsName("query_b") + ")"
    ).setFailAction(parse_fail_action)

    #start of BGP queries
    originQuery = (
        Literal("Origin").setResultsName("attribute") +
        #this is a workaround for the match, comparison, value 3-tuple in processing
        Literal("(").setResultsName("comparison") +
        Group(self.nodeQuery).setResultsName("value") +
        Suppress(")")
    ).setResultsName("originQuery")
    transitQuery = (
        Literal("Transit").setResultsName("attribute") +
        #this is a workaround for the match, comparison, value 3-tuple in processing
        Literal("(").setResultsName("comparison") +
        Group(self.nodeQuery).setResultsName("value") +
        Suppress(")")
    ).setResultsName("transitQuery")

    prefixList = Literal("prefix_list")
    matchPl = (prefixList.setResultsName("attribute")
               + comparison
               + attribute.setResultsName("value"))

    matchTag = (Literal("tag").setResultsName("attribute")
                + comparison
                + attribute.setResultsName("value"))

    #tags contain -> tag = aaa
    inTags = (
        Literal("tags").setResultsName("attribute").setParseAction(
            lambda x: "tag")
        + Literal("contain").setResultsName("comparison").setParseAction(
            lambda x: "=")
        + attribute_unnamed.setResultsName("value")
    )

    bgpMatchQuery = Group(
        matchPl | matchTag | inTags | originQuery | transitQuery
    ).setResultsName("bgpMatchQuery").setFailAction(parse_fail_action)
    self.bgpMatchQuery = bgpMatchQuery

    # Actions: each is a keyword ("attribute") followed by its "value"
    setLP = (Literal("setLP").setResultsName("attribute")
             + integer_string.setResultsName("value")
             ).setResultsName("setLP")
    setMED = (Literal("setMED").setResultsName("attribute")
              + integer_string.setResultsName("value")
              ).setResultsName("setMED")

    addTag = (Literal("addTag").setResultsName("attribute")
              + attribute.setResultsName("value")
              ).setResultsName("addTag")
    removeTag = (Literal("removeTag").setResultsName("attribute")
                 + attribute.setResultsName("value")
                 ).setResultsName("removeTag")
    #TODO: need to set blank value
    reject = Literal("reject")
    #TODO: remove once move quagga output inside module
    self.reject = reject
    rejectAction = (reject.setResultsName("attribute")
                    + Literal("route").setResultsName("value")
                    ).setResultsName("reject")
    setNextHop = (Literal("setNextHop").setResultsName("attribute")
                  + ipAddress.setResultsName("value")
                  ).setResultsName("setNextHop")

    setOriginAttribute = (
        Literal("setOriginAttribute").setResultsName("attribute")
        + (oneOf("IGP BGP None").setResultsName("value"))
    ).setResultsName("setOriginAttribute")

    bgpAction = Group(
        addTag | setLP | setMED | removeTag
        | setNextHop | setOriginAttribute | rejectAction
    ).setResultsName("bgpAction")

    # The Clauses
    ifClause = Group(
        Suppress("if") + bgpMatchQuery
        + ZeroOrMore(Suppress(boolean_and) + bgpMatchQuery)
    ).setResultsName("if_clause")

    actionClause = bgpAction + ZeroOrMore(Suppress(boolean_and) + bgpAction)
    thenClause = Group(
        Suppress("then") + actionClause).setResultsName("then_clause")
    ifThenClause = Group(
        Suppress("(") + ifClause + thenClause + Suppress(")")
    ).setResultsName("ifThenClause")
    elseActionClause = Group(
        Suppress("(") + actionClause + Suppress(")")
    ).setResultsName("else_clause")
    # Support actions without a condition (ie no "if")
    unconditionalAction = Group(
        Suppress("(")
        + Group(actionClause).setResultsName("unconditionalActionClause")
        + Suppress(")")
    ).setResultsName("bgpSessionQuery")

    # Query may contain itself (nested)
    bgpSessionQuery = Forward()
    bgpSessionQuery << (
        ifThenClause
        + Optional(Suppress("else") + (elseActionClause | bgpSessionQuery))
    ).setResultsName("bgpSessionQuery")
    bgpSessionQuery = bgpSessionQuery | unconditionalAction
    self.bgpSessionQuery = bgpSessionQuery

    self.bgpApplicationQuery = (
        self.edgeQuery + Suppress(":") + self.bgpSessionQuery)

    # Library stuff
    set_values = (
        Suppress("{")
        + delimitedList(attribute, delim=',').setResultsName("set_values")
        + Suppress("}"))
    #Set to empty set, rather than empty list as empty list is processed differently somewhere in parser
    empty_set = Literal("{}").setResultsName("set_values").setParseAction(
        lambda x: set())
    self.set_definition = (
        attribute.setResultsName("set_name")
        + Suppress("=")
        + (empty_set | set_values))

    library_params = attribute | Group(set_values) | empty_set
    library_function = (
        attribute.setResultsName("def_name")
        + Suppress("(")
        + delimitedList(library_params, delim=',').setResultsName("def_params")
        + Suppress(")"))
    library_function.setFailAction(parse_fail_action)

    self.library_def = Suppress("define") + library_function

    self.library_call = Suppress("apply") + library_function
    self.library_def.setFailAction(parse_fail_action)
    self.library_edge_query = (
        self.attribute.setResultsName("query_a")
        + edgeType
        + self.attribute.setResultsName("query_b"))
    self.library_edge_query.setFailAction(parse_fail_action)
    library_edge_definition = (
        self.library_edge_query + Suppress(":") + self.bgpSessionQuery)
    library_global_definition = (
        "global tags = {"
        + delimitedList(attribute, delim=',').setResultsName("tags")
        + "}")
    self.library_entry = (
        library_global_definition.setResultsName("global_tags")
        | library_edge_definition.setResultsName("library_edge"))
    self.library_entry.setFailAction(parse_fail_action)

    # A policy line is one of: an application query, a library call, or a
    # set definition.
    self.bgpPolicyLine = (
        self.bgpApplicationQuery.setResultsName("bgpApplicationQuery")
        | self.library_call.setResultsName("library_call")
        | self.set_definition.setResultsName("set_definition")
    )
def _restscrape_label(found, label):
    """Return *label* when *found* is true, otherwise the string "None"."""
    return label if found else "None"


def _restscrape_dd_text(soup, css_class):
    """Return the text of the ``<dd>`` tag carrying *css_class*, or "None".

    Presence is tested on any tag with the class while the text is read from
    the ``<dd>`` tag, mirroring the lookups this helper replaces.
    """
    if soup.find(class_=css_class) is None:
        return "None"
    return soup.find('dd', class_=css_class).get_text()


def restscrape(resturl, filenamersc, filenamerevsc):
    """Scrape one Yelp restaurant page and append the results to two CSV files.

    One row describing the restaurant is appended to ``filenamersc`` and one
    row per review found on the page is appended to ``filenamerevsc``.  Both
    files are opened in append-binary mode.

    The page is re-fetched (after a random 2-8 second pause) for as long as
    an "alternate version" of the page is detected.  Nothing is written when
    the page carries no rating or is marked as having no reviews.

    :param resturl: URL of the restaurant page.
    :param filenamersc: path of the restaurant CSV file.
    :param filenamerevsc: path of the review CSV file.
    :return: None
    """
    # Retry with a loop rather than by calling ourselves: the original
    # recursion grew the call stack by one frame per "alt version" response.
    while True:
        time.sleep(randint(2, 8))

        # Read the url; always release the connection, even if parsing fails.
        response = urllib2.urlopen(resturl)
        try:
            soup = BeautifulSoup(response.read())
        finally:
            response.close()

        # Check if it is rated
        if soup.find(itemprop="ratingValue") is None:
            return
        # Anomaly: page explicitly marked as having no reviews
        if soup.find(class_="container no-reviews") is not None:
            return
        # Alternate page versions: fetch again
        if soup.find(id="mapbox") is not None:
            print("alt version")
            continue
        if soup.find(class_="friend-count miniOrange") is None:
            print("alt version rev")
            continue
        break

    # ------------------------------------------------------------------
    # Restaurant info
    # ------------------------------------------------------------------

    # Key Yelp information
    title = soup.find(property="og:title").get("content").encode('utf-8')
    latitude = soup.find(property="place:location:latitude").get("content")
    longitude = soup.find(property="place:location:longitude").get("content")
    rating = soup.find(itemprop="ratingValue").get("content")
    reviewCount = soup.find(itemprop="reviewCount").get_text()

    cat_node = soup.find(id="cat_display")
    if cat_node is not None:
        # Collapse every run of whitespace into a single space.
        categories = ' '.join(cat_node.get_text().split())
    else:
        categories = "None"

    default_business_photo = "http://s3-media1.ak.yelpcdn.com/assets/2/www/img/5f69f303f17c/default_avatars/business_medium_square.png"
    photos = _restscrape_label(
        soup.find(class_="photo-box-img")['src'] != default_business_photo,
        "Has photos")

    url_node = soup.find(id="bizUrl")
    if url_node is not None:
        URL = url_node.get_text().strip().encode('utf-8')
    else:
        URL = "None"

    # Neighborhoods are embedded in a javascript blob, so they are pulled out
    # with a small pyparsing pattern instead of a tag lookup.
    # NOTE: setParseAction mutates the shared module-level quotedString.
    neighborhood = ""
    quotedString.setParseAction(removeQuotes)
    neighborhoodsSpec = (Literal('\"neighborhoods\":') + '[' +
                         delimitedList(quotedString)('neighborhoods') + ']')
    for hoods in neighborhoodsSpec.searchString(soup):
        neighborhood = str(hoods.neighborhoods)

    # Yelp interaction/information
    menu = _restscrape_label(
        soup.find(class_="yelp-menu") is not None, "Has menu")
    reservable = _restscrape_label(
        soup.find(id="opentable-reservation-actions") is not None,
        "Reservable")
    deal = _restscrape_label(
        soup.find(class_="media-story offer-detail") is not None, "Has deal")
    yelpDelivery = _restscrape_label(
        soup.find(id="delivery-address-form") is not None, "Delivery system")
    slides = _restscrape_label(
        soup.find(id="bizSlide") is not None, "Has slides")

    # Restaurant status
    sponsor = _restscrape_label(
        soup.find(id="bizSupporter") is not None, "Sponsors")
    claim = _restscrape_label(
        soup.find(id="bizClaim") is not None, "Unclaimed")
    # Inverted on purpose: the label is set when the grey style is absent.
    eliteReviews = _restscrape_label(
        soup.find(style="color:#999999;") is None, "Has Elites")

    # Restaurant attributes from the attributes section
    transit_node = soup.find(class_="attr-transit")
    if transit_node is not None:
        transit = transit_node.get_text().strip()
    else:
        transit = "None"

    def dd_columns(css_classes):
        return [_restscrape_dd_text(soup, css) for css in css_classes]

    # "None".strip() is still "None", so the missing case is unaffected.
    price = _restscrape_dd_text(soup, "attr-RestaurantsPriceRange2").strip()

    # NOTE(review): the original also looked up "attr-DogsAllowed" but never
    # wrote it to the CSV; the lookup is dropped and the column layout is
    # left exactly as it was.
    row = [resturl, title, latitude, longitude, rating, reviewCount,
           categories, photos, URL, neighborhood, menu, reservable,
           yelpDelivery, slides, sponsor, claim, eliteReviews, transit]
    row += dd_columns((
        "attr-BusinessHours",
        "attr-RestaurantsAttire",
        "attr-BusinessAcceptsCreditCards",
        "attr-BusinessParking",
    ))
    row.append(price)
    row += dd_columns((
        "attr-RestaurantsGoodForGroups",
        "attr-GoodForKids",
        "attr-RestaurantsReservations",
    ))
    row.append(deal)
    row += dd_columns((
        "attr-RestaurantsDelivery",
        "attr-RestaurantsTakeOut",
        "attr-RestaurantsTableService",
        "attr-OutdoorSeating",
        "attr-WiFi",
        "attr-GoodForMeal",
        "attr-BestNights",
        "attr-HappyHour",
        "attr-Alcohol",
        "attr-Smoking",
        "attr-CoatCheck",
        "attr-NoiseLevel",
        "attr-GoodForDancing",
        "attr-Ambience",
        "attr-HasTV",
        "attr-Caters",
        "attr-WheelchairAccessible",
    ))

    with open(filenamersc, "ab") as filer:
        csv.writer(filer).writerow(row)

    # ------------------------------------------------------------------
    # Reviews
    # ------------------------------------------------------------------
    reviews = soup.findAll(itemprop="review")
    if not reviews:
        # Keep the original behaviour of not touching the review file.
        return

    default_user_photo = "http://s3-media4.ak.yelpcdn.com/assets/2/www/img/78074914700f/default_avatars/user_small_square.png"
    photo_badge = "i-wrap ig-wrap-common i-camera-common-wrap badge photo-count"
    seated_badge = "i-wrap ig-wrap-common i-opentable-badge-common-wrap badge opentable-badge-marker"
    deal_badge = "i ig-common i-deal-price-tag-common"
    checkin_badge = "i-wrap ig-wrap-common i-checkin-burst-blue-small-common-wrap badge checkin checkin-irregular"

    # Open the review file once for all reviews instead of once per review.
    with open(filenamerevsc, "ab") as filerev:
        frev = csv.writer(filerev)
        for review in reviews:
            # User data
            eliteStatus = _restscrape_label(
                review.find(title="User is Elite") is not None, "Elite")
            # The last 8 characters are cut off the counters (presumably a
            # " friends" / " reviews" suffix -- TODO confirm against a page).
            friendCount = review.find(
                class_="friend-count miniOrange").get_text()[:-8].strip()
            reviewCount = review.find(
                class_="review-count miniOrange").get_text()[:-8].strip()
            userPhoto = _restscrape_label(
                review.find(class_="photo-box-img")['src'] != default_user_photo,
                "Has photo")
            reviewInfo = review.find(
                class_="reviewer_info").get_text().encode('utf-8')

            # Review data
            reviewRating = review.find(itemprop="ratingValue").get("content")
            publish = review.find(itemprop="datePublished").get("content")
            description = review.find(
                itemprop="description").get_text().encode('utf-8')

            # Review attributes
            pix_node = review.find(class_=photo_badge)
            if pix_node is not None:
                reviewPix = pix_node.get_text()[:-6].strip()
            else:
                reviewPix = "None"
            reviewSeated = _restscrape_label(
                review.find(class_=seated_badge) is not None, "Seated")
            reviewDeal = _restscrape_label(
                review.find(class_=deal_badge) is not None, "Purchased Deal")
            checkin_node = review.find(class_=checkin_badge)
            if checkin_node is not None:
                reviewCheckIn = checkin_node.get_text()[:-14].strip()
            else:
                reviewCheckIn = "None"

            # Useful / funny / cool statistics; special Qype users lack them.
            counts = review.findAll(class_="count")
            if counts:
                # An empty counter text is recorded as 0.
                useful = counts[0].get_text() or 0
                funny = counts[1].get_text() or 0
                cool = counts[2].get_text() or 0
            else:
                useful = funny = cool = 0

            frev.writerow([resturl, eliteStatus, friendCount, reviewCount,
                           userPhoto, reviewInfo, reviewRating, publish,
                           description, reviewPix, reviewSeated, reviewDeal,
                           reviewCheckIn, useful, funny, cool])
def braces_parser(text, opener=BLOB_OPENER, closer=BLOB_CLOSER):
    """Parse brace-delimited *text* and return the first parsed object.

    The grammar recognises ``{ ... }`` blocks that are either dictionaries
    (one ``key [value]`` entry per line, converted with ``GlobDict``) or
    sets (whitespace separated entries, converted to a ``tuple``).  Values
    may be quoted strings, nested blocks or bare identifiers.

    ``opener`` and ``closer`` are accepted but not used in this function
    body; the braces are hard-coded as ``{`` and ``}``.

    Returns ``objStr.parseString(text)[0]``, i.e. the first top-level
    element of the parse result.
    """
    # Parse-action helpers -------------------------------------------------
    cvtTuple = lambda toks: tuple(toks.asList())  # @IgnorePep8
    cvtRaw = lambda toks: RawString(' '.join(map(str, toks.asList()))
                                    )  # @IgnorePep8
    cvtDict = lambda toks: GlobDict(toks.asList())  # @IgnorePep8
    # Slice the original input between the positions recorded on the token
    # object, so nested ``{...}`` text is kept verbatim.
    extractText = lambda s, l, t: RawString(s[t._original_start:t._original_end
                                              ])  # @IgnorePep8

    def pythonize(toks):
        # Convert a bare identifier into a Python value where it looks like
        # one: true/false/none, an all-digit integer, or a float literal.
        # Anything else is returned unchanged.
        s = toks[0]
        if s == 'true':
            return True
        elif s == 'false':
            return False
        elif s == 'none':
            # Wrapped in a list so the parse action yields a None token.
            return [None]
        elif s.isdigit():
            return int(s)
        elif re.match('(?i)^-?(\d+\.?e\d+|\d+\.\d*|\.\d+)$', s):
            return float(s)
        return toks[0]

    def noneDefault(s, loc, t):
        # A key without a value gets the RawEOL placeholder as its value.
        return t if len(t) else [RawEOL]

    # define punctuation as suppressed literals
    lbrace, rbrace = map(Suppress, "{}")

    identifier = Word(printables, excludeChars='{}"\'')
    quotedStr = QuotedString('"', escChar='\\', multiline=True) | \
        QuotedString('\'', escChar='\\', multiline=True)
    # Same quoting rules, but the surrounding quotes are kept in the result.
    quotedIdentifier = QuotedString('"', escChar='\\', unquoteResults=False) | \
        QuotedString('\'', escChar='\\', unquoteResults=False)

    # Forward declarations: dictionaries and sets can nest in each other.
    dictStr = Forward()
    setStr = Forward()
    objStr = Forward()

    # A bare identifier immediately followed by a quoted part.
    oddIdentifier = identifier + quotedIdentifier
    dictKey = quotedIdentifier | \
        Combine(oddIdentifier).setParseAction(cvtRaw)
    dictKey.setParseAction(cvtRaw)

    dictValue = quotedStr | dictStr | setStr | \
        Combine(oddIdentifier).setParseAction(cvtRaw)

    if OLD_STYLE_KEYS:
        # Keys may consist of several space separated words; a word is only
        # taken into the key when it is not the last thing on the line.
        dictKey |= Combine(identifier + ZeroOrMore(
            White(' ') + (identifier + ~FollowedBy(Optional(White(' ')) +
                                                   LineEnd()))))
        dictValue |= identifier.setParseAction(pythonize)
    else:
        dictKey |= identifier
        # Values may be a space separated run of identifiers, optionally
        # followed by a raw ``{...}`` block that is kept as original text.
        dictValue |= Or([
            delimitedList(identifier | quotedIdentifier,
                          delim=White(' '), combine=True),
            Combine(
                delimitedList(identifier | quotedIdentifier,
                              delim=White(' '), combine=True) +
                Optional(
                    White(' ') + originalTextFor(nestedExpr('{', '}')).
                    setParseAction(extractText))).setParseAction(cvtRaw)
        ])

    # NOTE: this changes a process-wide pyparsing default. The statement
    # order matters here: the elements built below are created while only
    # space and tab count as skippable whitespace, so line ends stay
    # significant for dictionary entries.
    ParserElement.setDefaultWhitespaceChars(' \t')
    dictEntry = Group(dictKey +
                      Optional(White(' ').suppress() +
                               dictValue).setParseAction(noneDefault) +
                      Optional(White(' ').suppress()) +
                      LineEnd().suppress())
    dictStr << (lbrace + ZeroOrMore(dictEntry) + rbrace)
    dictStr.setParseAction(cvtDict)

    # Back to the usual whitespace set for the remaining elements.
    ParserElement.setDefaultWhitespaceChars(' \t\r\n')
    # NOTE(review): setParseAction mutates ``identifier`` and the shared
    # module-level ``quotedString`` in place -- confirm this is intended.
    setEntry = identifier.setParseAction(
        pythonize) | quotedString.setParseAction(removeQuotes) | dictStr
    setStr << (lbrace + delimitedList(setEntry, delim=White()) + rbrace)
    setStr.setParseAction(cvtTuple)

    # Top level: dictionaries separated by line ends, comments ignored.
    objEntry = dictStr.ignore(pythonStyleComment)
    objStr << delimitedList(objEntry, delim=LineEnd())

    return objStr.parseString(text)[0]
def create_bnf(allow_tuple=False, free_word=False):
    """Build the pyparsing grammar for a Python-like literal mini-language.

    The grammar understands integers, reals, ``None``, booleans, quoted
    strings, bare words, and nested lists ``[...]``, tuples ``(...)`` and
    dicts ``{...}``.  The top-level expression is the *inside* of a dict
    (``key: value, ...`` without surrounding braces) or, when
    ``allow_tuple`` is true, alternatively the inside of a tuple.

    :param allow_tuple: if true, return ``dict_inner | tuple_inner`` so
        that a bare comma separated list is accepted as well.
    :param free_word: if true, bare words may also contain and start with
        the characters ``_-/.+*`` and digits; otherwise they must start
        with a letter and contain only letters, digits and ``_``.
    :return: the pyparsing element to call ``parseString`` on.
    """
    cvt_int = lambda toks: int(toks[0])
    cvt_real = lambda toks: float(toks[0])
    # ``lower`` has to be *called*: comparing the bound method itself to
    # 'true' is always False.
    cvt_bool = lambda toks: toks[0].lower() == 'true'
    cvt_none = lambda toks: [None]
    cvt_tuple = lambda toks: tuple(toks.asList())
    cvt_dict = lambda toks: dict(toks.asList())

    # define punctuation as suppressed literals
    (lparen, rparen, lbrack, rbrack,
     lbrace, rbrace, colon) = map(Suppress, "()[]{}:")

    integer = Combine(Optional(oneOf("+ -")) + Word(nums)).setName("integer")
    integer.setParseAction(cvt_int)

    # Accept both keywords; previously only "False" was recognised, so
    # "True" fell through to the bare-word rule and stayed a string.
    boolean = Keyword("True", caseless=True) | Keyword("False", caseless=True)
    boolean.setParseAction(cvt_bool)

    none = Keyword("None", caseless=True)
    none.setParseAction(cvt_none)

    real = Combine(Optional(oneOf("+ -")) + Word(nums)
                   + "." + Optional(Word(nums))
                   + Optional("e" + Optional(oneOf("+ -"))
                              + Word(nums))).setName("real")
    real.setParseAction(cvt_real)

    # Containers are recursive, hence the forward declarations.
    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()

    if free_word:
        string = Word(alphas8bit + "_-/.+**" + alphanums)
    else:
        string = Word(alphas8bit + alphas, alphas8bit + alphanums + "_")

    # Order matters: keywords and numbers are tried before bare words, and
    # ``real`` before ``integer`` so "1.5" is not cut short at "1".
    list_item = (none | boolean | real | integer | list_str | tuple_str
                 | dict_str
                 | quotedString.setParseAction(removeQuotes)
                 | string)
    # A dict value may be missing altogether, in which case it becomes None.
    list_item2 = list_item | Empty().setParseAction(lambda: [None])

    tuple_inner = Optional(delimitedList(list_item)) + Optional(Suppress(","))
    tuple_inner.setParseAction(cvt_tuple)
    tuple_str << (Suppress("(") + tuple_inner + Suppress(")"))

    list_inner = Optional(delimitedList(list_item) + Optional(Suppress(",")))
    list_inner.setParseAction(lambda toks: list(toks))
    list_str << (lbrack + list_inner + rbrack)

    dict_entry = Group(list_item + colon + list_item2)
    dict_inner = delimitedList(dict_entry) + Optional(Suppress(","))
    dict_inner.setParseAction(cvt_dict)
    dict_str << (lbrace + Optional(dict_inner) + rbrace)

    dict_or_tuple = dict_inner | tuple_inner

    if allow_tuple:
        return dict_or_tuple
    else:
        return dict_inner
return dict(toks.asList()) # define punctuation as suppressed literals lparen, rparen, lbrack, rbrack, lbrace, rbrace, colon = map( Suppress, "()[]{}:") identifier = Regex(r"[a-zA-Z_][\w]+") integer = Regex(r"[+-]?\d+").setName("integer").setParseAction(cvtInt) real = Regex(r"[+-]?\d+\.\d*([Ee][+-]?\d+)?").setName("real").setParseAction( cvtReal) tupleStr = Forward() listStr = Forward() dictStr = Forward() unicodeString.setParseAction(lambda t: t[0][2:-1].decode('unicode-escape')) quotedString.setParseAction(lambda t: t[0][1:-1]) boolLiteral = oneOf("True False").setParseAction(cvtBool) noneLiteral = Literal("None").setParseAction(replaceWith(None)) listItem = real | integer | quotedString | unicodeString | boolLiteral | noneLiteral | Group( listStr) | tupleStr | dictStr tupleStr << (Suppress("(") + Optional(delimitedList(listItem)) + Optional(Suppress(",")) + Suppress(")")) tupleStr.setParseAction(cvtTuple) listStr << (lbrack + Optional(delimitedList(listItem) + Optional(Suppress(","))) + rbrack) dictEntry = Group(listItem + colon + listItem)
elif t == '-': # Next tokens needs to be negated negation = True else: # Append to query the token if negation: t = ~t if operation == 'or': query |= t else: query &= t return query NO_BRTS = printables.replace('(', '').replace(')', '') SINGLE = Word(NO_BRTS.replace('*', '')) WILDCARDS = Optional('*') + SINGLE + Optional('*') + WordEnd(wordChars=NO_BRTS) QUOTED = quotedString.setParseAction(removeQuotes) OPER_AND = CaselessLiteral('and') OPER_OR = CaselessLiteral('or') OPER_NOT = '-' TERM = Combine(Optional(Word(alphas).setResultsName('meta') + ':') + (QUOTED.setResultsName('query') | WILDCARDS.setResultsName('query'))) TERM.setParseAction(createQ) EXPRESSION = operatorPrecedence(TERM, [ (OPER_NOT, 1, opAssoc.RIGHT), (OPER_OR, 2, opAssoc.LEFT), (Optional(OPER_AND, default='and'), 2, opAssoc.LEFT)]) EXPRESSION.setParseAction(unionQ)
REGEX_SPECIAL_CHARS = r'([\.\*\+\?\|\(\)\{\}\[\]])' REGEX_LOG_FORMAT_VARIABLE = r'\$([a-zA-Z0-9\_]+)' REGEX_CONFIG_INCLUDES = r'include (.+);' LOG_FORMAT_COMBINED = '$remote_addr - $remote_user [$time_local] ' \ '"$request" $status $body_bytes_sent ' \ '"$http_referer" "$http_user_agent"' LOG_FORMAT_COMMON = '$remote_addr - $remote_user [$time_local] ' \ '"$request" $status $body_bytes_sent ' \ '"$http_x_forwarded_for"' # common parser element semicolon = Literal(';').suppress() # nginx string parameter can contain any character except: { ; " ' parameter = Word(''.join(c for c in printables if c not in set('{;"\''))) # which can also be quoted parameter = parameter | quotedString.setParseAction(removeQuotes) def detect_config_path(): """ Get nginx configuration file path based on `nginx -V` output :return: detected nginx configuration file path """ try: proc = subprocess.Popen(['nginx', '-V'], stderr=subprocess.PIPE) except OSError: error_exit( 'Access log file or format was not set and nginx config file cannot be detected. ' + 'Perhaps nginx is not in your PATH?') stdout, stderr = proc.communicate()
def __repr__(self): return repr(self.query) def express(self, env): return self.query.express(env) def domain_expression_action(*args): print 'd:', args, [type(i) for i in args] def value_list_action(*args): print 'v:', args, [type(i) for i in args] integer_value = Regex(r'[-]?\d+').setParseAction(NumericIntegerAction) float_value = Regex(r'[-]?\d+(\.\d*)?([eE]\d+)?').setParseAction(NumericFloatAction) value_chars = Word(alphas + alphas8bit, alphanums + alphas8bit + '%.-_*;:') string_value = (value_chars | quotedString.setParseAction(removeQuotes)).setParseAction(StringAction) # value can contain any string once it's quoted value = string_value | integer_value | float_value value_list = (string_value ^ delimitedList(string_value) ^ OneOrMore(string_value)) binop = oneOf('= == != <> < <= > >= not like contains has ilike ' 'icontains ihas is').setName('binop') domain = Word(alphas, alphanums).setName('domain') domain_values = Group(value_list.copy()) domain_expression = (domain + Literal('=') + Literal('*') + stringEnd) \ | (domain + binop + domain_values + stringEnd) AND_ = CaselessLiteral("and") OR_ = CaselessLiteral("or") NOT_ = CaselessLiteral("not") | Literal('!')
def __init__(self):
    """Build the query-language grammar and store it in ``self.sentence``.

    ``self.json_query`` is initialised to an empty query skeleton and
    ``self.tokens`` to None.  Every grammar rule gets a parse action that
    records what it matched, either in ``self.frame`` or directly in
    ``self.json_query``, or delegates to a ``*_frame`` method of this
    object.  ``self.frame`` and those methods are not defined in this
    method; they must be provided elsewhere on the object before parsing.
    """
    self.json_query = {'query':{}, 'and': [], 'or': []}
    self.tokens = None
    #--------------------------------------------------------------------------------------
    # <integer>    ::= 0-9
    # <double>     ::= 0-9 ('.' 0-9)
    # <number>     ::= <integer> | <double>
    #--------------------------------------------------------------------------------------
    integer = Regex(r'-?[0-9]+')
    # The dot must be escaped: a bare '.' matches any character, so the
    # number used to swallow the character after it (a trailing space, or
    # the 'k' of "5km", which then made the units fail to match).
    double = Regex(r'-?[0-9]+\.?[0-9]*')
    number = double | integer

    #--------------------------------------------------------------------------------------
    # <python-string>   ::= (quoted string, quotes removed)
    # <wildcard-string> ::= <python-string>
    # <limited-string>  ::= quoted string of a..z A..Z 0..9 _ .
    # <field>           ::= <limited-string> | "*"
    # <coords>          ::= "LAT" <number> "LON" <number>
    # <units>           ::= ('km' | 'mi')
    # <distance>        ::= <number> <units>
    #--------------------------------------------------------------------------------------
    # NOTE: setParseAction returns the element itself, so python_string,
    # wildcard_string and date below are all the shared quotedString object.
    python_string = quotedString.setParseAction(removeQuotes)
    wildcard_string = python_string
    limited_string = Regex(r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction(removeQuotes)
    field = limited_string ^ CaselessLiteral('"*"').setParseAction(removeQuotes)
    coords = CaselessLiteral("LAT") + number + CaselessLiteral("LON") + number
    units = CaselessLiteral('km') | CaselessLiteral('mi')
    distance = number + units
    distance.setParseAction(
        lambda x : self.frame.update({'dist' : float(x[0]), 'units' : x[1]}))

    #--------------------------------------------------------------------------------------
    # Date
    #--------------------------------------------------------------------------------------
    date = python_string

    #--------------------------------------------------------------------------------------
    # <query-filter>  ::= "FILTER" <python-string>
    # <index-name>    ::= <python-string>
    # <resource-id>   ::= quoted string of a..z A..Z 0..9 $ _ -
    # <collection-id> ::= <resource-id>
    #--------------------------------------------------------------------------------------
    query_filter = CaselessLiteral("FILTER") + python_string
    # Add the filter to the frame object
    query_filter.setParseAction(lambda x : self.frame.update({'filter' : x[1]}))
    # Wrapped in MatchFirst so the parse action below is attached to a new
    # element instead of replacing the one on the shared quotedString.
    index_name = MatchFirst(python_string)
    # Add the index to the frame object
    index_name.setParseAction(lambda x : self.frame.update({'index' : x[0]}))
    resource_id = Regex(r'("(?:[a-zA-Z0-9\$_-])*"|\'(?:[a-zA-Z0-9\$_-]*)\')').setParseAction(removeQuotes)
    collection_id = resource_id

    #--------------------------------------------------------------------------------------
    # <from-statement> ::= "FROM" <number>
    # <to-statement>   ::= "TO" <number>
    #--------------------------------------------------------------------------------------
    from_statement = CaselessLiteral("FROM") + number
    from_statement.setParseAction(lambda x : self.frame.update({'from' : x[1]}))
    to_statement = CaselessLiteral("TO") + number
    to_statement.setParseAction(lambda x : self.frame.update({'to' : x[1]}))

    #--------------------------------------------------------------------------------------
    # <date-from-statement> ::= "FROM" <date>
    # <date-to-statement>   ::= "TO" <date>
    #--------------------------------------------------------------------------------------
    date_from_statement = CaselessLiteral("FROM") + date
    date_from_statement.setParseAction(lambda x : self.frame.update({'from' : x[1]}))
    date_to_statement = CaselessLiteral("TO") + date
    date_to_statement.setParseAction(lambda x : self.frame.update({'to' : x[1]}))

    #--------------------------------------------------------------------------------------
    # <time-query> ::= "TIME" [<date-from-statement>] [<date-to-statement>]
    #--------------------------------------------------------------------------------------
    time_query = CaselessLiteral("TIME") + Optional(date_from_statement) + Optional(date_to_statement)
    time_query.setParseAction(lambda x : self.time_frame())

    #--------------------------------------------------------------------------------------
    # <time-bounds> ::= "TIMEBOUNDS" <date-from-statement> <date-to-statement>
    #--------------------------------------------------------------------------------------
    time_bounds = CaselessLiteral("TIMEBOUNDS") + date_from_statement + date_to_statement
    time_bounds.setParseAction(lambda x : self.time_bounds_frame())

    #--------------------------------------------------------------------------------------
    # <vertical-bounds> ::= "VERTICAL" <from-statement> <to-statement>
    #--------------------------------------------------------------------------------------
    vertical_bounds = CaselessLiteral("VERTICAL") + from_statement + to_statement
    vertical_bounds.setParseAction(lambda x : self.vertical_bounds_frame())

    #--------------------------------------------------------------------------------------
    # <range-query> ::= "VALUES" [<from-statement>] [<to-statement>]
    #--------------------------------------------------------------------------------------
    range_query = CaselessLiteral("VALUES") + Optional(from_statement) + Optional(to_statement)
    # Add the range to the frame object
    range_query.setParseAction(lambda x : self.range_frame())

    #--------------------------------------------------------------------------------------
    # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords>
    # <geo-bbox>     ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords>
    #--------------------------------------------------------------------------------------
    geo_distance = CaselessLiteral("DISTANCE") + distance + CaselessLiteral("FROM") + coords
    # Token layout: DISTANCE n units FROM LAT lat LON lon -> lat=x[5], lon=x[7]
    geo_distance.setParseAction(lambda x : self.frame.update({'lat': float(x[5]), 'lon':float(x[7])}))
    geo_bbox = CaselessLiteral("BOX") + CaselessLiteral("TOP-LEFT") + coords + CaselessLiteral("BOTTOM-RIGHT") + coords
    # Corners are stored as [lon, lat]: x[3]/x[5] are the top-left lat/lon,
    # x[8]/x[10] the bottom-right lat/lon.
    geo_bbox.setParseAction(lambda x : self.frame.update({'top_left':[float(x[5]),float(x[3])], 'bottom_right':[float(x[10]),float(x[8])]}))

    #--------------------------------------------------------------------------------------
    # <field-query> ::= <wildcard-string>
    # <term-query>  ::= "IS" <field-query>
    # <fuzzy-query> ::= "LIKE" <field-query>
    # <match-query> ::= "MATCH" <field-query>
    # <geo-query>   ::= "GEO" ( <geo-distance> | <geo-bbox> )
    #--------------------------------------------------------------------------------------
    field_query = wildcard_string
    term_query = CaselessLiteral("IS") + field_query
    term_query.setParseAction(lambda x : self.frame.update({'value':x[1]}))

    geo_query = CaselessLiteral("GEO") + ( geo_distance | geo_bbox )

    fuzzy_query = CaselessLiteral("LIKE") + field_query
    fuzzy_query.setParseAction(lambda x : self.frame.update({'fuzzy':x[1]}))
    match_query = CaselessLiteral("MATCH") + field_query
    match_query.setParseAction(lambda x : self.frame.update({'match':x[1]}))

    #--------------------------------------------------------------------------------------
    # <limit-parameter>  ::= "LIMIT" <integer>
    # <depth-parameter>  ::= "DEPTH" <integer>
    # <order-parameter>  ::= "ORDER" "BY" <limited-string>
    # <offset-parameter> ::= "SKIP" <integer>
    # <query-parameter>  ::= <limit-parameter> | <order-parameter> | <offset-parameter>
    #--------------------------------------------------------------------------------------
    limit_parameter = CaselessLiteral("LIMIT") + integer
    limit_parameter.setParseAction(lambda x: self.json_query.update({'limit' : int(x[1])}))
    depth_parameter = CaselessLiteral("DEPTH") + integer
    depth_parameter.setParseAction(lambda x: self.frame.update({'depth' : int(x[1])}))
    order_parameter = CaselessLiteral("ORDER") + CaselessLiteral("BY") + limited_string
    order_parameter.setParseAction(lambda x: self.json_query.update({'order' : {x[2] : 'asc'}}))
    offset_parameter = CaselessLiteral("SKIP") + integer
    offset_parameter.setParseAction(lambda x : self.json_query.update({'skip' : int(x[1])}))
    query_parameter = limit_parameter | order_parameter | offset_parameter

    #--------------------------------------------------------------------------------------
    # <search-query>      ::= "SEARCH" <field> (<range-query> | <term-query> | <fuzzy-query> | <match-query> | <vertical-bounds> | <time-bounds> | <time-query> | <geo-query>) "FROM" <index-name>
    # <collection-query>  ::= "IN" <collection-id>
    # <association-query> ::= "BELONGS" "TO" <resource-id> [ <depth-parameter> ]
    # <owner-query>       ::= "HAS" <resource-id> [ <depth-parameter> ]
    # <query>             ::= <search-query> | <association-query> | <collection-query> | <owner-query>
    #--------------------------------------------------------------------------------------
    search_query = CaselessLiteral("SEARCH") + field + (range_query | term_query | fuzzy_query | match_query | vertical_bounds | time_bounds | time_query | geo_query) + CaselessLiteral("FROM") + index_name
    # Add the field to the frame object
    search_query.setParseAction(lambda x : self.frame.update({'field' : x[1]}))
    collection_query = CaselessLiteral("IN") + collection_id
    collection_query.setParseAction(lambda x : self.frame.update({'collection': x[1]}))
    association_query = CaselessLiteral("BELONGS") + CaselessLiteral("TO") + resource_id + Optional(depth_parameter)
    # Add the association to the frame object
    association_query.setParseAction(lambda x : self.frame.update({'association':x[2]}))
    owner_query = CaselessLiteral("HAS") + resource_id + Optional(depth_parameter)
    owner_query.setParseAction(lambda x : self.frame.update({'owner':x[1]}))
    query = search_query | association_query | collection_query | owner_query

    #--------------------------------------------------------------------------------------
    # <primary-query> ::= <query> [<query-filter>]
    # <atom>          ::= <query>
    # <intersection>  ::= "AND" <atom>
    # <union>         ::= "OR" <atom>
    # <sentence>      ::= <primary-query> (<intersection> | <union>)* <query-parameter>*
    #--------------------------------------------------------------------------------------
    primary_query = query + Optional(query_filter)
    # Hand the finished frame over as the primary query
    primary_query.setParseAction(lambda x : self.push_frame())
    atom = query
    intersection = CaselessLiteral("AND") + atom
    # Hand the finished frame over as an AND operation
    intersection.setParseAction(lambda x : self.and_frame())
    union = CaselessLiteral("OR") + atom
    # Hand the finished frame over as an OR operation
    union.setParseAction(lambda x : self.or_frame())

    self.sentence = primary_query + (intersection ^ union)*(0,None) + query_parameter*(0,None)
def _create_field_parser(metadata):
    """Create a parser that can handle field definitions.

    BNF like grammar::

        rule       ::= [pid | extend | override] json_id ["," aliases]":" body
        json_id    ::= (letter|"_") (letter|digit|_)*
        aliases    ::= json_id ["," aliases]

        pid        ::= @persistent_identifier( level )
        extend     ::= @extend
        override   ::= @override
        hidden     ::= @hidden

        body       ::=(creator* | derived | calculated) (extensions)*

        creator    ::= [decorators] format "," tag "," expr
        derived    ::= [decorators] expr
        calculated ::= [decorators] expr

    To check the syntactics of the parser extensions or decorators please go
    to :mod:`jsonalchemy.jsonext.parsers`

    :param metadata: object whose ``parsers`` mapping supplies the extension
        parser classes; decorator parsers and field parsers are selected
        from it by base class.
    :return: a pyparsing element matching one or more comments or rules.
    """
    # Shared by every indentedBlock below so nesting levels stay in sync.
    indent_stack = [1]

    # Independent/special decorators
    persistent_identifier = (
        Keyword('@persistent_identifier').suppress() + nestedExpr()
    ).setResultsName('pid').setParseAction(lambda toks: int(toks[0][0]))
    # The keyword itself is suppressed; the parse action turns a match into
    # the value True.
    override = Keyword('@override').suppress()\
        .setResultsName('override')\
        .setParseAction(lambda toks: True)
    extend = Keyword('@extend').suppress()\
        .setResultsName('extend')\
        .setParseAction(lambda toks: True)
    hidden = Keyword('@hidden').suppress()\
        .setResultsName('hidden')\
        .setParseAction(lambda toks: True)
    # ``&`` builds an Each: the decorators may appear in any order.
    rule_decorators = (Optional(persistent_identifier) &
                       Optional(override) &
                       Optional(extend) &
                       Optional(hidden))

    # Field definition decorators
    field_decorators = Each([
        Optional(parser.parse_element(indent_stack))
        for parser in six.itervalues(metadata.parsers)
        if issubclass(parser, DecoratorBaseExtensionParser)
    ])

    # Creator rules
    # NOTE(review): the expression text taken from the definition is passed
    # to compile(..., 'eval') here and in the derived/calculated rules --
    # confirm the definitions only come from trusted sources.
    creator_body = (Optional(field_decorators).setResultsName('decorators') +
                    Word(alphas, alphanums + '_') +
                    Literal(',').suppress() +
                    quotedString.setParseAction(removeQuotes) +
                    Literal(',').suppress() +
                    PYTHON_ALLOWED_EXPR).setParseAction(
        lambda toks: {
            'source_format': toks[-3],
            'source_tags': toks[-2].split(' '),
            'function': compile(toks[-1].strip(), '', 'eval'),
            'type': 'creator',
            'decorators': toks.decorators.asDict()
        }).setResultsName('creator_def', listAllMatches=True)
    creator = (Keyword('creator:').suppress() +
               indentedBlock(OneOrMore(creator_body), indent_stack))

    # Derived and calculated rules
    der_calc_body = (Optional(field_decorators).setResultsName('decorators') +
                     PYTHON_ALLOWED_EXPR)
    derived = (Keyword('derived:').suppress() +
               indentedBlock(der_calc_body, indent_stack)).setParseAction(
        lambda toks: {
            'source_format': 'derived',
            'source_tags': None,
            'function': compile(toks[-1].strip(), '', 'eval'),
            'type': 'derived',
            'decorators': toks.decorators.asDict()
        }).setResultsName('derived_def')
    calculated = (Keyword('calculated:').suppress() +
                  indentedBlock(der_calc_body, indent_stack)).setParseAction(
        lambda toks: {
            'source_format': 'calculated',
            'source_tags': None,
            'function': compile(toks[-1].strip(), '', 'eval'),
            'type': 'calculated',
            'decorators': toks.decorators.asDict()
        }).setResultsName('calculated_def')

    # The body of a rule: at most one of creator/derived/calculated plus
    # whatever sections the field extension parsers contribute.
    rule_sections = [
        Optional(creator | derived | calculated),
    ]
    rule_sections.extend([
        Optional(parser.parse_element(indent_stack))
        for parser in six.itervalues(metadata.parsers)
        if issubclass(parser, FieldBaseExtensionParser)
    ])

    # First name is the json_id, any further comma separated names are
    # aliases.
    json_id = (IDENT +
               Optional(Suppress(',') +
                        delimitedList(Word(alphanums + '_'))) +
               Suppress(':')
               ).setResultsName('field')\
        .setParseAction(lambda toks: {'json_id': toks[0],
                                      'aliases': toks[1:]})

    rule = Group(
        Optional(rule_decorators) +
        json_id +
        indentedBlock(Each(rule_sections), indent_stack))

    return OneOrMore(COMMENT.suppress() | rule)
def parse_element(cls, indent_stack):
    """Return the grammar element for the ``@depends_on(...)`` decorator.

    The element matches the ``@depends_on`` keyword followed by a
    parenthesised, comma-separated list of quoted strings.  The keyword and
    both parentheses are suppressed and the quotes are stripped from each
    string; the result is named ``depends_on``.

    ``indent_stack`` is accepted but not used by this element.
    """
    keyword = Keyword("@depends_on").suppress()
    opening = Literal('(').suppress()
    dependencies = delimitedList(quotedString.setParseAction(removeQuotes))
    closing = Literal(')').suppress()

    element = keyword + opening + dependencies + closing
    return element.setResultsName("depends_on")
def __init__(self):
    """Build the pyparsing grammar of the query language.

    Initializes ``self.json_query`` and ``self.tokens`` and stores the
    complete grammar in ``self.sentence``.  The parse actions attached here
    write into ``self.frame`` and ``self.json_query`` and call the
    ``*_frame`` helper methods of this object.

    NOTE(review): ``self.frame`` is not created in this method; it is
    presumably set up elsewhere before parsing starts -- confirm.
    """
    self.json_query = {'query': {}, 'and': [], 'or': []}
    self.tokens = None

    #--------------------------------------------------------------------------------------
    # <integer>    ::= 0-9
    # <double>     ::= 0-9 ('.' 0-9)
    # <number>     ::= <integer> | <double>
    #--------------------------------------------------------------------------------------
    integer = Regex(r'-?[0-9]+')  # Word matches space for some reason
    # The decimal point must be escaped: an unescaped '.' matches ANY
    # character, so "5km" was tokenized as "5k" (breaking <distance>) and
    # "5 " kept its trailing blank.
    double = Regex(r'-?[0-9]+\.?[0-9]*')
    # <double> also matches plain integers, so it effectively always wins.
    number = double | integer

    #--------------------------------------------------------------------------------------
    # <python-string>   ::= (quoted string, quotes removed)
    # <wildcard-string> ::= <python-string>
    # <limited-string>  ::= '"' a..z A..Z 0..9 _ . '"' (alpha nums and ._ surrounded by quotes)
    # <field>           ::= <limited-string> | '"*"'
    # <coords>          ::= "LAT" <number> "LON" <number>
    # <units>           ::= ('km' | 'mi')
    # <distance>        ::= <number> <units>
    #--------------------------------------------------------------------------------------
    python_string = quotedString.setParseAction(removeQuotes)
    wildcard_string = python_string
    limited_string = Regex(
        r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction(
            removeQuotes)
    field = limited_string ^ CaselessLiteral('"*"').setParseAction(
        removeQuotes)
    coords = CaselessLiteral("LAT") + number + CaselessLiteral(
        "LON") + number
    units = CaselessLiteral('km') | CaselessLiteral('mi')
    distance = number + units
    distance.setParseAction(lambda x: self.frame.update({
        'dist': float(x[0]),
        'units': x[1]
    }))

    #--------------------------------------------------------------------------------------
    # Date
    #--------------------------------------------------------------------------------------
    date = python_string

    #--------------------------------------------------------------------------------------
    # <query-filter>  ::= "FILTER" <python-string>
    # <index-name>    ::= <python-string>
    # <resource-id>   ::= '"' a..z A..Z 0..9 $ _ - '"' (alpha nums surrounded by quotes)
    # <collection-id> ::= <resource-id>
    #--------------------------------------------------------------------------------------
    query_filter = CaselessLiteral("FILTER") + python_string
    # Add the filter to the frame object
    query_filter.setParseAction(
        lambda x: self.frame.update({'filter': x[1]}))
    # Wrapped in MatchFirst so the index action is attached to the wrapper
    # rather than to the shared python_string expression.
    index_name = MatchFirst(python_string)
    # Add the index to the frame object
    index_name.setParseAction(lambda x: self.frame.update({'index': x[0]}))
    resource_id = Regex(
        r'("(?:[a-zA-Z0-9\$_-])*"|\'(?:[a-zA-Z0-9\$_-]*)\')'
    ).setParseAction(removeQuotes)
    collection_id = resource_id

    #--------------------------------------------------------------------------------------
    # <from-statement> ::= "FROM" <number>
    # <to-statement>   ::= "TO" <number>
    #--------------------------------------------------------------------------------------
    from_statement = CaselessLiteral("FROM") + number
    from_statement.setParseAction(
        lambda x: self.frame.update({'from': x[1]}))
    to_statement = CaselessLiteral("TO") + number
    to_statement.setParseAction(lambda x: self.frame.update({'to': x[1]}))

    #--------------------------------------------------------------------------------------
    # <date-from-statement> ::= "FROM" <date>
    # <date-to-statement>   ::= "TO" <date>
    #--------------------------------------------------------------------------------------
    date_from_statement = CaselessLiteral("FROM") + date
    date_from_statement.setParseAction(
        lambda x: self.frame.update({'from': x[1]}))
    date_to_statement = CaselessLiteral("TO") + date
    date_to_statement.setParseAction(
        lambda x: self.frame.update({'to': x[1]}))

    #--------------------------------------------------------------------------------------
    # <time-query> ::= "TIME" [<date-from-statement>] [<date-to-statement>]
    #--------------------------------------------------------------------------------------
    time_query = CaselessLiteral("TIME") + Optional(
        date_from_statement) + Optional(date_to_statement)
    time_query.setParseAction(lambda x: self.time_frame())

    #--------------------------------------------------------------------------------------
    # <time-bounds> ::= "TIMEBOUNDS" <date-from-statement> <date-to-statement>
    #--------------------------------------------------------------------------------------
    time_bounds = CaselessLiteral(
        "TIMEBOUNDS") + date_from_statement + date_to_statement
    time_bounds.setParseAction(lambda x: self.time_bounds_frame())

    #--------------------------------------------------------------------------------------
    # <vertical-bounds> ::= "VERTICAL" <from-statement> <to-statement>
    #--------------------------------------------------------------------------------------
    vertical_bounds = CaselessLiteral(
        "VERTICAL") + from_statement + to_statement
    vertical_bounds.setParseAction(lambda x: self.vertical_bounds_frame())

    #--------------------------------------------------------------------------------------
    # <range-query> ::= "VALUES" [<from-statement>] [<to-statement>]
    #--------------------------------------------------------------------------------------
    range_query = CaselessLiteral("VALUES") + Optional(
        from_statement) + Optional(to_statement)
    # Add the range to the frame object
    range_query.setParseAction(lambda x: self.range_frame())

    #--------------------------------------------------------------------------------------
    # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords>
    # <geo-bbox>     ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords>
    #--------------------------------------------------------------------------------------
    geo_distance = CaselessLiteral(
        "DISTANCE") + distance + CaselessLiteral("FROM") + coords
    # Tokens: DISTANCE n units FROM LAT <5> LON <7>
    geo_distance.setParseAction(
        lambda x: self.frame.update({
            'lat': float(x[5]),
            'lon': float(x[7])
        }))
    geo_bbox = CaselessLiteral("BOX") + CaselessLiteral(
        "TOP-LEFT") + coords + CaselessLiteral("BOTTOM-RIGHT") + coords
    # Tokens: BOX TOP-LEFT LAT <3> LON <5> BOTTOM-RIGHT LAT <8> LON <10>;
    # each corner is stored as [lon, lat].
    geo_bbox.setParseAction(lambda x: self.frame.update({
        'top_left': [float(x[5]), float(x[3])],
        'bottom_right': [float(x[10]), float(x[8])]
    }))

    #--------------------------------------------------------------------------------------
    # <field-query> ::= <wildcard-string>
    # <term-query>  ::= "IS" <field-query>
    # <fuzzy-query> ::= "LIKE" <field-query>
    # <match-query> ::= "MATCH" <field-query>
    # <geo-query>   ::= "GEO" ( <geo-distance> | <geo-bbox> )
    #--------------------------------------------------------------------------------------
    field_query = wildcard_string
    term_query = CaselessLiteral("IS") + field_query
    term_query.setParseAction(lambda x: self.frame.update({'value': x[1]}))

    geo_query = CaselessLiteral("GEO") + (geo_distance | geo_bbox)

    fuzzy_query = CaselessLiteral("LIKE") + field_query
    fuzzy_query.setParseAction(
        lambda x: self.frame.update({'fuzzy': x[1]}))
    match_query = CaselessLiteral("MATCH") + field_query
    match_query.setParseAction(
        lambda x: self.frame.update({'match': x[1]}))

    #--------------------------------------------------------------------------------------
    # <limit-parameter>  ::= "LIMIT" <integer>
    # <depth-parameter>  ::= "DEPTH" <integer>
    # <order-parameter>  ::= "ORDER" "BY" <limited-string>
    # <offset-parameter> ::= "SKIP" <integer>
    # <query-parameter>  ::= <limit-parameter> | <order-parameter> | <offset-parameter>
    #--------------------------------------------------------------------------------------
    limit_parameter = CaselessLiteral("LIMIT") + integer
    limit_parameter.setParseAction(
        lambda x: self.json_query.update({'limit': int(x[1])}))
    depth_parameter = CaselessLiteral("DEPTH") + integer
    depth_parameter.setParseAction(
        lambda x: self.frame.update({'depth': int(x[1])}))
    order_parameter = CaselessLiteral("ORDER") + CaselessLiteral(
        "BY") + limited_string
    order_parameter.setParseAction(
        lambda x: self.json_query.update({'order': {
            x[2]: 'asc'
        }}))
    offset_parameter = CaselessLiteral("SKIP") + integer
    offset_parameter.setParseAction(
        lambda x: self.json_query.update({'skip': int(x[1])}))
    query_parameter = limit_parameter | order_parameter | offset_parameter

    #--------------------------------------------------------------------------------------
    # <search-query>      ::= "SEARCH" <field> (<range-query> | <term-query> | <fuzzy-query> | <match-query> | <vertical-bounds> | <time-bounds> | <time-query> | <geo-query>) "FROM" <index-name>
    # <collection-query>  ::= "IN" <collection-id>
    # <association-query> ::= "BELONGS" "TO" <resource-id> [ <depth-parameter> ]
    # <owner-query>       ::= "HAS" <resource-id> [ <depth-parameter> ]
    # <query>             ::= <search-query> | <association-query> | <collection-query> | <owner-query>
    #--------------------------------------------------------------------------------------
    # time_bounds is tried before time_query because the literal "TIME" is
    # a prefix of "TIMEBOUNDS".
    search_query = CaselessLiteral("SEARCH") + field + (
        range_query | term_query | fuzzy_query | match_query
        | vertical_bounds | time_bounds | time_query
        | geo_query) + CaselessLiteral("FROM") + index_name
    # Add the field to the frame object
    search_query.setParseAction(
        lambda x: self.frame.update({'field': x[1]}))
    collection_query = CaselessLiteral("IN") + collection_id
    collection_query.setParseAction(
        lambda x: self.frame.update({'collection': x[1]}))
    association_query = CaselessLiteral("BELONGS") + CaselessLiteral(
        "TO") + resource_id + Optional(depth_parameter)
    # Add the association to the frame object
    association_query.setParseAction(
        lambda x: self.frame.update({'association': x[2]}))
    owner_query = CaselessLiteral("HAS") + resource_id + Optional(
        depth_parameter)
    owner_query.setParseAction(
        lambda x: self.frame.update({'owner': x[1]}))
    query = search_query | association_query | collection_query | owner_query

    #--------------------------------------------------------------------------------------
    # <primary-query> ::= <query> [<query-filter>]
    # <atom>          ::= <query>
    # <intersection>  ::= "AND" <atom>
    # <union>         ::= "OR" <atom>
    # <sentence>      ::= <primary-query> (<intersection> | <union>)* [<query-parameter>]*
    #--------------------------------------------------------------------------------------
    primary_query = query + Optional(query_filter)
    # Set the primary query on the json_query to the frame and clear the frame
    primary_query.setParseAction(lambda x: self.push_frame())
    atom = query
    intersection = CaselessLiteral("AND") + atom
    # Add an AND operation to the json_query and clear the frame
    intersection.setParseAction(lambda x: self.and_frame())
    union = CaselessLiteral("OR") + atom
    # Add an OR operation to the json_query and clear the frame
    union.setParseAction(lambda x: self.or_frame())

    self.sentence = primary_query + (intersection ^ union) * (
        0, None) + query_parameter * (0, None)
from HTMLParser import HTMLParser
from urllib import unquote

# parse dict-like syntax
from pyparsing import (Suppress, Regex, quotedString, Word, alphas, Group,
                       alphanums, oneOf, Forward, Optional, dictOf,
                       delimitedList, removeQuotes)

# Punctuation is matched but dropped from the parse results.
LBRACK, RBRACK, LBRACE, RBRACE, COLON, COMMA = (
    Suppress(symbol) for symbol in "[]{}:,")

# Scalar values
integer = Regex(r"[+-]?\d+").setParseAction(lambda t: int(t[0]))
real = Regex(r"[+-]?\d+\.\d*").setParseAction(lambda t: float(t[0]))
string_ = (Word(alphas, alphanums + "_")
           | quotedString.setParseAction(removeQuotes))
bool_ = oneOf("true false").setParseAction(lambda t: t[0] == "true")

# Recursive value grammar: scalars, [lists] and {key: value} dicts.
jsParser = Forward()
key = string_
dict_ = (LBRACE
         - Optional(dictOf(key + COLON, jsParser + Optional(COMMA)))
         + RBRACE)
list_ = LBRACK - Optional(delimitedList(jsParser)) + RBRACK
# NOTE(review): string_ is tried before bool_, and a bare word such as
# "true" already matches string_, so bool_ looks unreachable here -- confirm
# whether that ordering is intended.
jsParser << (real | integer | string_ | bool_ | Group(list_ | dict_))


class WebParser(HTMLParser):
    """HTML parser that keeps a stack of the currently open tags."""

    def __init__(self):
        HTMLParser.__init__(self)
        self.stk = []      # names of the tags opened and not yet closed
        self.result = {}

    def handle_starttag(self, tag, attrs):
        """Push the opened tag onto the stack."""
        self.stk.append(tag)

    def handle_endtag(self, tag):
        """Drop the most recently opened tag."""
        self.stk.pop()

    def handle_data(self, data):
        """Return early for text whose innermost open tag is not a script."""
        b = 'var hClientFlashVars ='
        inside_other_tag = self.stk and self.stk[-1].lower() != 'script'
        if inside_other_tag:
            return
def __init__(self):
    """Assemble the BQL grammar and store it in ``self.parser``.

    A query has the shape ``Select(...) From(...) [Where ...]`` followed by
    any number of ``Distinct``/``Limit``/``Asc``/``Desc`` clauses and must
    consume the whole input.  Each clause hands its tokens to the matching
    ``self.parse*`` method; ``self.queryparts`` starts out empty.
    """
    self.queryparts = {}

    def any_keyword(*names):
        """Alternation of case-insensitive keywords, tried in order."""
        choice = CaselessKeyword(names[0])
        for name in names[1:]:
            choice = choice | CaselessKeyword(name)
        return choice

    def clause(head, body):
        """``head ( body )`` with the parentheses suppressed."""
        return head + open_paren + body + close_paren

    # Literal values
    string_value = quotedString.setParseAction(removeQuotes)
    int_value = Word(nums).setParseAction(lambda t: int(t[0]))
    float_value = Combine(Word(nums) + "." + Word(nums)).setParseAction(
        lambda t: float(t[0]))
    value = string_value | float_value | int_value
    value_list = delimitedList(value)

    # Punctuation (matched, never reported)
    open_paren = Suppress(Literal("("))
    close_paren = Suppress(Literal(")"))
    open_bracket = Suppress(Literal("["))
    close_bracket = Suppress(Literal("]"))
    comma = Suppress(Literal(","))

    # Field names are quoted strings
    field = quotedString.setParseAction(removeQuotes)
    field_list = delimitedList(field)

    # Operator vocabularies
    bql_type = any_keyword(
        "$double", "$string", "$object", "$array", "$binary", "$bool",
        "$date", "$null", "$int32", "$int", "$int64", "$timestamp",
        "$exists", "$nexists")
    compare_operator = any_keyword(
        "$lt", "$lte", "$gt", "$gte", "$eq", "$neq", "$regex")
    include_operator = any_keyword("$in", "$nin")

    # Conditions usable inside Where/And/Or
    typecheck = clause(bql_type, Optional(field_list))
    typecheck.setParseAction(self.parseType)

    comparison = (compare_operator + open_paren + field + comma + value
                  + close_paren)
    comparison.setParseAction(self.parseComparison)

    inclusion = (include_operator + open_paren + field + comma
                 + open_bracket + Optional(value_list) + close_bracket
                 + close_paren)
    inclusion.setParseAction(self.parseInclusion)

    conditions = ZeroOrMore(typecheck | comparison | inclusion)

    # Select statement
    select_stmt = clause(Suppress(CaselessKeyword("Select")),
                         Optional(field_list))
    select_stmt.setParseAction(self.parseSelect)

    # From statement
    from_stmt = clause(Suppress(CaselessKeyword("From")),
                       Optional(field_list))
    from_stmt.setParseAction(self.parseFrom)

    # Where statement, optionally followed by Or(...) / And(...) groups
    and_group = clause(Suppress(CaselessKeyword("And")), conditions)
    and_group = and_group.setParseAction(self.parseAnd)
    or_group = clause(Suppress(CaselessKeyword("Or")), conditions)
    or_group = or_group.setParseAction(self.parseOr)
    where_head = Suppress(CaselessKeyword("Where")) + conditions
    where_stmt = (where_head.setParseAction(self.parseWhere)
                  + ZeroOrMore(or_group | and_group))

    # Result-set management: distinct, limit and ascending/descending sort
    distinct = clause(Suppress(CaselessKeyword("Distinct")), field_list)
    distinct = distinct.setParseAction(self.parseDistinct)
    limit = clause(Suppress(CaselessKeyword("Limit")), int_value)
    limit = limit.setParseAction(self.parseLimit)
    sort = clause(CaselessKeyword("Asc") | CaselessKeyword("Desc"),
                  field_list)
    sort.setParseAction(self.parseSort)
    cursor_control = ZeroOrMore(distinct | limit | sort)

    # Complete query
    self.parser = (select_stmt + from_stmt + Optional(where_stmt)
                   + cursor_control + StringEnd())
Literal("sfixed64") | Literal("bool") | Literal("string") | Literal("bytes") ).setResultsName("fieldType") messageRecursive = Forward() msgName = Word(alphas).setResultsName("messageName") fieldName = Word(alphas).setResultsName("fieldName") default = Literal("[") + \ Literal("default") + \ quotedString.setParseAction( removeQuotes ).setResultsName("defaultValue") + \ Literal("]") fieldUsage = ( Literal("required") | \ Literal("optional") ).setResultsName("fieldUsage") tag = Word(nums).setResultsName("tag") field = Group( fieldUsage + \ fieldType + \ fieldName + \ Literal("=") + \
elif t == '-': # Next tokens needs to be negated negation = True else: # Append to query the token if negation: t = ~t if operation == 'or': query |= t else: query &= t return query NO_BRTS = printables.replace('(', '').replace(')', '') SINGLE = Word(NO_BRTS.replace('*', '')) WILDCARDS = Optional('*') + SINGLE + Optional('*') + WordEnd(wordChars=NO_BRTS) QUOTED = quotedString.setParseAction(removeQuotes) OPER_AND = CaselessLiteral('and') OPER_OR = CaselessLiteral('or') OPER_NOT = '-' TERM = Combine( Optional(Word(alphas).setResultsName('meta') + ':') + (QUOTED.setResultsName('query') | WILDCARDS.setResultsName('query'))) TERM.setParseAction(createQ) EXPRESSION = operatorPrecedence( TERM, [(OPER_NOT, 1, opAssoc.RIGHT), (OPER_OR, 2, opAssoc.LEFT), (Optional(OPER_AND, default='and'), 2, opAssoc.LEFT)]) EXPRESSION.setParseAction(unionQ)
# Projection columnName = delimitedList(identifier, DOT, combine=True).setParseAction(Identifier)( "column" ) # TODO: x AS y, x y, x `y`, x 'y', `x`, 'x' columnNameList = Group(delimitedList(STAR | columnName)).setParseAction(ListValue) tableName = delimitedList(identifier, DOT, combine=True).setParseAction(Identifier)("table") tableNameList = Group(delimitedList(tableName)).setParseAction(ListValue) whereExpr = Forward() # WHERE # TODO: indirect comparisons (e.g. "table1.field1.xyz = 3" becomes "table1.any(field1.xyz == 3)") # TODO: math expression grammar (for both lval and rval) equalityOp = OP_VAL_NULLSAFE_EQUAL ^ OP_EQUAL ^ OP_NOTEQUAL ^ OP_LT ^ OP_GT ^ OP_GTE ^ OP_LTE likeOp = Optional(LOGOP_NOT) + OP_LIKE betweenOp = Optional(LOGOP_NOT) + OP_BETWEEN # [ NOT ] BETWEEN stringValue = quotedString.setParseAction(StringValue) realNumber = ( Combine( Optional(sign) + ( # decimal present ((Word(nums) + DOT + Optional(Word(nums)) | (DOT + Word(nums))) + Optional(E + Optional(sign) + Word(nums))) | # negative exp (Word(nums) + Optional(E + Optional(MINUS) + Word(nums))) ) ).setParseAction(RealValue) ).setName( "real" ) # .1, 1.2, 1.2e3, -1.2e+3, 1.2e-3 intNumber = (
class SearchParser(object):
    """The parser for bauble.search.MapperSearch

    The grammar is built once, as class attributes.  ``statement`` is the
    entry point and accepts, in this order of preference, a ``<domain> where
    <filter>`` query, a domain expression, or a plain list of values.
    """

    # --- values -----------------------------------------------------------
    numeric_value = Regex(
        r'[-]?\d+(\.\d*)?([eE]\d+)?'
    ).setParseAction(NumericToken).setResultsName('number')
    unquoted_string = Word(alphanums + alphas8bit + '%.-_*;:')
    string_value = (
        quotedString.setParseAction(removeQuotes) | unquoted_string
    ).setParseAction(StringToken).setResultsName('string')

    none_token = Literal('None').setParseAction(NoneToken)
    empty_token = Literal('Empty').setParseAction(EmptyToken)

    # value_list is recursive: a typed value contains a value list
    value_list = Forward()
    typed_value = (
        Literal("|") + unquoted_string + Literal("|") +
        value_list + Literal("|")
    ).setParseAction(TypedValueToken)

    value = (
        typed_value |
        numeric_value |
        none_token |
        empty_token |
        string_value
    ).setParseAction(ValueToken).setResultsName('value')
    value_list << Group(
        OneOrMore(value) ^ delimitedList(value)
    ).setParseAction(ValueListAction).setResultsName('value_list')

    # --- domain expressions -----------------------------------------------
    domain = Word(alphas, alphanums)
    binop = oneOf('= == != <> < <= > >= not like contains has ilike '
                  'icontains ihas is')
    equals = Literal('=')
    star_value = Literal('*')
    domain_values = (value_list.copy()).setResultsName('domain_values')
    domain_expression = (
        (domain + equals + star_value + stringEnd) |
        (domain + binop + domain_values + stringEnd)
    ).setParseAction(
        DomainExpressionAction).setResultsName('domain_expression')

    # --- logical operators, matched only as whole words --------------------
    AND_ = WordStart() + (CaselessLiteral("AND") | Literal("&&")) + WordEnd()
    OR_ = WordStart() + (CaselessLiteral("OR") | Literal("||")) + WordEnd()
    NOT_ = WordStart() + (CaselessLiteral("NOT") | Literal('!')) + WordEnd()
    BETWEEN_ = WordStart() + CaselessLiteral("BETWEEN") + WordEnd()

    # --- filter expressions -------------------------------------------------
    query_expression = Forward().setResultsName('filter')
    identifier = Group(
        delimitedList(Word(alphas + '_', alphanums + '_'), '.')
    ).setParseAction(IdentifierToken)
    ident_expression = (
        Group(
            identifier + binop + value
        ).setParseAction(IdentExpressionToken) |
        (
            Literal('(') + query_expression + Literal(')')
        ).setParseAction(ParenthesisedQuery)
    )
    between_expression = Group(
        identifier + BETWEEN_ + value + AND_ + value
    ).setParseAction(BetweenExpressionAction)
    # Operators are listed from tightest to loosest binding.
    query_expression << infixNotation(
        (ident_expression | between_expression),
        [(NOT_, 1, opAssoc.RIGHT, SearchNotAction),
         (AND_, 2, opAssoc.LEFT, SearchAndAction),
         (OR_, 2, opAssoc.LEFT, SearchOrAction)])
    query = (
        domain +
        Keyword('where', caseless=True).suppress() +
        Group(query_expression) +
        stringEnd
    ).setParseAction(QueryAction)

    # --- entry point --------------------------------------------------------
    statement = (
        query.setResultsName('query') |
        domain_expression.setResultsName('domain') |
        value_list.setResultsName('value_list')
    ).setParseAction(StatementAction).setResultsName('statement')

    def parse_string(self, text):
        '''Parse `text` with the `statement` grammar.

        `text` can be either a query, or a domain expression, or a list of
        values.  The pyparsing.ParseResults object produced by
        `self.statement.parseString` is returned unchanged.
        '''
        parsed = self.statement.parseString(text)
        return parsed