def _define_value():
    """Build the parser element that matches one literal value.

    The alternatives are tried in this order: the elements returned by
    ``_make_keyword`` for ``'true'``/``True``, ``'false'``/``False`` and
    ``'null'``/``None``, then a quoted string whose surrounding quotes are
    removed, then ``pyparsing_common.number``.
    """
    keyword_true = _make_keyword('true', True)
    keyword_false = _make_keyword('false', False)
    keyword_null = _make_keyword('null', None)

    quoted_text = quotedString()
    quoted_text.setParseAction(removeQuotes)

    numeric = pyparsing_common.number()

    literal_value = keyword_true | keyword_false | keyword_null
    literal_value = literal_value | quoted_text | numeric
    return literal_value
def parse_header_line(lineString):
    """Parse one ``##INFO=<...>`` or ``##FORMAT=<...>`` header line.

    The accepted shape is
    ``##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">``:
    a comma separated list of ``key=value`` pairs whose keys are ``ID``,
    ``Type``, ``Description`` or ``Number``.  The grammar is matched with
    ``leaveWhitespace()``, so whitespace between tokens is not skipped.

    :param lineString: the raw header line.
    :return: dict mapping every key found on the line to its value; the
        ``Description`` value has its surrounding quotes removed.
    :raises ParseException: if the line does not match the grammar; the
        offending line is printed before the exception propagates.
    """
    lineName = Literal("##").suppress() + (Literal("FORMAT") | Literal("INFO"))
    sentence = quotedString(r'"').setParseAction(removeQuotes)

    def make_kv(key, valParser):
        # ``key=value`` with the ``=`` dropped from the results.
        return Literal(key) + Literal("=").suppress() + valParser

    value_type = (
        Literal("Float")
        | Literal("String")
        | Literal("Integer")
        | Literal("Character")
        | Literal("Flag")
    )
    keyVal = (
        make_kv("ID", Word(alphas + nums + '.'))
        | make_kv("Type", value_type)
        | make_kv("Description", sentence)
        | make_kv("Number", Word(alphas + nums))
    )
    fields = delimitedList(keyVal, ",")
    line = lineName + Literal("=<").suppress() + fields + Literal(">").suppress()

    try:
        res = line.leaveWhitespace().parseString(lineString)
    except ParseException:
        print(lineString)
        raise

    # res[0] is the metadata type (INFO/FORMAT); the remaining tokens
    # alternate key, value, key, value, ...
    tokens = list(res[1:])
    return dict(zip(tokens[::2], tokens[1::2]))
def parse_header_line(lineString):
    """Parse one ``##INFO=<...>`` or ``##FORMAT=<...>`` header line.

    The accepted shape is
    ``##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">``:
    a comma separated list of ``key=value`` pairs whose keys are ``ID``,
    ``Type``, ``Description`` or ``Number``.  The grammar is matched with
    ``leaveWhitespace()``, so whitespace between tokens is not skipped.

    :param lineString: the raw header line.
    :return: dict mapping every key found on the line to its value; the
        ``Description`` value has its surrounding quotes removed.
    :raises ParseException: if the line does not match the grammar; the
        offending line is printed before the exception propagates.
    """
    lineName = Literal("##").suppress() + (Literal("FORMAT") | Literal("INFO"))
    sentence = quotedString(r'"').setParseAction(removeQuotes)

    def make_kv(key, valParser):
        # ``key=value`` with the ``=`` dropped from the results.
        return Literal(key) + Literal("=").suppress() + valParser

    value_type = (
        Literal("Float")
        | Literal("String")
        | Literal("Integer")
        | Literal("Character")
        | Literal("Flag")
    )
    keyVal = (
        make_kv("ID", Word(alphas + nums + '.'))
        | make_kv("Type", value_type)
        | make_kv("Description", sentence)
        | make_kv("Number", Word(alphas + nums))
    )
    fields = delimitedList(keyVal, ",")
    line = lineName + Literal("=<").suppress() + fields + Literal(">").suppress()

    try:
        res = line.leaveWhitespace().parseString(lineString)
    except ParseException:
        print(lineString)
        raise

    # res[0] is the metadata type (INFO/FORMAT); the remaining tokens
    # alternate key, value, key, value, ...
    tokens = list(res[1:])
    return dict(zip(tokens[::2], tokens[1::2]))
def build(parsers: dict):
    """Assemble the pyparsing grammar for a sequence of labelled statements.

    :param parsers: mapping indexed by the module constants ``STRING``,
        ``MARKER``, ``INVOCATION``, ``FUNCTION``, ``OPERATOR`` and
        ``LABEL``; each value is installed as a parse action on the
        corresponding grammar element.
    :return: ``ZeroOrMore(statement) + StringEnd()``, so the whole input
        has to be consumed by statements.
    """
    # Punctuation.
    comma = Literal(",")
    rb = Literal(")")
    lb = Literal("(")
    srb = Literal("]")
    slb = Literal("[")

    # Terminals.
    number = Regex(r"0|[1-9][0-9]*")
    string = quotedString()
    name = Word(alphanums)
    label = Keyword(STRONG) | Keyword(WEAK) | Literal(SHORT_WEAK)
    # PARAM[<number>] collapsed into a single token.
    param = Combine(Keyword(PARAM) + slb + number + srb)
    marker = Keyword(RESULT) | Keyword(TRUE) | Keyword(FALSE) | Keyword(
        THIS) | Keyword(_THIS) | param
    function = Keyword(GET)
    get = Literal(GETATTR)

    # Operator tiers; tier 1 takes atoms as operands, every later tier
    # takes the expression built by the tier before it.
    operator1 = Literal(MUL) | Literal(DIV) | Literal(MOD)
    operator2 = Literal(ADD) | Literal(SUB)
    operator3 = Literal(EQUAL) | Literal(NOT_EQUAL)
    # IS_NOT is split on spaces into consecutive keywords and is listed
    # before IS so the longer form is tried first.
    operator3 |= And(Keyword(word) for word in IS_NOT.split(" ")) | Keyword(IS)
    operator4 = Literal(GREATER_OR_EQUAL) | Literal(GREATER) | Literal(
        LOWER_OR_EQUAL) | Literal(LOWER)
    operator5 = Keyword(AND)
    operator6 = Keyword(OR)
    operator7 = Keyword(FOLLOW)

    expression = Forward()

    # setParseAction mutates the element and returns it, so the *_st
    # names below are the same objects as string / name / marker.
    string_st = string.setParseAction(parsers[STRING])
    name_st = name.setParseAction(parsers[STRING])
    marker_st = marker.setParseAction(parsers[MARKER])
    tuple_st = expression + ZeroOrMore(comma + expression)
    round_invocation_st = (lb + Optional(tuple_st) + rb).setParseAction(
        parsers[INVOCATION])
    # Operands are wrapped in Suppress: their parse actions still run,
    # but their tokens are dropped from the enclosing result.
    function_st = (function + Suppress(round_invocation_st)).setParseAction(
        parsers[FUNCTION])
    getattr_st = (marker_st | name_st) + OneOrMore(
        (get + Suppress(name_st)).setParseAction(parsers[OPERATOR]))
    atom_st = (lb + expression + rb) | function_st | string_st | getattr_st | marker_st

    # Each reassignment of operator_st layers one more operator tier on
    # top of the expression built so far.
    operator_st = atom_st + ZeroOrMore(
        (operator1 + Suppress(atom_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator2 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator3 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator4 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator5 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator6 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator7 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    expression << operator_st

    # NOTE(review): enablePackrat is a static method in pyparsing, so this
    # presumably switches memoization on for every parser, not only for
    # getattr_st -- confirm that is intended.
    getattr_st.enablePackrat()

    # A statement is an optional label (STRONG when absent) followed by
    # an expression whose own tokens are suppressed.
    statement = (Optional(label, STRONG) + Suppress(expression)).setParseAction(parsers[LABEL])
    return ZeroOrMore(statement) + StringEnd()
def _get_requirements_build_gradle(path: str) -> list:
    """
    Get list of requirements from Gradle project.

    Files supported are build.gradle

    Two ``compile`` notations are recognised: a single quoted string
    (``compile 'group:name:version'``) and the explicit
    ``compile group: '...', name: '...', version: '...'`` form.

    :param path: Project path
    :return: list of ``(file_path, name, version)`` tuples; ``version`` is
        ``None`` when a quoted package string contains no ``:``.
    """
    # The grammars do not depend on the file being scanned, so they are
    # built once instead of once per build.gradle file.
    quoted = MatchFirst([quotedString('"'), quotedString("'")])
    quoted.setParseAction(lambda x: [x[0][1:-1]])  # strip the quotes

    grammars: list = [
        Suppress(Keyword('compile') + Optional('('))
        + quoted.copy()('package'),
        Suppress(Keyword('compile') + Optional('('))
        + Suppress(Keyword('group') + ':')
        + quoted.copy()('group') + Suppress(',')
        + Suppress(Keyword('name') + ':')
        + quoted.copy()('name') + Suppress(',')
        + Suppress(Keyword('version') + ':')
        + quoted.copy()('version'),
    ]

    reqs = []
    for file_path in full_paths_in_dir(path):
        if not file_path.endswith('build.gradle'):
            continue

        with open(file_path, encoding='latin-1') as file_fd:
            file_content = file_fd.read()

        for grammar in grammars:
            for tokens, _, _ in grammar.scanString(file_content):
                matches = tokens.asDict()
                if 'package' in matches:
                    # The text after the last ':' is taken as the version.
                    if ':' in matches['package']:
                        name, version = matches['package'].rsplit(':', 1)
                    else:
                        name, version = matches['package'], None
                    reqs.append((file_path, name, version))
                else:
                    # The explicit form yields two entries: one for
                    # "group:name" and one for the bare group.
                    reqs.append(
                        (file_path,
                         f"{matches['group']}:{matches['name']}",
                         matches['version']))
                    reqs.append(
                        (file_path, matches['group'], matches['version']))
    return reqs
def suite_grammar():
    """Build the pyparsing grammar for a ``suite ... endsuite`` definition.

    A suite is the keyword ``suite``, an identifier, any number of
    ``clock``/``limit``/``defstatus``/``edit`` statements and ``family``
    blocks, and a closing ``endsuite``.  Families nest and may contain
    tasks; ``endtask`` is optional while ``endfamily`` is required.
    Text starting with ``#`` is ignored up to the end of the line.

    :return: the parser element for a whole suite.
    """
    colon = pp.Literal(':').suppress()

    sms_node_path = pp.Word('./_' + pp.alphanums)
    identifier = pp.Word(pp.alphanums, pp.alphanums + '_')
    # NOTE(review): quotedString(pp.printables) only attaches a results
    # name equal to the printable characters; kept as is because callers
    # may depend on the resulting parse results.
    var_value = pp.Word(pp.printables) ^ pp.quotedString(pp.printables)
    sms_comment = pp.Word('#') + pp.Optional(pp.restOfLine)

    # Single-line statements.
    sms_var = pp.Group(pp.Keyword('edit') + identifier + var_value)
    sms_label = pp.Group(pp.Keyword('label') + identifier + var_value)
    sms_meter = pp.Group(
        pp.Keyword('meter') + identifier + pp.Word(pp.nums) * 3)
    sms_limit = pp.Group(
        pp.Keyword('limit') + identifier + pp.Word(pp.nums))
    sms_in_limit = pp.Group(
        pp.Keyword('inlimit') + sms_node_path + colon + identifier)
    sms_trigger = pp.Group(pp.Keyword('trigger') + pp.restOfLine)
    sms_repeat = pp.Group(
        pp.Keyword('repeat') + pp.Keyword('date') + identifier
        + pp.Word(pp.nums) * 2 + pp.Optional(pp.Word(pp.nums)))
    sms_defstatus = pp.Group(
        pp.Keyword('defstatus')
        + (pp.Keyword('suspended') ^ pp.Keyword('complete')
           ^ pp.Keyword('queued')))
    sms_clock = pp.Group(
        pp.Keyword('clock') + pp.Keyword('hybrid') + pp.Word(pp.nums))
    sms_time = pp.Group(
        pp.Keyword('time')
        + pp.ZeroOrMore(
            pp.Word(pp.nums + ':') ^ pp.Word(pp.nums + ':+')))

    # Containers.
    sms_task = pp.Group(
        pp.Keyword('task')
        + identifier
        + pp.ZeroOrMore(
            sms_defstatus ^ sms_trigger ^ sms_in_limit ^ sms_label
            ^ sms_meter ^ sms_var ^ sms_time
        )
    ) + pp.Optional(pp.Keyword('endtask').suppress())

    # Families are recursive, hence the Forward placeholder.
    sms_family = pp.Forward()
    sms_family << (
        pp.Group(
            pp.Keyword('family')
            + identifier
            + pp.ZeroOrMore(
                sms_defstatus ^ sms_in_limit ^ sms_limit ^ sms_trigger
                ^ sms_var ^ sms_task ^ sms_family ^ sms_repeat ^ sms_time
            )
        )
        + pp.Keyword('endfamily').suppress()
    )

    sms_suite = (
        pp.Keyword('suite')
        + identifier
        + pp.ZeroOrMore(
            sms_clock ^ sms_limit ^ sms_defstatus ^ sms_var ^ sms_family)
        + pp.Keyword('endsuite').suppress()
    )
    sms_suite.ignore(sms_comment)
    return sms_suite
def suite_grammar():
    """Build the pyparsing grammar for a ``suite ... endsuite`` definition.

    A suite is the keyword ``suite``, an identifier, any number of
    ``clock``/``limit``/``defstatus``/``edit`` statements and ``family``
    blocks, and a closing ``endsuite``.  Families nest and may contain
    tasks; ``endtask`` is optional while ``endfamily`` is required.
    Text starting with ``#`` is ignored up to the end of the line.

    :return: the parser element for a whole suite.
    """
    colon = pp.Literal(':').suppress()

    sms_node_path = pp.Word('./_' + pp.alphanums)
    identifier = pp.Word(pp.alphanums, pp.alphanums + '_')
    # NOTE(review): quotedString(pp.printables) only attaches a results
    # name equal to the printable characters; kept as is because callers
    # may depend on the resulting parse results.
    var_value = pp.Word(pp.printables) ^ pp.quotedString(pp.printables)
    sms_comment = pp.Word('#') + pp.Optional(pp.restOfLine)

    # Single-line statements.
    sms_var = pp.Group(pp.Keyword('edit') + identifier + var_value)
    sms_label = pp.Group(pp.Keyword('label') + identifier + var_value)
    sms_meter = pp.Group(
        pp.Keyword('meter') + identifier + pp.Word(pp.nums) * 3)
    sms_limit = pp.Group(
        pp.Keyword('limit') + identifier + pp.Word(pp.nums))
    sms_in_limit = pp.Group(
        pp.Keyword('inlimit') + sms_node_path + colon + identifier)
    sms_trigger = pp.Group(pp.Keyword('trigger') + pp.restOfLine)
    sms_repeat = pp.Group(
        pp.Keyword('repeat') + pp.Keyword('date') + identifier
        + pp.Word(pp.nums) * 2 + pp.Optional(pp.Word(pp.nums)))
    sms_defstatus = pp.Group(
        pp.Keyword('defstatus')
        + (pp.Keyword('suspended') ^ pp.Keyword('complete')
           ^ pp.Keyword('queued')))
    sms_clock = pp.Group(
        pp.Keyword('clock') + pp.Keyword('hybrid') + pp.Word(pp.nums))
    sms_time = pp.Group(
        pp.Keyword('time')
        + pp.ZeroOrMore(
            pp.Word(pp.nums + ':') ^ pp.Word(pp.nums + ':+')))

    # Containers.
    sms_task = pp.Group(
        pp.Keyword('task')
        + identifier
        + pp.ZeroOrMore(
            sms_defstatus ^ sms_trigger ^ sms_in_limit ^ sms_label
            ^ sms_meter ^ sms_var ^ sms_time
        )
    ) + pp.Optional(pp.Keyword('endtask').suppress())

    # Families are recursive, hence the Forward placeholder.
    sms_family = pp.Forward()
    sms_family << (
        pp.Group(
            pp.Keyword('family')
            + identifier
            + pp.ZeroOrMore(
                sms_defstatus ^ sms_in_limit ^ sms_limit ^ sms_trigger
                ^ sms_var ^ sms_task ^ sms_family ^ sms_repeat ^ sms_time
            )
        )
        + pp.Keyword('endfamily').suppress()
    )

    sms_suite = (
        pp.Keyword('suite')
        + identifier
        + pp.ZeroOrMore(
            sms_clock ^ sms_limit ^ sms_defstatus ^ sms_var ^ sms_family)
        + pp.Keyword('endsuite').suppress()
    )
    sms_suite.ignore(sms_comment)
    return sms_suite
def _construct_parser(self):
    '''Build and return the parser for boolean filter expressions.

    A condition is ``field operator value`` where the operators are the
    keys of ``self._operators`` and the value is either a quoted string
    (quotes removed) or a bare word.  Conditions and parenthesised
    sub-expressions are combined by three nested levels, innermost
    first: ``not``, then ``and``, then ``or`` (all case-insensitive).
    '''
    field_name = Word(alphanums + '_.')
    comparison = oneOf(list(self._operators.keys()))
    bare_value = Word(alphanums + '-_,./*@+')
    quoted = quotedString('quoted_value').setParseAction(removeQuotes)
    condition = Group(
        field_name + comparison + (quoted | bare_value)
    )('condition')

    not_ = Optional(Suppress(CaselessKeyword('not')))('not')
    and_ = Suppress(CaselessKeyword('and'))('and')
    or_ = Suppress(CaselessKeyword('or'))('or')

    expression = Forward()
    parenthesis = Suppress('(') + expression + Suppress(')')

    # Every level either matches its own conjunction or falls back to
    # the level beneath it.
    lower = condition | parenthesis
    levels = ((not_, False), (and_, True), (or_, True))
    for conjunction, is_binary in levels:
        level = Forward()
        if is_binary:
            # operand (conjunction operand)+ -- the lookahead makes sure
            # at least one conjunction is really present.
            lookahead = FollowedBy(lower + conjunction + lower)
            grouped = Group(lower + OneOrMore(conjunction + lower))
        else:
            # Prefix form; the lookahead uses the wrapped expression of
            # the Optional so that the keyword must actually be there.
            lookahead = FollowedBy(conjunction.expr + level)
            grouped = Group(conjunction + level)

        level <<= ((lookahead + grouped(conjunction.resultsName)) | lower)
        lower = level

    expression <<= lower
    return expression('expression')
def create_grammar(self):
    """Create the pyparsing elements for ``stage`` blocks and store them on ``self``.

    Sets ``beg``, ``block``, ``parallel``, ``environment``,
    ``stage_content``, ``stage``, ``commented_stage`` and ``any_stage``.
    Nested bodies are delegated to ``self.nested``, which is defined
    outside this method (presumably it matches a delimited block --
    confirm against its definition).
    """
    # Skip forward to a line that starts with an optional "/*" followed
    # by "stage"; "stages" is ignored while skipping.
    self.beg = SkipTo(LineStart() + Literal('/*')*(0, 1) + Literal('stage'), ignore=Literal('stages'))
    # Placeholder so that parallel sections can contain blocks again.
    self.block = Forward()
    self.parallel = Suppress('parallel') + self.nested(self.block)
    # Keep only the first token of a parallel section.
    self.parallel.setParseAction(lambda t: t[0])
    self.environment = Suppress('environment') + self.nested()
    # A stage body is either a nested section holding a parallel section
    # (environment sections are suppressed) or some other nested section
    # whose content is dropped entirely.
    self.stage_content = (
        self.nested((self.parallel | self.environment.suppress()), 'parallel') |
        self.nested().suppress()
    )
    # 'stage' + '(' is concatenated first, so the literal matched is "stage(".
    # The trailing '*' on the results name collects every match in a list.
    self.stage = Group(
        Suppress('stage' + '(') +
        quotedString('stage_name').setParseAction(removeQuotes) +
        Suppress(')') +
        self.stage_content)(
        self.STAGE_KEY + '*'
    )
    # A stage wrapped in /* ... */ is reported under its own key.
    self.commented_stage = Group(Suppress('/*') + self.stage + Suppress('*/'))(self.COMMENTED_STAGE_KEY + '*')
    self.any_stage = self.stage | self.commented_stage
    self.block << Group(self.parallel | self.any_stage)('block*')
def connexion(query):
    """Parse a ``mysql -u <user> -p "<password>";`` connection command.

    The keywords ``mysql``, ``u`` and ``p`` are case-insensitive.  The
    user part is a comma separated list of alphabetic words that are
    upper-cased; the password is a quoted string and keeps its quotes.
    Text from ``--`` to the end of a line is ignored.

    :param query: the command text to parse.
    :return: the pyparsing results, with the named entries ``user`` and
        ``pwd``.
    :raises ParseException: if ``query`` does not match the grammar.
    """
    # define SQL tokens
    MYSQL, U, P = map(CaselessKeyword, "mysql u p".split())
    ident = Word(alphas).setName("identifier")
    userName = delimitedList(ident).setName("user")
    userName.addParseAction(ppc.upcaseTokens)
    pwdValue = quotedString()

    # define the grammar
    stmt = Forward()
    stmt <<= (MYSQL + Literal('-') + U + userName("user")
              + Literal('-') + P + pwdValue("pwd") + Literal(';'))

    # define Oracle comment format, and ignore them
    oracleSqlComment = "--" + restOfLine
    stmt.ignore(oracleSqlComment)

    return stmt.parseString(query)
def parser(file_name):
    """Parse a configuration file into nested dictionaries.

    The file is scanned for five kinds of parent entries, each optionally
    followed by child lines of one kind (see ``child_ref`` below).  Every
    matched line is converted with ``parse`` (defined elsewhere in this
    module); children are collected in a list stored on the parent under
    the child keyword.

    :param file_name: path of the configuration file to read.
    :return: tuple ``(out_dict, excel_dict)``: ``out_dict`` maps each
        parent keyword to the list of parsed parents, ``excel_dict`` maps
        ``"<type>-<name>"`` to a status record for reporting.
    """
    LOG.info('Parser Started')
    with open(file_name, 'r') as input_config:
        input_data = input_config.read()
        # Seek to the end to learn the file size for the progress bar.
        input_config.seek(0, 2)
        file_size = input_config.tell()

    # NOTE(review): several elements below pass a whole word to Word(),
    # e.g. Word('graceful'), Word("enable"), Word("activate").  Word
    # treats its argument as a set of allowed characters, not as a
    # literal -- confirm that this is intended.

    # Grammar 1:
    # source-address-list Anywhere owner System
    key = Keyword("source-address-list")
    name = Word(printables)
    system = Keyword("owner")
    system_name = Word(alphanums)
    comment_key = Keyword("comments")
    comment = quotedString | Word(printables)
    grammer1 = Group(key + name + system + system_name +
                     Optional(comment_key + comment))

    # Grammar 2 (child of grammar 1):
    # ip address 10.10.10.10 255.255.255.255
    key1 = "ip address"
    ipaddress = Combine(Word(nums) + ('.' + Word(nums)) * 3)
    # The Empty() element inserts the token 'Mask' in front of the
    # second dotted quad.
    ipaddress1 = Empty().addParseAction(
        replaceWith('Mask')) + Combine(Word(nums) + ('.' + Word(nums)) * 3)
    grammer2 = Group(key1 + Optional(ipaddress) +
                     Optional(ipaddress1) + Optional('::/0'))

    # Grammar 3:
    # domain-list <name> owner System
    key = Keyword("domain-list")
    name = Word(printables)
    own = Keyword("owner")
    owner_name = Word(alphanums)
    # NOTE(review): the keyword here is 'comment' while grammars 1 and 7
    # use 'comments' -- confirm which spelling the input uses.
    comments = Keyword('comment')
    comment = quotedString | Word(printables)
    grammer3 = Group(key + name + own + owner_name +
                     Optional(comments + comment))

    # Grammar 4 (child of grammar 3): "domain" not followed by "-",
    # then the domain name.
    domain_key = Keyword("domain")
    domain_name = Word(printables)
    grammer4 = Group(domain_key + ~Literal('-') + domain_name)

    # Grammar 5:
    # answer vip 10.10.10.10 name <name> location
    # "<location>" manual-reactivation disable activate
    answer_key = Keyword("answer vip")
    ipaddress = Combine(Word(nums) + ('.' + Word(nums)) * 3)
    name_key = Keyword("name")
    name = Word(alphanums)
    location_key = Keyword("location")
    location = quotedString
    manual_reactivation_key = Keyword("manual-reactivation")
    manual_reactivation = Keyword("disable") | Keyword("enable")
    # Inserts the token 'Mode' in front of "activate".
    activate_key = Empty().addParseAction(
        replaceWith('Mode')) + Keyword("activate")
    grammer5 = Group(answer_key + ipaddress + name_key + name + location_key +
                     location + Optional(manual_reactivation_key) +
                     Optional(manual_reactivation) + Optional(activate_key))

    # Grammar 6 (child of grammar 5), three variants:
    # keepalive type tcp port <port> ip-address <ip> <<retries>>
    #     <<successful-probes>> <<termination>>
    # keepalive type http-head port 80 <<path>> <<retries>>
    #     <<successful-probes>> <<shared>> <<termination>>
    # keepalive type icmp ip-address <ip> <<retries>>
    #     <<successful-probes>>
    key = Keyword('keepalive')
    tcp_key = Keyword("type tcp")
    http_key = Keyword("type http-head")
    icmp_key = Keyword("type icmp")
    port_key = Keyword("port")
    num = Word(nums)
    ip_add_key = Keyword("ip-address")
    ip_add = Combine(Word(nums) + ('.' + Word(nums)) * 3)
    retry = Optional(Keyword('retries') + num)
    probe = Optional(Keyword('successful-probes') + num)
    shared = Optional(Keyword('shared') + ip_add)
    path = Optional(Keyword('path') + ip_add)
    termination = Optional(
        Keyword('termination') + (Word('graceful') | Word('reset')))
    grammer6_1 = Group(key + tcp_key + port_key + num + ip_add_key + ip_add +
                       Optional(retry) + Optional(probe) +
                       Optional(termination))
    grammer6_2 = Group(key + http_key + path + port_key + num + retry +
                       probe + shared + termination)
    grammer6_3 = Group(key + icmp_key + Optional(ip_add_key + ip_add) +
                       retry + probe)
    grammer6 = grammer6_1 | grammer6_2 | grammer6_3

    # Grammar 7:
    # answer-group <name> owner System type vip comment "comment"
    key = Keyword("answer-group")
    key_name = Word(printables)
    owner_key = Keyword("owner")
    owner_name = Word(alphanums)
    type_key = Keyword("type")
    type_name = Word(alphas)
    comment_key = Keyword("comments")
    comments = quotedString() | Word(printables)
    grammer7 = Group(key + key_name + owner_key + owner_name + type_key +
                     type_name + Optional(comment_key + comments))

    # Grammar 8 (child of grammar 7):
    # answer-add 10.10.10.10 name MDC-PROD-SMTP-ACE
    #     weight 1 order 1 load-threshold 254 suspend
    key = Keyword('answer-add')
    key_ip = Combine(Word(nums) + ('.' + Word(nums)) * 3)
    name_key = Keyword('name')
    name_val = Word(printables)
    weight_key = Keyword('weight')
    weight_val = Word(nums)
    order_key = Keyword('order')
    order_val = Word(nums)
    thres_key = Keyword('load-threshold')
    thres_val = Word(nums)
    # Inserts the token 'Mode' in front of the trailing word.
    suspend_key = Empty().addParseAction(replaceWith('Mode')) + Word(alphas)
    grammer8 = Group(key + key_ip + name_key + name_val + weight_key +
                     weight_val + order_key + order_val + thres_key +
                     thres_val + suspend_key)

    # Grammar 9:
    # dns rule <rule name> owner System source-address-list
    #     Anywhere domain-list <dl_name> activate
    #     query a
    #     <sticky | sticky method> <domain | domain-list> timeout 15
    key = Keyword("dns rule")
    key_val = Word(printables)
    owner_key = Keyword("owner")
    owner_name = Word(alphas)
    saddlist_key = Keyword("source-address-list")
    saddlist_val = Word(alphanums)
    domain_key = Keyword("domain-list")
    domain_val = Word(printables)
    activate_key = Empty().addParseAction(
        replaceWith('Mode')) + Keyword("activate")
    query_key = Keyword("query")
    query_val = Word("a") | Word(printables)
    s_key = Keyword('sticky method') | Keyword('sticky')
    d_key = Keyword('domain') | Keyword('domain-list')
    t_key = Keyword('timeout')
    t_val = Word(nums)
    grammer9 = Group(key + key_val + owner_key + owner_name + saddlist_key +
                     saddlist_val + domain_key + domain_val + activate_key +
                     Optional(query_key + query_val) +
                     Optional(s_key + d_key + t_key + t_val))

    # Grammar 10 (child of grammar 9):
    # clause 1 vip-group <name> method ordered ttl 20
    #     count 1 <sticky|region-sticky> enabled
    #     manual-reactivation disable activate
    key = Keyword("clause")
    key_val = Word(nums)
    vip_key = Keyword("vip-group")
    vip_val = Word(printables)
    method_key = Keyword("method")
    method_val = Word(printables)
    ttl_key = Keyword("ttl")
    ttl_val = Word(nums)
    count_key = Keyword("count")
    count_val = Word(nums)
    sticky_key = Keyword("sticky") | Keyword("region-sticky")
    sticky_val = Word("enable")
    mr_key = Keyword("manual-reactivation")
    mr_val = Word("disable")
    state_key = Empty().addParseAction(replaceWith('Mode')) + Word("activate")
    grammer10 = Group(key + key_val + vip_key + vip_val + method_key +
                      method_val + ttl_key + ttl_val + count_key + count_val +
                      Optional(sticky_key + sticky_val) + mr_key + mr_val +
                      state_key)

    # Each alternative is one parent line followed by its child lines.
    testing = Group(grammer1 + ZeroOrMore(grammer2)) | Group(
        grammer3 + ZeroOrMore(grammer4)) | Group(grammer5 + ZeroOrMore(grammer6)) | Group(
        grammer7 + ZeroOrMore(grammer8)) | Group(grammer9 + ZeroOrMore(grammer10))
    LOG.info('Grammar Generated')

    # Parent keyword -> key under which its children are stored.
    child_ref = {
        'source-address-list': 'ip address',
        'domain-list': 'domain',
        'answer vip': 'keepalive',
        'answer-group': 'answer-add',
        'dns rule': 'clause'
    }
    excel_dict = dict()
    out_dict = {
        'source-address-list': [],
        'domain-list': [],
        'answer vip': [],
        'answer-group': [],
        'dns rule': []
    }
    ref = ''
    print("Parsing the File ...")
    # NOTE(review): total_parse_count and c are never read again in this
    # function.
    total_parse_count = 0
    c = 0
    for match, start, end in testing.scanString(input_data):
        # incrementing total object count for reporting
        total_parse_count += 1
        matched = match.asList()
        # First two tokens of the parent line: its keyword and its name.
        # NOTE(review): ``type`` shadows the builtin inside this loop.
        type = matched[0][0][0]
        name = matched[0][0][1]
        excel_dict[type + '-' + name] = {
            'type': type,
            'name': name,
            'status': '',
            'na': '',
            'skipped': ''
        }
        msg = 'Parsing Entity [ ' + matched[0][0][0] + '->' +\
            matched[0][0][1] + ']'
        # ``end`` is the offset where this match stops in the input.
        printProgressBar(end, file_size, msg, prefix='Progress', suffix='')
        # Dictionary creation: the first line of the match is the
        # parent, all following lines are its children.  ``start`` is
        # reused here as a "first line" flag and no longer holds the
        # match offset.
        start = True
        for line in matched[0]:
            out = 'Parsing [ ' + line[0] + '->' + line[1] + ']'
            LOG.debug(out)
            if start:
                ref = line[0]
                parent = parse(line)
                start = False
            else:
                child = parse(line)
                if child_ref[ref] not in parent:
                    parent[str(child_ref[ref])] = [child]
                else:
                    parent[str(child_ref[ref])].append(child)
        out_dict[ref].append(parent)
    LOG.info('Config File Parsed')
    set_excel_dict(excel_dict)
    return out_dict, excel_dict
def rc_statement():
    """Build the parser for a single top-level statement of an RC file.

    The returned element matches the longest of: a comment (C style or
    ``//``), a ``#`` directive line, a ``LANGUAGE`` definition, a
    ``DIALOG``/``DIALOGEX`` block, a ``STRINGTABLE`` block or a ``MENU``
    block with nested ``POPUP`` sections.

    :rtype: pyparsing.ParserElement
    """
    # Comments and ``#`` directive lines.
    line_comment = "//" + restOfLine
    any_comment = cStyleComment ^ line_comment
    directive_line = Word("#", alphanums) + restOfLine

    # LANGUAGE <language>[, <sublanguage>]
    language_name = Word(alphas + "_")("language")
    sublanguage_name = Word(alphas + "_")("sublanguage")
    language_stmt = (
        "LANGUAGE" + language_name + Optional("," + sublanguage_name))

    # Block delimiters; an identifier must not be one of them.
    block_start = (Keyword("{") | Keyword("BEGIN")).setName("block_start")
    block_end = (Keyword("}") | Keyword("END")).setName("block_end")
    delimiters = block_start | block_end
    identifier = (
        ~delimiters + Word(alphas, alphanums + "_").setName("name_id"))

    # Values that can appear inside a control definition.
    digits = Word(nums)
    integer_literal = digits ^ Combine("0x" + digits)
    constant = Combine(
        Optional(Keyword("NOT")) + (identifier | integer_literal),
        adjacent=False,
        joinString=" ",
    )
    or_ed_constants = delimitedList(constant, "|")
    adjacent_strings = OneOrMore(quotedString)

    # Everything between the block header and the opening delimiter; an
    # optional CAPTION "..." in there is captured separately.
    caption_clause = (
        SkipTo(Keyword("CAPTION"), failOn=block_start)("pre_caption")
        + Keyword("CAPTION")
        + quotedString("caption")
    )
    block_options = (
        Optional(caption_clause) + SkipTo(block_start)("post_caption"))

    control_value = (
        adjacent_strings ^ constant ^ digits ^ Group(or_ed_constants))
    control = Group(
        identifier("id_control") + delimitedList(control_value)("values_"))
    controls_block = block_start + ZeroOrMore(control)("controls") + block_end

    # DIALOG / DIALOGEX and STRINGTABLE share the same block layout.
    dialog_kind = (Keyword("DIALOGEX") | Keyword("DIALOG"))("block_type")
    dialog = (
        identifier("block_id") + dialog_kind + block_options + controls_block)
    string_table = (
        Keyword("STRINGTABLE")("block_type") + block_options + controls_block)

    # MENU blocks hold POPUP sections, which nest.
    menu_item = Keyword("MENUITEM")("block_type") + (
        commaSeparatedList("values_") | Keyword("SEPARATOR"))
    popup = Forward()
    popup_entries = ZeroOrMore(Group(menu_item | popup))("elements")
    popup <<= Group(
        Keyword("POPUP")("block_type")
        + Optional(quotedString("caption"))
        + block_start
        + popup_entries
        + block_end
    )("popups*")
    menu = (
        identifier("block_id")
        + Keyword("MENU")("block_type")
        + block_options
        + block_start
        + ZeroOrMore(popup)
        + block_end
    )

    return (any_comment ^ directive_line ^ language_stmt
            ^ dialog ^ string_table ^ menu)
# An indirection is an operand wrapped in [] or ().
indirection_content = indirect_expr("expr") | basic_operand("basic")
indirection = P.Group(
    sandwich("[]", indirection_content)
    | sandwich("()", indirection_content)
)
operand = basic_operand("basic") | indirection("indirect")


def make_words(data):
    """Pack consecutive pairs of values into ``(first << 8) | second``.

    An odd-length input is padded with a trailing 0.
    """
    high_parts = data[::2]
    low_parts = data[1::2]
    return [
        (high << 8) | low
        for high, low in izip_longest(high_parts, low_parts, fillvalue=0)
    ]


def wordize_string(s, l, tokens):
    """Parse action: replace a string token by the ordinals of its characters."""
    char_codes = [ord(char) for char in tokens.string]
    # TODO(pwaller): possibly add syntax for packing string data?
    packed = False
    if packed:
        return make_words(char_codes)
    return char_codes


# Quoted string -> quotes removed -> list of character ordinals.
quoted_string = P.quotedString("string")
quoted_string.addParseAction(P.removeQuotes)
quoted_string.addParseAction(wordize_string)
datum = quoted_string | numeric_literal


def parse_data(string, loc, tokens):
    """Parse action: expand every comma separated item into its values."""
    collected = []
    for raw_item in tokens:
        item_values = datum.parseString(raw_item).asList()
        assert all(v < WORD_MAX for v in item_values), "Datum exceeds word size"
        collected += item_values
    return collected


# TODO(pwaller): Support for using macro argument values in data statement
datalist = P.commaSeparatedList.copy()
datalist.setParseAction(parse_data)
data = P.CaselessKeyword("DAT")("opcode") + P.Group(datalist)("data")

line = P.Forward()
__version__, Suppress, Empty) grammar = Forward() expression = Forward() # Literals intNumber = Regex(r'-?\d+')('integer') floatNumber = Regex(r'-?\d+\.\d+')('float') sciNumber = Combine((floatNumber | intNumber) + CaselessLiteral('e') + intNumber)('scientific') aString = quotedString('string') # Use lookahead to match only numbers in a list (can't remember why this is necessary) afterNumber = FollowedBy(",") ^ FollowedBy(")") ^ FollowedBy(LineEnd()) number = Group((sciNumber + afterNumber) | (floatNumber + afterNumber) | (intNumber + afterNumber))('number') boolean = Group(CaselessKeyword("true") | CaselessKeyword("false"))('boolean') none = Group(CaselessKeyword('none'))('none') argname = Word(alphas + '_', alphanums + '_')('argname') funcname = Word(alphas + '_', alphanums + '_')('funcname') ## Symbols leftParen = Literal('(').suppress()
def build_sequence(
        expression: str,
        evaluation_function: Callable[[str], bool] = eval) -> Sequence:
    """
    Parse an expression and return the corresponding sequence according to
    the following mini-language:

     - atom:
        - "code" or 'code': a fragment of code (e.g. Python code)
          representing a Boolean expression that evaluates to true or
          false. The semantics is "satisfied once": as soon as the code
          evaluates to true once, the truth value of the expression remains
          true. This is equivalent to "sometimes 'code'" in linear temporal
          logic.
     - constants:
        - failure: this constant always evaluates to false.
        - success: this constant always evaluates to true.
     - unary operators:
        - never A: this expression evaluates to false as soon as expression
          A evaluates to true.
     - binary operators:
        - A and B: logical and
        - A or B: logical or
        - A -> B: this is equivalent to "(next always B) since A" in linear
          temporal logic, i.e. B has to be true (strictly) since A holds.
          Notice that, due to the "satisfied once" semantics of the atoms,
          if A and B are atoms, this is merely equivalent to
          "(A and next (sometimes B))", which means A needs to be true
          strictly before B or, in other words, A must be satisfied once,
          then B must hold once.

    Keywords are case-insensitive. Parentheses can be used to group
    sub-expressions. Unary operators have precedence over binary ones
    (e.g. "A and never B" is equivalent to "A and (never B)"). Unary
    operators are right associative while binary operators are left
    associative (e.g. "A and B and C" is equivalent to "(A and B) and C").
    The binary operators are listed in decreasing priority (e.g.
    "A or B and C" is equivalent to "A or (B and C)", and
    "A and B -> C or D" is equivalent to "(A and B) -> (C or D)").

    Examples (assuming that expressions between quotes can be evaluated to
    true or false):

     - "put water" -> "put coffee": ensures water is put before coffee.
     - "put water" and "put coffee": ensures water and coffee are put. Due
       to the "satisfied once" semantics of the atoms, the order in which
       items are put does not matter.
     - (never "put water") or ("put water" -> "put coffee"): if water is
       put, then coffee must be put too.
     - never ("put water" -> "put water"): the condition will fail if water
       is put twice (but will succeed if water is put once or never put).
     - "put water" -> (never "put water"): put water exactly once.

    :param expression: an expression to parse
    :param evaluation_function: the function that will be called to
        evaluate nested pieces of code. The default is the builtin
        ``eval``, so the quoted fragments must come from a trusted source
        unless a safer function is supplied.
    :return: a *Sequence* instance.
    :raises StatechartError: if *expression* cannot be parsed.
    """
    def unary_operator(func, term):
        # The operator keyword is suppressed, so the group holds only the
        # operand.
        term = term[0]
        return func(term[0])

    def binary_operator(func, term):
        # The group holds all operands of a chain of the same operator;
        # fold them from the left.
        term = term[0]
        return reduce(func, term[1:], term[0])

    # A quoted fragment: strip the quote characters and wrap the code.
    condition = pyparsing.quotedString().setParseAction(
        lambda s, l, t: SequenceSometimes(
            SequenceCondition(t[0][1:-1], evaluation_function)))

    constants = (
        pyparsing.CaselessKeyword('failure').setParseAction(SequenceFailure)
        | pyparsing.CaselessKeyword('success').setParseAction(SequenceSuccess))

    unary_ops = [
        (pyparsing.CaselessKeyword(keyword).suppress(), 1,
         pyparsing.opAssoc.RIGHT, partial(unary_operator, func))
        for keyword, func in [
            ('never', SequenceNever),
        ]
    ]

    binary_ops = [
        (pyparsing.CaselessKeyword(keyword).suppress(), 2,
         pyparsing.opAssoc.LEFT, partial(binary_operator, func))
        for keyword, func in [
            ('and', SequenceAnd),
            ('or', SequenceOr),
            ('->', SequenceThen),
        ]
    ]

    operands = (constants | condition)
    # infixNotation replaces the deprecated operatorPrecedence alias,
    # which newer pyparsing releases no longer provide.
    expr = pyparsing.infixNotation(operands, unary_ops + binary_ops)

    try:
        return expr.parseString(expression, parseAll=True)[0]
    except pyparsing.ParseBaseException as e:
        raise StatechartError(
            'Invalid sequential condition:\n%s' % expression) from e
import pyparsing as pp

from tfci.dsl.ast import Location, ConstantType, Constant, Label, Opcode, Identifier, Empty, Map, Command, Comment, \
    Lines

# Newlines are significant: only blanks and tabs are skipped as whitespace.
# This has to run before any parser element below is created.
pp.ParserElement.setDefaultWhitespaceChars(" \t")

eol = pp.LineEnd().setName('EOL')

# Quoted string -> quotes removed -> String constant.
string_constant = (
    pp.quotedString()
    .addParseAction(pp.removeQuotes)
    .addParseAction(
        lambda s, loc, toks: Constant(
            ConstantType.String, toks[0], Location(loc)))
)

identifier_chars = pp.alphanums + '_.-'
key_path_chars = identifier_chars + ':/'


def map_identifier(s, loc, toks):
    # Every token except the last one is a leading '^'.
    caret_count = len(toks) - 1
    return Identifier(caret_count, toks[-1], Location(loc))


# Any number of '^' followed by the identifier text.
identifier = (
    pp.ZeroOrMore('^') + pp.Word(identifier_chars)
).addParseAction(map_identifier)

# '@' followed by a key path; toks[0] is the '@' itself.
address_constant = ('@' + pp.Word(key_path_chars)).addParseAction(
    lambda s, loc, toks: Constant(
        ConstantType.Address, toks[1], Location(loc)))
# need to add support for alg expressions columnRval = realNum | intNum | quotedString.addParseAction(removeQuotes) | columnName whereCondition = Group( ( columnName + binop + (columnRval | Word(printables) ) ) | ( columnName + in_ + "(" + delimitedList( columnRval ) + ")" ) | ( columnName + in_ + "(" + statement + ")" ) | ( "(" + whereExpression + ")" ) ) whereExpression << whereCondition + ZeroOrMore( ( and_ | or_ ) + whereExpression ) ''' Assignment for handoff. ''' setExpression = Forward () setStatement = Group( ( ident ) | ( quotedString("json_path") + AS + ident("name") ) | ( "(" + setExpression + ")" ) ) setExpression << setStatement + ZeroOrMore( ( and_ | or_ ) + setExpression ) optWhite = ZeroOrMore(LineEnd() | White()) """ Define the statement grammar. """ statement <<= ( Group( Group(SELECT + question_graph_expression)("concepts") + optWhite + Group(FROM + tableNameList) + optWhite + Group(Optional(WHERE + whereExpression("where"), "")) + optWhite + Group(Optional(SET + setExpression("set"), ""))("select") ) |
from pyparsing import quotedString
from datetime import datetime

# Current UTC time as a double-quoted string; the slice drops the last
# three characters of the formatted time.
nw = datetime.utcnow()
formatted = nw.strftime("%d %b %Y %X")[:-3]
nowstring = '"' + formatted + ' UTC"'
print(nowstring)

# Any quoted string that follows "__versionTime__ = " is replaced by the
# new timestamp.
quoted_time = quotedString()
quoted_time.setParseAction(lambda: nowstring)
version_time = "__versionTime__ = " + quoted_time

# Rewrite pyparsing.py in place with the updated timestamp.
with open('pyparsing.py') as oldpp:
    old_code = oldpp.read()
new_code = version_time.transformString(old_code)

with open('pyparsing.py', 'w') as newpp:
    newpp.write(new_code)
pass def checkQuotedColon(s, loc, toks): if ':' in toks[0]: raise InvalidSQL("identifier with colon : must be in double quotes.") def checkDoubleQuotes(s, loc, toks): # TODO really? if toks[0][0] == "'": raise InvalidSQL("quoted strings must use double quotes.") ident = Word(alphas, alphanums + "_:").setParseAction(checkQuotedColon) columnName = (ident | quotedString().setParseAction(checkDoubleQuotes))("columnName") whereExpression = Forward() and_ = Keyword("and", caseless=True)('and') or_ = Keyword("or", caseless=True)('or') in_ = Keyword("in", caseless=True)("in") isnotnull = Keyword("is not null", caseless=True)('notnull') binop = oneOf("= != < > >= <=", caseless=True)('binop') intNum = Word(nums) columnRval = (intNum | quotedString)('rval*') whereCondition = Group( (columnName + isnotnull) | (columnName + binop + columnRval) | (columnName + in_ + "(" + delimitedList(columnRval) + ")") | ("(" + whereExpression + ")")
def make_words(data):
    """Pack a sequence of byte values pairwise into 16-bit words.

    Even-indexed values become the high byte and odd-indexed values the low
    byte; an odd-length input is padded with a trailing 0.
    """
    high_bytes = data[::2]
    low_bytes = data[1::2]
    words = []
    for high, low in izip_longest(high_bytes, low_bytes, fillvalue=0):
        words.append(high << 8 | low)
    return words


def wordize_string(s, l, tokens):
    """Parse action: turn the matched string into a list of code points."""
    codes = [ord(ch) for ch in tokens.string]
    # TODO(pwaller): possibly add syntax for packing string data?
    packed = False
    if packed:
        return make_words(codes)
    return codes


# A quoted string has its quotes removed, then is expanded to code points.
quoted_string = P.quotedString("string")
quoted_string = quoted_string.addParseAction(P.removeQuotes)
quoted_string = quoted_string.addParseAction(wordize_string)
datum = quoted_string | numeric_literal


def parse_data(string, loc, tokens):
    """Parse action: re-parse each comma-separated item as a datum.

    Returns the flattened list of values from every item.
    """
    collected = []
    for item in tokens:
        parsed = datum.parseString(item).asList()
        assert all(v < WORD_MAX for v in parsed), "Datum exceeds word size"
        collected.extend(parsed)
    return collected


# TODO(pwaller): Support for using macro argument values in data statement
datalist = P.commaSeparatedList.copy().setParseAction(parse_data)
data = P.CaselessKeyword("DAT")("opcode") + P.Group(datalist)("data")
# messageBody ::= { fieldDefn | enumDefn | messageDefn | extensionsDefn | messageExtension }* messageBody << Group( ZeroOrMore( Group(fieldDefn | enumDefn | messageDefn | extensionsDefn | messageExtension))) # methodDefn ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [ ident ] ')' ';' methodDefn = (RPC_ - ident("methodName") + LPAR + Optional(ident("methodParam")) + RPAR + RETURNS_ + LPAR + Optional(ident("methodReturn")) + RPAR) # serviceDefn ::= 'service' ident '{' methodDefn* '}' serviceDefn = (SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(Group(methodDefn)) + RBRACE) syntaxDefn = SYNTAX_ + EQ - quotedString("syntaxString") + SEMI # packageDirective ::= 'package' ident [ '.' ident]* ';' packageDirective = Group(PACKAGE_ - delimitedList(ident, ".", combine=True) + SEMI) comment = "//" + restOfLine importDirective = IMPORT_ - quotedString("importFileSpec") + SEMI optionDirective = (OPTION_ - ident("optionName") + EQ + quotedString("optionValue") + SEMI) topLevelStatement = Group(messageDefn | messageExtension | enumDefn
messageBody << Group(ZeroOrMore( Group(fieldDefn | enumDefn | messageDefn | extensionsDefn | messageExtension) )) # methodDefn ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [ ident ] ')' ';' methodDefn = (RPC_ - ident("methodName") + LPAR + Optional(ident("methodParam")) + RPAR + RETURNS_ + LPAR + Optional(ident("methodReturn")) + RPAR + Optional(SEMI)) # serviceDefn ::= 'service' ident '{' methodDefn* '}' serviceDefn = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(Group(methodDefn)) + RBRACE # packageDirective ::= 'package' ident [ '.' ident]* ';' packageDirective = Group(PACKAGE_ - delimitedList(ident, '.', combine=True) + SEMI) comment = '//' + restOfLine importDirective = IMPORT_ - quotedString("importFileSpec") + SEMI optionDirective = OPTION_ - ident("optionName") + EQ + ident("optionValue") + SEMI topLevelStatement = Group(messageDefn | messageExtension | enumDefn | serviceDefn | importDirective | optionDirective) parser = Optional(importDirective) + Optional(packageDirective) + ZeroOrMore(topLevelStatement) parser.ignore(comment) proto_txt = file(sys.argv[1]).read() from pprint import pprint tree = parser.parseString(proto_txt, parseAll=True).asList() type_mapping = {
ZeroOrMore(Group(fieldDefn)) + RPAR + ZeroOrMore(THROWS_ + Group(exceptionsDefn))) methodDefn2 = (ident("ident") + VOID_ - ident("methodName") + LPAR + ZeroOrMore(Group(fieldDefn)) + RPAR + ZeroOrMore(THROWS_ + Group(exceptionsDefn))) # serviceDefn ::= 'service' ident '{' methodDefn* '}' serviceDefn = (SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(Group(methodDefn)) + RBRACE) typeDefn = TYPEDEF_ - typespec("typespec") + ident("ident") comment = "//" + restOfLine | cStyleComment comment1 = "#" + restOfLine importDirective = IMPORT_ - (quotedString("importFileSpec")) + SEMI optionDirective = (OPTION_ - ident("optionName") + EQ + (quotedString("optionValue") | TRUE_ | FALSE_ | ident) + SEMI) topLevelStatement = Group(messageDefn | unionDefn | messageExtension | enumDefn | serviceDefn | namespaceDefn | typeDefn | exceptionDefn | versionDefn) thrift_parser = Optional(packageDirective) + ZeroOrMore(topLevelStatement) thrift_parser.ignore(comment) thrift_parser.ignore(comment1) thrift_parser.ignore("option " + restOfLine) thrift_parser.ignore("import " + restOfLine)
def _setup_QASMParser():
    """
    Routine to initialise and return parsing blocks.

    Builds the pyparsing grammar for the QASM dialects handled here and, as a
    side effect, registers every operation/routine/block in the module-level
    ``qops``, ``cops``, ``blocks`` and ``_reservedKeys`` containers.

    :return: tuple ``(code, testLine, testKeyword, reservedNames, mathExp)``
        of pyparsing expressions.
    :raises IOError: if an operation or routine name is registered twice.
    """

    class _Op:
        """ Class to set up quantum operations """

        def __init__(self, name, argParser, version="OPENQASM 2.0", qop=False, keyOverride=None):
            """
            Build the parser for operation ``name`` and register it.

            :param name: operation name; also the keyword unless overridden.
            :param argParser: parser for what follows the keyword.
            :param version: passed to ``parse_version``; attached to matches.
            :param qop: register in ``qops`` when true, otherwise in ``cops``.
            :param keyOverride: parser used in place of the keyword; the
                result is then tagged through ``_override_keyword``.
            """
            global cops
            global qops
            global _reservedKeys
            if name in qops or name in cops:
                raise IOError(dupTokenWarning.format("Operation", name))
            self.operation = name
            if keyOverride is not None:
                self.parser = (keyOverride + argParser).addParseAction(
                    lambda s, l, t: _override_keyword(t, name))
            else:
                self.parser = CaselessKeyword(name)("keyword") + argParser

            self.version = parse_version(version)
            self.parser.addParseAction(
                lambda s, l, t: _set_version(t, self.version))

            _reservedKeys.append(name)
            if qop:
                qops[name] = self
            else:
                cops[name] = self

    class _Routine():
        """ Class to set up quantum gates, circuits, etc. """

        def __init__(self, name, pargs=False, spargs=False, gargs=False, qargs=False, returnables=False, prefixes=None, version="OPENQASM 2.0"):
            """
            Build the parser for a routine definition and store it in ``blocks``.

            :param name: routine keyword (e.g. "gate").
            :param pargs: allow an optional ``pargParser`` group.
            :param spargs: allow an optional ``spargParser`` group.
            :param gargs: allow an optional ``gargParser`` group.
            :param qargs: require a trailing ``qargParser`` list.
            :param returnables: allow an optional ``returnParser``.
            :param prefixes: keywords accepted before the routine keyword;
                falls back to the shared ``prefixParser`` when falsy.
            :param version: passed to ``parse_version``; attached to matches.
            """
            global blocks
            global _reservedKeys
            if name in qops or name in cops:
                raise IOError(dupTokenWarning.format("Routine", name))
            self.operation = name

            self.parser = Keyword(name)("keyword") + validName("gateName")

            if prefixes:
                localPrefixParser = Each(map(Optional, map(
                    Keyword, prefixes))).addParseAction(prefix_setter)
            else:
                localPrefixParser = prefixParser
            self.parser = localPrefixParser + self.parser

            # Handle different args
            req = []
            if pargs:
                req.append(Optional(pargParser)("pargs"))
            if spargs:
                req.append(Optional(spargParser)("spargs"))
            if gargs:
                req.append(Optional(gargParser)("gargs"))
            # Each() lets the enabled argument groups appear in any order.
            self.parser = self.parser + Each(req)
            if qargs:
                self.parser = self.parser + qargParser("qargs")
            if returnables:
                self.parser = self.parser + Optional(returnParser)

            self.version = parse_version(version)
            self.parser.addParseAction(
                lambda s, l, t: _set_version(t, self.version))

            _reservedKeys.append(name)
            blocks[name] = self

    class _Block():
        """ Class to set up blocks such as if, for, etc. """

        def __init__(self, name, detParser, version="OPENQASM 2.0"):
            """
            Build the parser for block header ``name`` and store it in ``blocks``.

            :param name: block keyword.
            :param detParser: parser for what follows the keyword.
            :param version: passed to ``parse_version``; attached to matches.
            """
            global blocks
            global _reservedKeys
            self.operation = name
            self.parser = Keyword(name)("keyword") + detParser

            self.version = parse_version(version)
            self.parser.addParseAction(
                lambda s, l, t: _set_version(t, self.version))

            _reservedKeys.append(name)
            blocks[name] = self

    # --- Numeric and literal tokens ---
    sign = Word("+-", exact=1)
    number = Word(nums)
    expo = Combine(CaselessLiteral("e") + Optional(sign) + number).setResultsName("exponent")

    pi = CaselessKeyword("pi")

    # One or more binary digits followed by a literal "b".
    bitstring = Combine(OneOrMore(oneOf("0 1")) + Literal("b"))

    integer = Combine(number + Optional(expo))
    real = Combine(
        Optional(sign) + (("." + number) ^ (number + "." + Optional(number))) + Optional(expo))
    # Forward declaration: filled in once every reserved keyword is known.
    validName = Forward()
    lineEnd = Literal(";")

    # NOTE(review): despite its name, _is_ matches the keyword "to".
    _is_ = Keyword("to").suppress()
    _in_ = Keyword("in")
    _to_ = Literal("->").suppress()

    # --- Comment and directive delimiters ---
    commentSyntax = "//"
    commentOpenStr = "/*"
    commentCloseStr = "*/"
    commentOpenSyntax = Literal(commentOpenStr)
    commentCloseSyntax = Literal(commentCloseStr)

    dirSyntax = "***"
    dirOpenStr = f"{dirSyntax} begin"
    dirCloseStr = f"{dirSyntax} end"

    # dirSyntax is rebound from the raw string to its parser from here on.
    dirSyntax = Keyword(dirSyntax)
    dirOpenSyntax = CaselessLiteral(dirOpenStr)
    dirCloseSyntax = CaselessLiteral(dirCloseStr)

    # --- Built-in function names by result type ---
    intFunc = oneOf("abs powrem countof fllog")
    realFunc = oneOf("abs powrem arcsin arccos arctan sin cos tan exp ln sqrt")
    boolFunc = oneOf("andof orof xorof")

    # --- Suppressed punctuation ---
    inL, inS, inR = map(Suppress, "[:]")
    vBar = Suppress("|")
    bSlash = Suppress("\\")
    brL, brR = map(Suppress, "()")

    # Expression grammars are recursive, so declare them before use.
    intExp = Forward()
    realExp = Forward()
    boolExp = Forward()

    # --- Indexing and slicing of registers ---
    index = intExp.setResultsName("index")
    # start:end[:step] where start and end may be omitted (default None).
    interval = Optional(intExp.setResultsName("start"), default=None) + inS \
        + Optional(intExp.setResultsName("end"), default=None) \
        + Optional(inS + Optional(intExp.setResultsName("step"), default=1))
    interRef = Group(inL + interval + inR)
    # Loop ranges require both start and end; only the step is optional.
    loopRef = Group(
        inL + intExp.setResultsName("start") + inS + intExp.setResultsName("end") +
        Optional(inS + Optional(intExp.setResultsName("step"), default=1)) + inR)
    ref = inL + Group(delimitedList(index ^ interval))("ref") + inR

    # --- Register references ---
    regNoRef = validName("var")
    regRef = Group(validName("var") + Optional(ref))
    regMustRef = Group(validName("var") + ref)
    regListNoRef = Group(delimitedList(regNoRef))
    regListRef = Group(delimitedList(regRef))

    # |a, b[0]| style in-place alias of several registers.
    inPlaceAlias = vBar + regListRef + vBar
    validQarg = regRef | inPlaceAlias
    aliasQarg = Group(regRef) | inPlaceAlias

    # \a, 01b\ style in-place classical register.
    inPlaceCreg = bSlash + delimitedList(regRef | bitstring) + bSlash
    validCreg = (regRef | inPlaceCreg)

    def set_maths_type(toks, mathsType):
        """ Set logical or integer or floating point """
        toks["type"] = mathsType

    # --- Expression operands and function calls ---
    intVar = integer | regRef
    realVar = real | integer | pi | regRef
    boolVar = interRef | regRef | realExp | intExp | validCreg | bitstring
    intFuncVar = (intFunc + brL + Group(Optional(delimitedList(intVar)))("args") + brR).setParseAction(Function)
    realFuncVar = ((realFunc ^ intFunc) + brL + Group(Optional(delimitedList(realVar)))("args") + brR).setParseAction(Function)
    boolFuncVar = (boolFunc + brL + Group(Optional(delimitedList(boolVar)))("args") + brR).setParseAction(Function)

    # Operator tables for infixNotation, highest precedence first.
    # NOTE(review): the unary (arity 1) rows also use the Binary action -- confirm intended.
    mathOp = [(oneOf("- +"), 1, opAssoc.RIGHT, Binary),
              (oneOf("^"), 2, opAssoc.LEFT, Binary),
              (oneOf("* / div"), 2, opAssoc.LEFT, Binary),
              (oneOf("+ -"), 2, opAssoc.LEFT, Binary)]
    logOp = [(oneOf("! not"), 1, opAssoc.RIGHT, Binary),
             (oneOf("and or xor"), 2, opAssoc.LEFT, Binary),
             (oneOf("< <= == != >= >"), 2, opAssoc.LEFT, Binary),
             (oneOf("in"), 2, opAssoc.LEFT, Binary)]

    # Each expression kind tags its result with a "type" entry.
    intExp <<= infixNotation(
        intFuncVar | intVar,
        mathOp).setParseAction(lambda s, l, t: set_maths_type(t, "int"))

    realExp <<= infixNotation(
        realFuncVar | realVar,
        mathOp).setParseAction(lambda s, l, t: set_maths_type(t, "float"))

    boolExp <<= infixNotation(
        boolFuncVar | boolVar,
        logOp).setParseAction(lambda s, l, t: set_maths_type(t, "bool"))

    mathExp = intExp ^ realExp ^ boolExp
    cregExp = bitstring("bit") ^ validCreg("reg")

    prefixes = ["unitary"]
    callMods = ["CTRL", "INV"]

    def prefix_setter(toks):
        """ Pull out prefixes of gate calls and add them into list """
        # Store one boolean per known prefix: was it present in the match?
        for prefix in prefixes:
            toks[prefix] = prefix in toks.asList()

    prefixParser = Each(map(Optional, map(Keyword, prefixes))).addParseAction(prefix_setter)

    # --- Argument lists for definitions ---
    pargParser = brL + delimitedList(validName)("pargs") + brR
    spargParser = inL + delimitedList(validName)("spargs") + inR
    gargParser = ungroup(
        nestedExpr("<", ">", delimitedList(ungroup(validName)), None))
    qargParser = delimitedList(regRef)

    # --- Argument lists for calls (expressions instead of bare names) ---
    callQargParser = delimitedList(validQarg)
    callPargParser = brL + delimitedList(realExp) + brR
    callSpargParser = inL + delimitedList(intExp) + inR

    fullArgParser = Each(
        (Optional(pargParser("pargs")), Optional(spargParser("spargs")), Optional(gargParser("gargs"))))

    callArgParser = Each(
        (Optional(callPargParser("pargs")), Optional(callSpargParser("spargs")), Optional(gargParser("gargs"))))

    returnParser = Optional(_to_ + validCreg("byprod"))

    # Call modifiers are written as "CTRL-" / "INV-"; the dash is dropped.
    modifiers = ZeroOrMore(Combine(oneOf(callMods) + Suppress("-")))

    # --- Comments ---
    commentLine = Literal(commentSyntax).suppress() + restOfLine("comment")
    # removeQuotes is applied twice, presumably to peel off both characters
    # of the two-character /* and */ delimiters.
    commentBlock = cStyleComment("comment").addParseAction(
        removeQuotes).addParseAction(removeQuotes)
    comment = commentLine | commentBlock
    comment.addParseAction(lambda s, l, t: _set_version(t, (0, 0, 0)))

    # --- Directives ---
    directiveName = Word(alphas).setParseAction(downcaseTokens)
    directiveArgs = CharsNotIn(";")

    # Single-line directive: the marker must not start a begin/end block.
    _Op("directive", directiveName("directive") + Suppress(White() * (1, )) + directiveArgs("args"),
        version="REQASM 1.0",
        keyOverride=(~dirOpenSyntax + ~dirCloseSyntax + dirSyntax))

    def split_args(toks):
        """ Split directive arguments out """
        toks[0]["keyword"] = "directive"
        toks[0]["args"] = toks[0]["args"].strip().split(" ")

    directiveStatement = directiveName("directive") + restOfLine("args") + \
        Group(ZeroOrMore(Combine(Optional(White(" ")) + ~dirCloseSyntax + Word(printables+" "))))("block")

    directiveBlock = ungroup(
        nestedExpr(
            dirOpenSyntax,
            dirCloseSyntax,
            content=directiveStatement,
            ignoreExpr=(comment | quotedString
                        )).setWhitespaceChars("\n").setParseAction(split_args))
    directiveBlock.addParseAction(lambda s, l, t: _set_version(t, (2, 1, 0)))

    # Programming lines
    # NOTE(review): version is given as a tuple here but as a string elsewhere;
    # presumably parse_version accepts both -- confirm.
    _Op("version", Empty(), version=(0, 0, 0), keyOverride=Combine(
        oneOf(versions)("type") + White() + real("versionNumber"))("version"))
    _Op("include", quotedString("file").addParseAction(removeQuotes))

    # Gate-like structures
    _Op("opaque", validName("name") + fullArgParser + Optional(qargParser("qargs")) + returnParser,
        keyOverride=prefixParser + "opaque")
    _Routine("gate", pargs=True, qargs=True)
    _Routine("circuit", pargs=True, qargs=True, spargs=True, returnables=True, version="REQASM 1.0")

    # Variable-like structures
    _Op("creg", regRef("arg"))
    _Op("qreg", regRef("arg"))
    _Op("cbit", Group(regNoRef)("arg"), version="REQASM 1.0")
    _Op("qbit", Group(regNoRef)("arg"), version="REQASM 1.0")
    _Op("defAlias", regMustRef("alias"), keyOverride="alias", version="REQASM 1.0")
    # No more on-definition aliases
    _Op("alias", regRef("alias") + _is_ + aliasQarg("target"), keyOverride="set", version="REQASM 1.0")
    _Op("val", validName("var") + Literal("=").suppress() + mathExp("val"), version="REQASM 1.0")
    _Op("set", (Group(regRef)("var") ^ inPlaceCreg("var")) + Literal("=").suppress() + cregExp("val"), version="REQASM 1.0")

    # Operations-like structures
    _Op("measure", regRef("qreg") + _to_ + regRef("creg"), qop=True)
    _Op("barrier", regListNoRef("args"))
    _Op("output", regRef("value"), qop=True, version="REQASM 1.0")
    _Op("reset", regRef("qreg"))
    _Op("exit", Empty(), version="REQASM 1.0")
    _Op("free", validName("target"), version="REQASM 1.0")
    _Op("next", validName("loopVar"), qop=True, version="REQASM 1.0")
    _Op("finish", (Literal("quantum process") | validName)("loopVar"), qop=True, version="REQASM 1.0")
    _Op("end", validName("process"), qop=True, version="REQASM 1.0")

    # Special gate call handler
    callGate = Combine(Group(modifiers)("mods") + \
                       validName("gate")) + \
        callArgParser + \
        callQargParser("qargs").addParseAction(lambda s, l, t: _override_keyword(t, "call")) + \
        returnParser
    callGate.addParseAction(lambda s, l, t: _set_version(t, (1, 2, 0)))

    # Block structures
    _Block("for", validName("var") + _in_ + loopRef("range"), version="REQASM 1.0")
    _Block("if", "(" + boolExp("cond") + ")", version="REQASM 1.0")
    _Block("while", "(" + boolExp("cond") + ")", version="OMEQASM 1.0")

    # Snapshot the parsers registered so far; the one-line block forms below
    # are built from this snapshot.
    qopsParsers = list(map(lambda qop: qop.parser, qops.values())) + [callGate, directiveBlock]
    blocksParsers = list(map(lambda block: block.parser, blocks.values()))

    # One-line forms: a block header followed directly by a single operation.
    _Op("if", blocks["if"].parser + Group(Group(Group(Or(qopsParsers))))("block"), version="OPENQASM 2.0", keyOverride=Empty())
    _Op("for", blocks["for"].parser + Group(Group(Group(Or(qopsParsers))))("block"), version="REQASM 1.0", keyOverride=Empty())
    _Op("while", blocks["while"].parser + Group(Group(Group(Or(qopsParsers))))("block"), version="OMEQASM 1.0", keyOverride=Empty())

    # Set-up line parsers
    # A valid name is any identifier that is not one of the reserved keywords.
    reservedNames = Or(map(Keyword, _reservedKeys))
    validName <<= (~reservedNames) + Word(alphas, alphanums + "_")

    copsParsers = list(map(lambda cop: cop.parser, cops.values()))

    operations = ((
        (Or(copsParsers) ^ Or(qopsParsers)) |  # Classical/Quantum Operations
        callGate |  # Gate parsers
        White()  # Blank Line
    ) + lineEnd.suppress()) ^ directiveBlock  # ; or Directives

    validLine = Forward()
    codeBlock = nestedExpr("{", "}", Suppress(White()) ^ Group(validLine), (quotedString))

    validLine <<= (
        ((operations + Optional(comment)) ^
         (Or(blocksParsers) + codeBlock("block") + Optional(lineEnd)) ^
         comment))  # Whole line comment

    # testLine matches the shape of a line (or braced block) without
    # parsing the operations inside it.
    testLine = Forward()
    dummyCodeBlock = nestedExpr(
        "{", "}", testLine, (directiveBlock | quotedString | comment)) + Optional(lineEnd)

    ignoreSpecialBlocks = (~commentOpenSyntax + ~commentCloseSyntax + ~dirOpenSyntax + ~dirCloseSyntax)

    testLine <<= (
        comment |  # Comments
        directiveBlock |  # Directives
        (ignoreSpecialBlocks + ZeroOrMore(CharsNotIn("{}")) + dummyCodeBlock) |  # Block operations
        (ignoreSpecialBlocks + ZeroOrMore(CharsNotIn("{};")) + lineEnd)
    )  # QASM Instructions

    testKeyword = (dirSyntax.setParseAction(
        lambda s, l, t: _override_keyword(t, "directive")) | Word(alphas)("keyword"))

    code = (Group(directiveBlock)) | Group(validLine)

    return code, testLine, testKeyword, reservedNames, mathExp
def banana_grammar(emitter=emit.PrintEmitter()):
    """
    Generate a banana parser that can then be used to
    parse banana content. It builds an AST on which
    operations can then be applied.

    :param emitter: Emitter handed to the connection-building actions and
                    to the root ``ast.BananaFile``.
                    NOTE(review): the default instance is created once, when
                    the function is defined, and is shared by every call
                    that omits the argument -- confirm this is intended.
    :return: Return a banana parser
    :rtype: BananaScopeParser
    """
    # Should debug
    debug_grammar = False

    # Actions
    # Each action receives pyparsing's (string, location, tokens) triple and
    # returns the AST node that replaces the matched tokens.
    def action_str_lit(s, l, t):
        return ast.StringLit(ast.make_span(s, l, t), t[0])

    def action_num_lit(s, l, t):
        return ast.Number(ast.make_span(s, l, t), t[0])

    def action_ident(s, l, t):
        return ast.Ident(ast.make_span(s, l, t), t[0])

    def action_expr(s, l, t):
        if len(t) != 1:
            raise exception.BananaGrammarBug(
                'Bug found in the grammar for expression,'
                ' Please report this bug.'
            )
        # Avoid wrapping an expression in another expression.
        if isinstance(t[0], ast.Expr):
            return t[0]
        return ast.Expr(ast.make_span(s, l, t), t[0])

    def action_dot_path(s, l, t):
        # First token is the name of the variable
        # The rest is the property path
        if isinstance(t[0], ast.StringLit) and len(t[1:]) == 0:
            return t[0]
        return ast.DotPath(ast.make_span(s, l, t), t[0], t[1:])

    def action_json_obj(s, l, t):
        return ast.JsonObj(ast.make_span(s, l, t), t)

    def action_parse_ctor_arg(s, l, t):
        # Two tokens means a named argument (name, value); the value is
        # passed first and the name second.
        if len(t) > 1:
            return ast.ComponentCtorArg(ast.make_span(s, l, t), t[1], t[0])
        else:
            return ast.ComponentCtorArg(ast.make_span(s, l, t), t[0])

    def action_parse_comp_ctor(s, l, tokens):
        comp = ast.Component(ast.make_span(s, l, tokens))
        for tok in tokens:
            if isinstance(tok, ast.Ident):
                comp.set_ctor(tok)
            elif isinstance(tok, ast.ComponentCtorArg):
                comp.add_arg(tok)
            else:
                raise exception.BananaGrammarBug(
                    'Bug found in the grammar, Please report this bug'
                )
        return comp

    def action_assignment(s, l, t):
        return ast.Assignment(ast.make_span(s, l, t), t[0], t[1])

    def action_create_connections(s, l, t):
        # The first token becomes the connection; every following token is
        # converted and connected to it in order.
        ast_conn = ast.into_connection(t[0])
        ast_conn.span = ast.make_span(s, l, t)
        for i in range(1, len(t)):
            next_conn = ast.into_connection(t[i])
            ast_conn.connect_to(next_conn, emitter)
        return ast_conn

    def action_merge_connections(s, l, t):
        ast_conn = ast.Connection(ast.make_span(s, l, t))
        ast_conn.merge_all(t, emitter)
        return ast_conn

    def action_root_ast(s, l, tokens):
        # Sort every top-level statement into the root file node.
        root = ast.BananaFile(emitter)
        for tok in tokens:
            if isinstance(tok, ast.Assignment):
                if isinstance(tok.rhs, ast.Component):
                    root.add_component_ctor(tok.lhs, tok.rhs)
                else:
                    root.add_assignment(tok.lhs, tok.rhs)
            elif isinstance(tok, ast.Connection):
                root.add_connections(tok)
            else:
                raise exception.BananaGrammarBug(
                    'Bug found in the grammar, Please report this bug.'
                )
        return root

    # TODO(Joan): Remove once it is no longer needed
    def print_stmt(s, l, t):
        print("\nPRINT AST")
        print((l, [str(x) for x in t]))
        print("END PRINT AST\n")

    def action_unimplemented(s, l, t):
        raise exception.BananaGrammarBug("unimplemented code reached")

    # Tokens
    equals = p.Literal("=").suppress().setName('"="').setDebug(debug_grammar)
    arrow = p.Literal("->").suppress().setName('"->"').setDebug(debug_grammar)
    lbra = p.Literal("[").suppress().setName('"["').setDebug(debug_grammar)
    rbra = p.Literal("]").suppress().setName('"]"').setDebug(debug_grammar)
    colon = p.Literal(":").suppress().setName('":"')
    comma = p.Literal(",").suppress().setName(",")
    less = p.Literal("<").suppress().setName('"<"')
    greater = p.Literal(">").suppress().setName('">"')
    lbrace = p.Literal("{").suppress().setName('"{"').setDebug(debug_grammar)
    rbrace = p.Literal("}").suppress().setName('"}"').setDebug(debug_grammar)
    lpar = p.Literal("(").suppress().setName('"("')
    rpar = p.Literal(")").suppress().setName('")"')

    # Keywords
    ing = p.Literal("ing").suppress()
    imp = p.Literal("import").suppress()
    fro = p.Literal("from").suppress()

    # String Literal, Numbers, Identifiers
    string_lit = p.quotedString()\
        .setParseAction(action_str_lit)\
        .setName(const.STRING_LIT)
    number_lit = p.Regex(r'\d+(\.\d*)?([eE]\d+)?')\
        .setParseAction(action_num_lit)\
        .setName(const.NUMBER)
    ident = p.Word(p.alphas + "_", p.alphanums + "_")\
        .setParseAction(action_ident)\
        .setName(const.IDENT)

    # Path for properties
    dot_prop = ident | string_lit
    dot_path = p.delimitedList(dot_prop, ".")\
        .setParseAction(action_dot_path)\
        .setName(const.DOT_PATH)\
        .setDebug(debug_grammar)

    # Expressions

    # Here, to simplify the logic, we can match directly
    # against ident and string_lit to avoid having to deal
    # only with dot_path. It also allows removing the confusion
    # where '"a"' could be interpreted as a dot_path and would thus
    # be the same as 'a'. With the following, the first will
    # always be type-checked as a String whereas the latter will
    # be type-checked as the type of the variable.
    expr = p.infixNotation(number_lit | dot_path, [
        (p.oneOf('* /'), 2, p.opAssoc.LEFT),
        (p.oneOf('+ -'), 2, p.opAssoc.LEFT),
    ], lpar=lpar, rpar=rpar)
    expr.setParseAction(action_expr)\
        .setName(const.EXPR)\
        .setDebug(debug_grammar)

    # Json-like object (values are much more than plain JSON values)
    json_obj = p.Forward()
    json_value = p.Forward()
    json_array = p.Group(
        lbra + p.Optional(p.delimitedList(json_value)) + rbra
    )
    json_array.setDebug(debug_grammar)
    json_array.setName(const.JSON_ARRAY)
    json_value <<= expr | json_obj | json_array
    json_value.setDebug(debug_grammar)\
        .setName(const.JSON_VALUE)
    # A trailing comma after the last member is accepted.
    json_members = p.delimitedList(p.Group(dot_path + colon - json_value)) +\
        p.Optional(comma)
    json_members.setDebug(debug_grammar)\
        .setName(const.JSON_MEMBERS)
    json_obj <<= p.Dict(lbrace + p.Optional(json_members) - rbrace)
    json_obj.setParseAction(action_json_obj)\
        .setName(const.JSON_OBJ)\
        .setDebug(debug_grammar)

    # Component constructor
    # An argument is either named (ident = value) or positional.
    arg = (ident + equals - (expr | json_obj)) | expr | json_obj
    arg.setParseAction(action_parse_ctor_arg)
    params = p.delimitedList(arg)
    comp_ctor = ident + lpar - p.Optional(params) + rpar
    comp_ctor.setParseAction(action_parse_comp_ctor)\
        .setName(const.COMP_CTOR)\
        .setDebug(debug_grammar)

    # Assignments
    assignment = dot_path + equals - (comp_ctor | expr | json_obj)
    assignment.setParseAction(action_assignment)

    # Connections
    connection = p.Forward()
    array_of_connection = p.Group(
        lbra + p.Optional(p.delimitedList(connection)) + rbra
    )
    array_of_connection.setParseAction(action_merge_connections)
    last_expr = ident | array_of_connection
    this_expr = p.Forward()
    # The lookahead requires at least one "a -> b" before committing to the
    # error-stop ("-") form of the chain.
    match_expr = p.FollowedBy(last_expr + arrow - last_expr) + \
        (last_expr + p.OneOrMore(arrow - last_expr))
    this_expr <<= match_expr | last_expr
    connection <<= this_expr

    match_expr.setDebug(debug_grammar)\
        .setName(const.CONNECTION) \
        .setParseAction(action_create_connections)

    # Definitions
    definition = ing - less - string_lit - greater - ident - lbrace - rbrace
    definition.setDebug(debug_grammar)\
        .setName(const.DEFINITION)\
        .setParseAction(action_unimplemented)

    # Import directive
    module_def = (imp - ident) | fro - ident - imp - ident
    module_def.setDebug(debug_grammar)\
        .setName(const.MOD_IMPORT)\
        .setParseAction(action_unimplemented)

    # Comments
    comments = "#" + p.restOfLine

    statement = assignment | \
        match_expr | \
        definition | \
        module_def

    statement.setName(const.STATEMENT)
    statement.setDebug(debug_grammar)
    # Debug aid: print_stmt prints every parsed statement and returns None,
    # so the tokens are left unchanged.
    statement.setParseAction(print_stmt)

    # Grammar
    grammar = p.OneOrMore(statement).ignore(comments)
    grammar.setParseAction(action_root_ast)

    return BananaScopeParser(grammar)
from pyparsing import Word, delimitedList, Optional, \ Group, alphas, nums, alphanums, ParseException, Forward, oneOf, quotedString, \ ZeroOrMore, Keyword class InvalidSQL(Exception): pass ident = Word(alphas, alphanums + "_:") columnName = (ident | quotedString())("columnName") whereExpression = Forward() and_ = Keyword("and", caseless=True)('and') or_ = Keyword("or", caseless=True)('or') in_ = Keyword("in", caseless=True)("in") isnotnull = Keyword("is not null", caseless=True)('notnull') binop = oneOf("= != < > >= <=", caseless=True)('binop') intNum = Word(nums) columnRval = (intNum | quotedString.setParseAction(lambda x: x[0][1:-1]))('rval*') whereCondition = Group((columnName + isnotnull) | (columnName + binop + columnRval) | (columnName + in_ + "(" + delimitedList(columnRval) + ")") | ("(" + whereExpression + ")"))('condition') whereExpression << Group(whereCondition + ZeroOrMore( (and_ | or_) + whereExpression))('expression') class SQLValidator(object):
from pprint import pprint #define the grammar # The objects OBJECT = Word(alphas, alphanums+'_') OBJECTS = delimitedList(OBJECT('object')) OBJLIST = '{' + OBJECTS('objects') + '}' #The arguments KEY = Word(alphas) POINT = Literal('.') PLUSORMINUS = Literal('+') | Literal('-') NUMBER = Word(nums) INTEGER = Regex(r'[-+]?[0-9]*') FLOAT = Regex(r'[-+]?[0-9]*\.[0-9]+') STRING = quotedString() VALUE=STRING('string') | FLOAT('float') | INTEGER('integer') KVPAIR = Group(KEY('key')+'='+VALUE('value')) KVPAIRS = delimitedList(KVPAIR('kvpair')) KVLIST = '(' + KVPAIRS('kvpairs') + ')' # The command COMNAME = Word(alphas) COMMAND = OBJLIST('objlist') + COMNAME('comname') + Optional(KVLIST('kvlist')) COMSPEC = COMMAND('command') class ServerCommand: def __init__(self, commandLine): self.commandLine = commandLine try: fn = COMSPEC.parseString(commandLine)
# methodDefn ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [ ident ] ')' ';' methodDefn = (RPC_ - ident("methodName") + LPAR + Optional(ident("methodParam")) + RPAR + RETURNS_ + LPAR + Optional(ident("methodReturn")) + RPAR) # serviceDefn ::= 'service' ident '{' methodDefn* '}' serviceDefn = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore( Group(methodDefn)) + RBRACE # packageDirective ::= 'package' ident [ '.' ident]* ';' packageDirective = Group(PACKAGE_ - delimitedList(ident, '.', combine=True) + SEMI) comment = '//' + restOfLine importDirective = IMPORT_ - quotedString("importFileSpec") + SEMI optionDirective = OPTION_ - ident("optionName") + EQ + quotedString( "optionValue") + SEMI topLevelStatement = Group(messageDefn | messageExtension | enumDefn | serviceDefn | importDirective | optionDirective) parser = Optional(packageDirective) + ZeroOrMore(topLevelStatement) parser.ignore(comment) test1 = """message Person { required int32 id = 1; required string name = 2; optional string email = 3;
# messageBody ::= { fieldDefn | enumDefn | messageDefn | extensionsDefn | messageExtension }*
# Each body element is grouped on its own so callers can walk the members.
messageBody << Group(
    ZeroOrMore(
        Group(fieldDefn | enumDefn | messageDefn | extensionsDefn
              | messageExtension)))

# methodDefn ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [ ident ] ')' ';'
# The terminating ';' from the rule above is accepted but kept optional, so
# input that parsed without it before still parses the same way.
methodDefn = (RPC_ - ident("methodName") + LPAR +
              Optional(ident("methodParam")) + RPAR + RETURNS_ + LPAR +
              Optional(ident("methodReturn")) + RPAR + Optional(SEMI))

# serviceDefn ::= 'service' ident '{' methodDefn* '}'
serviceDefn = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(
    Group(methodDefn)) + RBRACE

# syntaxDefn ::= 'syntax' '=' quotedString ';'
syntaxDefn = SYNTAX_ + EQ - quotedString("syntaxString") + SEMI

# packageDirective ::= 'package' ident [ '.' ident]* ';'
packageDirective = Group(PACKAGE_ - delimitedList(ident, '.', combine=True)
                         + SEMI)

# Line comments run from '//' to the end of the line.
comment = '//' + restOfLine

# importDirective ::= 'import' quotedString ';'
importDirective = IMPORT_ - quotedString("importFileSpec") + SEMI

# optionDirective ::= 'option' ident '=' quotedString ';'
optionDirective = OPTION_ - ident("optionName") + EQ + quotedString(
    "optionValue") + SEMI

# Alternatives are tried in the order listed.
topLevelStatement = Group(messageDefn | messageExtension | enumDefn
                          | serviceDefn | importDirective | optionDirective
                          | syntaxDefn)
__version__, ) ParserElement.enablePackrat() grammar = Forward() expression = Forward() # Literals intNumber = Combine(Optional("-") + Word(nums))("integer") floatNumber = Combine(Optional("-") + Word(nums) + Literal(".") + Word(nums))("float") sciNumber = Combine((floatNumber | intNumber) + CaselessLiteral("e") + intNumber)("scientific") aString = quotedString("string") # Use lookahead to match only numbers in a list (can't remember why this is necessary) afterNumber = FollowedBy(",") ^ FollowedBy(")") ^ FollowedBy(LineEnd()) number = Group((sciNumber + afterNumber) | (floatNumber + afterNumber) | (intNumber + afterNumber))("number") boolean = Group(CaselessKeyword("true") | CaselessKeyword("false"))("boolean") argname = Word(alphas + "_", alphanums + "_")("argname") funcname = Word(alphas + "_", alphanums + "_")("funcname") ## Symbols leftParen = Literal("(").suppress() rightParen = Literal(")").suppress() comma = Literal(",").suppress() equal = Literal("=").suppress()
def cvtDict(toks):
    """Parse action: build a dict from the matched tokens' list form."""
    return dict(toks.asList())


def cvtList(toks):
    """Parse action: wrap the matched tokens' list form in an outer list."""
    return [toks.asList()]


def _strip_unicode_delims(t):
    """Drop the first two characters and the last one from the match."""
    return t[0][2:-1]


def _strip_quote_delims(t):
    """Drop the first and last character from the match."""
    return t[0][1:-1]


# define punctuation as suppressed literals
(lparen, rparen, lbrack, rbrack,
 lbrace, rbrace, colon, comma) = [pp.Suppress(ch) for ch in "()[]{}:,"]

# Numeric literals; `real` requires a decimal point, `integer` does not.
integer = pp.Regex(r"[+-]?\d+")
integer = integer.setName("integer").setParseAction(cvtInt)
real = pp.Regex(r"[+-]?\d+\.\d*([Ee][+-]?\d+)?")
real = real.setName("real").setParseAction(cvtReal)

# Containers are mutually recursive, so declare them up front.
dictStr = pp.Forward()
listStr = pp.Forward()
tupleStr = pp.Forward()

# Scalar literals.
unistr = pp.unicodeString().setParseAction(_strip_unicode_delims)
quoted_str = pp.quotedString().setParseAction(_strip_quote_delims)
boolLiteral = pp.oneOf("True False", asKeyword=True).setParseAction(cvtBool)
noneLiteral = pp.Keyword("None").setParseAction(pp.replaceWith(None))

# Alternatives are tried in this order; `real` precedes `integer`.
listItem = (
    real
    | integer
    | quoted_str
    | unistr
    | boolLiteral
    | noneLiteral
    | pp.Group(listStr)
    | tupleStr
    | dictStr
)

# A tuple is a parenthesised, possibly empty item list; one trailing comma
# is accepted.
tupleStr <<= (
    lparen
    + pp.Optional(pp.delimitedList(listItem))
    + pp.Optional(comma)
    + rparen
)
Optional(identifier("request")) + RPAR + RETURNS + LPAR + Optional(identifier("response")) + RPAR))("method_definition") method_definition.setParseAction(method_definition_fn) service_definition = (SERVICE - identifier("service") + LBRACE + ZeroOrMore(Group(method_definition)) + RBRACE)("service_definition") service_definition.setParseAction(service_definition_fn) package_directive = (Group(PACKAGE - delimitedList(identifier, '.', combine=True) + SEMI))("package_directive") package_directive.setParseAction(package_directive_fn) import_directive = IMPORT - quotedString("import") + SEMI import_directive.setParseAction(import_directive_fn) option_directive = OPTION - identifier("option") + EQ + ( integer | quotedString)("value") + SEMI top_level_statement = Group(message_definition | enum_definition | option_directive | import_directive | service_definition)("top_level_statement") top_level_statement.setParseAction(top_level_statement_fn) syntax_directive = (SYNTAX + EQ + quotedString("syntax_version") + SEMI)("syntax_directive") parser = (Optional(syntax_directive) + Optional(package_directive) + ZeroOrMore(top_level_statement))("parser")
# Grammar for commands of the form: {obj, obj, ...} name (key="value", ...)
__author__ = "bruce"
__date__ = "$Oct 21, 2015 10:35:22 AM$"

from pyparsing import Word, alphas, alphanums, delimitedList, quotedString, Group
from pprint import pprint

# Define the grammar

# The objects: a brace-enclosed, comma-separated list of identifiers
OBJECT = Word(alphas, alphanums+'_')
OBJECTS = delimitedList(OBJECT('object'))
OBJLIST = '{' + OBJECTS('objects') + '}'

# The arguments: a parenthesised, comma-separated list of key="value" pairs
KEY = Word(alphas)
VALUE = quotedString()
KVPAIR = Group(KEY('key')+'='+VALUE('value'))
KVPAIRS = delimitedList(KVPAIR('kvpair'))
KVLIST = '(' + KVPAIRS('kvpairs') + ')'

# The command: object list, command name, then the (mandatory) argument list
COMNAME = Word(alphas)
COMMAND = OBJLIST('objlist') + COMNAME('comname') + KVLIST('kvlist')
COMSPEC = COMMAND('command')

if __name__ == "__main__":
    # Smoke test: scan a sample object list and print each object found.
    # NOTE(review): uses Python 2 print statements.
    print "testing1"
    for fn,s,e in OBJLIST.scanString("{h,k}"):
        for obj in fn.objects:
            print "Object: "+obj
# methodDefn ::= 'rpc' ident '(' [ 'stream' ] [ ident ] ')'
#                'returns' '(' [ 'stream' ] [ ident ] ')' ( '{' '}' | ';' )
methodDefn = (RPC_ - ident("methodName") +
              LPAR + Optional(STREAM_("paramStreamQualifier")) +
              Optional(ident("methodParam")) + RPAR +
              RETURNS_ +
              LPAR + Optional(STREAM_("returnStreamQualifier")) +
              Optional(ident("methodReturn")) + RPAR +
              ((LBRACE + RBRACE) | SEMI))

# serviceDefn ::= 'service' ident '{' methodDefn* '}'
serviceDefn = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(
    Group(methodDefn))('RPCs') + RBRACE

# packageDirective ::= 'package' ident [ '.' ident]* ';'
packageDirective = Group(PACKAGE_ - delimitedList(ident('packageName'), '.',
                                                  combine=True) + SEMI)

# syntaxDirective ::= 'syntax' '=' ( quotedString | ident ) ';'
# The protobuf syntax statement carries a quoted string
# (syntax = "proto3";), so a quoted value is accepted here. The bare
# identifier form is kept so input that parsed before still parses.
syntaxDirective = Group(SYNTAX_ - EQ + (quotedString | ident) + SEMI)

# Line comments run from '//' to the end of the line.
comment = '//' + restOfLine

# importDirective ::= 'import' quotedString ';'
importDirective = IMPORT_ - quotedString("importFileSpec") + SEMI

# optionDirective ::= 'option' ident '=' quotedString ';'
optionDirective = (OPTION_ - ident("optionName") + EQ +
                   quotedString("optionValue") + SEMI)

# Alternatives are tried in the order listed.
topLevelStatement = Group(messageExtension | enumDefn | importDirective
                          | optionDirective | serviceDefn | messageDefn)
# | Group(serviceDefn)('services') | Group(messageDefn)('messages')

# A file is an optional syntax statement followed by the package contents.
parser = Optional(syntaxDirective)("syntax") + (
    Optional(packageDirective) + ZeroOrMore(topLevelStatement))('package')

parser.ignore(comment)

# Sample input for manual testing.
test1 = """message Person { required int32 id = 1; required string name = 2; optional string email = 3; }"""
from pyparsing import quotedString
from datetime import datetime, timezone

# Build the replacement literal: the current UTC time formatted as
# "<day> <month abbrev> <year> <time> UTC", wrapped in double quotes.
# datetime.now(timezone.utc) replaces the deprecated datetime.utcnow();
# the strftime fields used here render identically for both.
nw = datetime.now(timezone.utc)
# [:-3] drops the last three characters of the %X time text
# (presumably the ":SS" seconds part in the default locale -- verify if a
# non-default locale is in use).
nowstring = '"%s"' % (nw.strftime("%d %b %Y %X")[:-3] + " UTC")
print(nowstring)

# Any quoted string that follows "__versionTime__ = " is replaced by the
# freshly built literal; the parse action ignores the matched tokens.
quoted_time = quotedString()
quoted_time.setParseAction(lambda: nowstring)

version_time = "__versionTime__ = " + quoted_time

# Read the whole module, rewrite the timestamp, then write it back in place.
with open('pyparsing.py') as oldpp:
    new_code = version_time.transformString(oldpp.read())

with open('pyparsing.py', 'w') as newpp:
    newpp.write(new_code)
expression = Forward() # Literals intNumber = Combine( Optional('-') + Word(nums) )('integer') floatNumber = Combine( Optional('-') + Word(nums) + Literal('.') + Word(nums) )('float') sciNumber = Combine( (floatNumber | intNumber) + CaselessLiteral('e') + intNumber )('scientific') aString = quotedString('string') # Use lookahead to match only numbers in a list (can't remember why this # is necessary) afterNumber = FollowedBy(",") ^ FollowedBy(")") ^ FollowedBy(LineEnd()) number = Group( (sciNumber + afterNumber) | (floatNumber + afterNumber) | (intNumber + afterNumber) )('number') boolean = Group( CaselessKeyword("true") | CaselessKeyword("false") )('boolean')
colon = pp.Literal(':').suppress()
key = pp.Forward()  # A key is an immutable type
keys = pp.delimitedList(key, ',')
value = pp.Forward()  # A value can be mutable or immutable
values = pp.delimitedList(value, ',')
pair = pp.Group(key + colon + value)
pairs = pp.delimitedList(pair, ',')

# Immutable types
bool_ = (pp.Literal('True') | pp.Literal('False')).setParseAction(lambda t: t[0] == 'True')
# Sharing one tuple / None object between parses is harmless because they
# are immutable, so replaceWith is fine for these two.
empty_tuple = (pp.Literal('()') | pp.Literal('tuple()')).setParseAction(pp.replaceWith(tuple()))
keytuple = (lround + keys + rround).setParseAction(lambda t: tuple([i for i in t]))
# Any run of digits, '+', '-' and '.' is accepted; a '.' selects float().
number = pp.Word(pp.nums + '+-.').setParseAction(lambda t: float(t[0]) if '.' in t[0] else int(t[0]))
none = pp.Literal('None').setParseAction(pp.replaceWith(None))
# NOTE(review): eval() is applied to the matched quoted-string token.  The
# token is restricted to what quotedString matches, but if this parser is
# ever fed untrusted text consider ast.literal_eval instead.
string = pp.quotedString().setParseAction(lambda t: eval(t[0]))  # pp.removeQuotes doesn't handle escaped quotes

# Mutable types
# A fresh container is built on every parse.  pp.replaceWith(dict()) would
# hand the *same* dict object to every caller, so mutating one parse result
# would silently change all later ones.  The one-element list wrapper
# mirrors what replaceWith itself returns.
empty_dict = (pp.Literal('{}') | pp.Literal('dict()')).setParseAction(lambda t: [dict()])
dict_ = (lcurly + pairs + rcurly).setParseAction(lambda t: {k: v for k, v in t})
empty_list = (pp.Literal('[]') | pp.Literal('list()')).setParseAction(lambda t: [list()])  # Double-up lists
list_ = (lsquare + values + rsquare).setParseAction(lambda t: [[i for i in t]])  # to parse as values
empty_listdict = pp.Literal('listdict()').setParseAction(lambda t: OrderedDict())
listdict_ = (lsquare + pairs + rsquare).setParseAction(lambda t: OrderedDict([(k, v) for k, v in t]))
# Same reasoning as empty_dict: never share one mutable set between parses.
empty_set = pp.Literal('set()').setParseAction(lambda t: [set()])
set_ = (lcurly + keys + rcurly).setParseAction(lambda t: {i for i in t})
valuetuple = (lround + values + rround).setParseAction(lambda t: tuple([i for i in t]))

# Alternatives are tried in the order listed.
key << (bool_ | empty_tuple | keytuple | number | none | string)
value << (key | empty_dict | dict_ | empty_list | list_ | empty_listdict | listdict_ | empty_set | set_ | valuetuple)
# A pattern is a node optionally followed by (edge, pattern) repetitions.
# '+' binds tighter than '<<', so the whole right-hand expression becomes
# the definition of the Forward.  listAllMatches=True keeps every matched
# node/edge under the results name instead of only the last one.
pattern = Forward()
pattern << node.setResultsName("nodes", listAllMatches=True) + ZeroOrMore(
    edge.setResultsName("edges", listAllMatches=True) + pattern
)

################### PREDICATE CLAUSES #######################
# Comma separated argument pattern
csv_pattern = Forward()
csv_pattern << var.setResultsName("pattern", listAllMatches=True) + ZeroOrMore(
    Suppress(Literal(",")) + csv_pattern
)

# Getter/Setter Pattern.
# attr is "<var>.<var>", joined back into a single string token.
attr = (var + Literal(".") + var).setParseAction(lambda t: ''.join(t))
# Right-hand side: an attribute lookup, or a quoted string with its quotes
# stripped.
right = attr("value_lookup") | quotedString("value").setParseAction(removeQuotes)
gttr_sttr = ( var("key") + Suppress(Literal("=")) + right )
pred_pattern = Forward()
pred_pattern << gttr_sttr.setResultsName("pattern", listAllMatches=True) + ZeroOrMore(
    Suppress(Literal(",")) + pred_pattern
)

################# DELETE #####################
delete = CaselessKeyword("DELETE")
# Emit the keyword token in lower case.
delete.setParseAction(lambda t: t[0].lower())
delete_clause = delete + csv_pattern
def banana_grammar(emitter=emit.PrintEmitter()):
    """
    Generate a banana parser that can then be used to parse banana
    content. It builds an AST on which operations can then be applied.

    :param emitter: Object handed to the AST nodes created by the parse
                    actions (`ast.BananaFile`, `connect_to`, `merge_all`).
                    NOTE(review): the default is created once, when this
                    function is defined, so every call relying on the
                    default shares the same PrintEmitter instance.
    :return: Return a banana parser
    :rtype: BananaScopeParser
    """
    # Should debug
    debug_grammar = False

    # Actions
    # Every parse action receives (s, l, t): the source string, the match
    # location and the matched tokens; ast.make_span is called with all
    # three to build the span stored on the AST node.
    def action_str_lit(s, l, t):
        return ast.StringLit(ast.make_span(s, l, t), t[0])

    def action_num_lit(s, l, t):
        return ast.Number(ast.make_span(s, l, t), t[0])

    def action_ident(s, l, t):
        return ast.Ident(ast.make_span(s, l, t), t[0])

    def action_expr(s, l, t):
        # Exactly one token is expected here; anything else is reported as
        # a grammar bug.
        if len(t) != 1:
            raise exception.BananaGrammarBug(
                'Bug found in the grammar for expression,'
                ' Please report this bug.')
        # Avoid wrapping an Expr inside another Expr.
        if isinstance(t[0], ast.Expr):
            return t[0]
        return ast.Expr(ast.make_span(s, l, t), t[0])

    def action_dot_path(s, l, t):
        # First token is the name of the variable
        # The rest is the property path
        # A lone string literal is returned as-is rather than as a DotPath.
        if isinstance(t[0], ast.StringLit) and len(t[1:]) == 0:
            return t[0]
        return ast.DotPath(ast.make_span(s, l, t), t[0], t[1:])

    def action_json_obj(s, l, t):
        return ast.JsonObj(ast.make_span(s, l, t), t)

    def action_parse_ctor_arg(s, l, t):
        # Two tokens: "name = value" (value is passed first, then name).
        # One token: a positional value.
        if len(t) > 1:
            return ast.ComponentCtorArg(ast.make_span(s, l, t), t[1], t[0])
        else:
            return ast.ComponentCtorArg(ast.make_span(s, l, t), t[0])

    def action_parse_comp_ctor(s, l, tokens):
        comp = ast.Component(ast.make_span(s, l, tokens))
        for tok in tokens:
            if isinstance(tok, ast.Ident):
                comp.set_ctor(tok)
            elif isinstance(tok, ast.ComponentCtorArg):
                comp.add_arg(tok)
            else:
                raise exception.BananaGrammarBug(
                    'Bug found in the grammar, Please report this bug')
        return comp

    def action_assignment(s, l, t):
        return ast.Assignment(ast.make_span(s, l, t), t[0], t[1])

    def action_create_connections(s, l, t):
        # The first token becomes the connection; every following token is
        # converted and connected to it in order.
        ast_conn = ast.into_connection(t[0])
        ast_conn.span = ast.make_span(s, l, t)
        for i in range(1, len(t)):
            next_conn = ast.into_connection(t[i])
            ast_conn.connect_to(next_conn, emitter)
        return ast_conn

    def action_merge_connections(s, l, t):
        ast_conn = ast.Connection(ast.make_span(s, l, t))
        ast_conn.merge_all(t, emitter)
        return ast_conn

    def action_root_ast(s, l, tokens):
        # Dispatch every top-level token into the BananaFile root node.
        root = ast.BananaFile(emitter)
        for tok in tokens:
            if isinstance(tok, ast.Assignment):
                if isinstance(tok.rhs, ast.Component):
                    root.add_component_ctor(tok.lhs, tok.rhs)
                else:
                    root.add_assignment(tok.lhs, tok.rhs)
            elif isinstance(tok, ast.Connection):
                root.add_connections(tok)
            else:
                raise exception.BananaGrammarBug(
                    'Bug found in the grammar, Please report this bug.')
        return root

    # TODO(Joan): Remove once it is no longer needed
    # Debug helper: prints the location and tokens of a statement and
    # returns nothing.
    def print_stmt(s, l, t):
        print("\nPRINT AST")
        print((l, [str(x) for x in t]))
        print("END PRINT AST\n")

    def action_unimplemented(s, l, t):
        raise exception.BananaGrammarBug("unimplemented code reached")

    # Tokens
    # All punctuation is suppressed so it never shows up in the results.
    equals = p.Literal("=").suppress().setName('"="').setDebug(debug_grammar)
    arrow = p.Literal("->").suppress().setName('"->"').setDebug(debug_grammar)
    lbra = p.Literal("[").suppress().setName('"["').setDebug(debug_grammar)
    rbra = p.Literal("]").suppress().setName('"]"').setDebug(debug_grammar)
    colon = p.Literal(":").suppress().setName('":"')
    comma = p.Literal(",").suppress().setName(",")
    less = p.Literal("<").suppress().setName('"<"')
    greater = p.Literal(">").suppress().setName('">"')
    lbrace = p.Literal("{").suppress().setName('"{"').setDebug(debug_grammar)
    rbrace = p.Literal("}").suppress().setName('"}"').setDebug(debug_grammar)
    lpar = p.Literal("(").suppress().setName('"("')
    rpar = p.Literal(")").suppress().setName('")"')

    # Keywords
    ing = p.Literal("ing").suppress()
    imp = p.Literal("import").suppress()
    fro = p.Literal("from").suppress()

    # String Literal, Numbers, Identifiers
    string_lit = p.quotedString()\
        .setParseAction(action_str_lit)\
        .setName(const.STRING_LIT)
    # Digits, optional fractional part, optional unsigned exponent.
    number_lit = p.Regex(r'\d+(\.\d*)?([eE]\d+)?')\
        .setParseAction(action_num_lit)\
        .setName(const.NUMBER)
    ident = p.Word(p.alphas + "_", p.alphanums + "_")\
        .setParseAction(action_ident)\
        .setName(const.IDENT)

    # Path for properties
    dot_prop = ident | string_lit
    dot_path = p.delimitedList(dot_prop, ".")\
        .setParseAction(action_dot_path)\
        .setName(const.DOT_PATH)\
        .setDebug(debug_grammar)

    # Expressions

    # Here to simplify the logic, we can match directly
    # against ident and string_lit to avoid having to deal
    # only with dot_path. It also allows removing the confusion
    # where '"a"' could be interpreted as a dot_path and would thus
    # be the same as 'a'. With the following, the first will
    # always be type-checked as a String whereas the latter will
    # be as the type of the variable.
    # NOTE(review): the operand used below is `number_lit | dot_path`; the
    # string-literal special case lives in action_dot_path.
    expr = p.infixNotation(number_lit | dot_path, [
        (p.oneOf('* /'), 2, p.opAssoc.LEFT),
        (p.oneOf('+ -'), 2, p.opAssoc.LEFT),
    ], lpar=lpar, rpar=rpar)
    expr.setParseAction(action_expr)\
        .setName(const.EXPR)\
        .setDebug(debug_grammar)

    # Json-like object (values can be much more than plain JSON values)
    # Forward declarations allow objects and arrays to nest recursively.
    json_obj = p.Forward()
    json_value = p.Forward()
    json_array = p.Group(lbra + p.Optional(p.delimitedList(json_value)) + rbra)
    json_array.setDebug(debug_grammar)
    json_array.setName(const.JSON_ARRAY)
    json_value <<= expr | json_obj | json_array
    json_value.setDebug(debug_grammar)\
        .setName(const.JSON_VALUE)
    # A trailing comma after the last member is tolerated.
    json_members = p.delimitedList(p.Group(dot_path + colon - json_value)) +\
        p.Optional(comma)
    json_members.setDebug(debug_grammar)\
        .setName(const.JSON_MEMBERS)
    json_obj <<= p.Dict(lbrace + p.Optional(json_members) - rbrace)
    json_obj.setParseAction(action_json_obj)\
        .setName(const.JSON_OBJ)\
        .setDebug(debug_grammar)

    # Component constructor
    # An argument is either "name = value" or a bare value.
    arg = (ident + equals - (expr | json_obj)) | expr | json_obj
    arg.setParseAction(action_parse_ctor_arg)
    params = p.delimitedList(arg)
    comp_ctor = ident + lpar - p.Optional(params) + rpar
    comp_ctor.setParseAction(action_parse_comp_ctor)\
        .setName(const.COMP_CTOR)\
        .setDebug(debug_grammar)

    # Assignments
    assignment = dot_path + equals - (comp_ctor | expr | json_obj)
    assignment.setParseAction(action_assignment)

    # Connections
    connection = p.Forward()
    array_of_connection = p.Group(lbra +
                                  p.Optional(p.delimitedList(connection)) +
                                  rbra)
    array_of_connection.setParseAction(action_merge_connections)
    last_expr = ident | array_of_connection
    this_expr = p.Forward()
    # The FollowedBy lookahead requires "x -> y" to be present before the
    # chain itself is consumed.
    match_expr = p.FollowedBy(last_expr + arrow - last_expr) + \
        (last_expr + p.OneOrMore(arrow - last_expr))
    this_expr <<= match_expr | last_expr
    connection <<= this_expr

    match_expr.setDebug(debug_grammar)\
        .setName(const.CONNECTION) \
        .setParseAction(action_create_connections)

    # Definitions
    # Matching this rule raises BananaGrammarBug (action_unimplemented).
    definition = ing - less - string_lit - greater - ident - lbrace - rbrace
    definition.setDebug(debug_grammar)\
        .setName(const.DEFINITION)\
        .setParseAction(action_unimplemented)

    # Import directive
    # Matching this rule raises BananaGrammarBug (action_unimplemented).
    module_def = (imp - ident) | fro - ident - imp - ident
    module_def.setDebug(debug_grammar)\
        .setName(const.MOD_IMPORT)\
        .setParseAction(action_unimplemented)

    # Comments
    comments = "#" + p.restOfLine

    statement = assignment | \
        match_expr | \
        definition | \
        module_def
    statement.setName(const.STATEMENT)
    statement.setDebug(debug_grammar)
    # print_stmt writes every parsed statement to stdout (see TODO above).
    statement.setParseAction(print_stmt)

    # Grammar
    grammar = p.OneOrMore(statement).ignore(comments)
    grammar.setParseAction(action_root_ast)

    return BananaScopeParser(grammar)
# NOTE(review): the next two statements are the tail of a function whose
# header lies outside this excerpt; they are kept exactly as found.  Both
# arms of the conditional yield `m` -- confirm that is intended.
    rhs = m if rhs else m
    return (m + (op + rhs)[...]).setParseAction(onfound(cls))

# Short aliases for frequently used pyparsing classes.
L = pp.Literal
atleast = pp.OneOrMore
opt = pp.Optional

# Punctuation is suppressed so it never appears in the parse results.
LPAREN = pp.Suppress('(')
RPAREN = pp.Suppress(')')
LBRACK = pp.Suppress('[')
RBRACK = pp.Suppress(']')
COLON = pp.Suppress(':')

# Identifier: a letter or underscore, then letters, digits or underscores.
NAME = pp.Word(pp.alphas + '_', pp.alphanums + '_')
string = pp.quotedString().setParseAction(pp.removeQuotes)
number = ppc.number()
comment = '#' + pp.restOfLine

# copy() so the parse action is attached to `var` only, not to NAME.
var = NAME.copy().setParseAction(el.Var)
# Forward declarations; `expr` and `stmt` are not given a definition
# within this excerpt.
expr = pp.Forward()
stmt = pp.Forward()

# Indentation stack shared with indentedBlock.
stack = [1]
suite = pp.indentedBlock(expr, stack)

# Zero or more comma separated expressions.
params = pp.Optional(pp.delimitedList(expr))
# rule NAME ( params ) :
ruledecl = pp.Suppress('rule') + NAME + \
    pp.Group(LPAREN + params + RPAREN) + COLON
# A rule is its declaration followed by an indented suite.
ruledef = pp.Group(ruledecl + suite).setParseAction(el.Rule)
def build_sequence(expression: str, evaluation_function: Callable[[str], bool]=eval) -> Sequence:
    """
    Turn a textual sequential condition into the matching *Sequence* object.

    The mini-language understood by this function is:

    - atom:
        - "code" or 'code': a quoted fragment of code (e.g. Python code)
          denoting a Boolean expression. Atoms follow a "satisfied once"
          semantics: once the code has evaluated to true, the atom stays
          true. In linear temporal logic this is "sometimes 'code'".
    - constants:
        - failure: always evaluates to false.
        - success: always evaluates to true.
    - unary operators:
        - never A: becomes false as soon as A evaluates to true.
    - binary operators:
        - A and B: logical and
        - A or B: logical or
        - A -> B: equivalent to "(next always B) since A" in linear temporal
          logic, i.e. B has to be true (strictly) since A holds. Because
          atoms are "satisfied once", when A and B are atoms this amounts to
          "(A and next (sometimes B))": A must be satisfied once strictly
          before B is satisfied once.

    Keywords are case-insensitive and parentheses may be used for grouping.
    Unary operators bind tighter than binary ones ("A and never B" reads as
    "A and (never B)"). Unary operators are right associative, binary
    operators are left associative ("A and B and C" reads as
    "(A and B) and C"). The binary operators above are listed by decreasing
    priority: "A or B and C" reads as "A or (B and C)" and
    "A and B -> C or D" reads as "(A and B) -> (C or D)".

    Examples (quoted fragments are assumed to evaluate to true or false):

    - "put water" -> "put coffee": water must be put before coffee.
    - "put water" and "put coffee": both must be put; thanks to the
      "satisfied once" semantics the order does not matter.
    - (never "put water") or ("put water" -> "put coffee"): if water is put,
      coffee must be put as well.
    - never ("put water" -> "put water"): fails if water is put twice
      (succeeds if it is put once or never).
    - "put water" -> (never "put water"): water is put exactly once.

    :param expression: an expression to parse
    :param evaluation_function: the function that will be called to evaluate
        nested pieces of code
    :return: a *Sequence* instance.
    """
    def _apply_unary(constructor, tokens):
        # The operator itself is suppressed, so the first group holds only
        # the operand.
        group = tokens[0]
        return constructor(group[0])

    def _fold_binary(constructor, tokens):
        # Left fold over the operands: ((a op b) op c) ...
        group = tokens[0]
        folded = group[0]
        for operand in group[1:]:
            folded = constructor(folded, operand)
        return folded

    def _make_condition(s, l, t):
        # t[0] still carries its surrounding quotes; drop them.
        code = t[0][1:-1]
        return SequenceSometimes(SequenceCondition(code, evaluation_function))

    condition = pyparsing.quotedString().setParseAction(_make_condition)

    failure_constant = pyparsing.CaselessKeyword('failure').setParseAction(SequenceFailure)
    success_constant = pyparsing.CaselessKeyword('success').setParseAction(SequenceSuccess)
    constants = failure_constant | success_constant

    # Operator tables, highest precedence first.
    unary_ops = []
    for keyword, func in [('never', SequenceNever)]:
        operator = pyparsing.CaselessKeyword(keyword).suppress()
        unary_ops.append(
            (operator, 1, pyparsing.opAssoc.RIGHT, partial(_apply_unary, func))
        )

    binary_ops = []
    for keyword, func in [('and', SequenceAnd), ('or', SequenceOr), ('->', SequenceThen)]:
        operator = pyparsing.CaselessKeyword(keyword).suppress()
        binary_ops.append(
            (operator, 2, pyparsing.opAssoc.LEFT, partial(_fold_binary, func))
        )

    operands = (constants | condition)
    expr = pyparsing.operatorPrecedence(operands, unary_ops + binary_ops)

    try:
        parsed = expr.parseString(expression, parseAll=True)
        return parsed[0]
    except pyparsing.ParseBaseException as e:
        raise StatechartError('Invalid sequential condition:\n%s' % expression) from e
def checkQuotedColon(s, loc, toks):
    """Parse action: reject an unquoted identifier that contains ':'.

    :raises InvalidSQL: if the first token contains a colon.
    """
    if ":" in toks[0]:
        raise InvalidSQL("identifier with colon : must be in double quotes.")

def checkDoubleQuotes(s, loc, toks):
    """Parse action: reject a quoted string delimited by single quotes.

    :raises InvalidSQL: if the first token starts with a single quote.
    """
    # TODO really?
    if toks[0][0] == "'":
        raise InvalidSQL("quoted strings must use double quotes.")

# ':' is accepted by the Word so that checkQuotedColon can turn it into an
# explicit InvalidSQL error.
ident = Word(alphas, alphanums + "_:").setParseAction(checkQuotedColon)
# quotedString() makes a copy, so the parse action is attached to this
# use only.
columnName = (ident | quotedString().setParseAction(checkDoubleQuotes))("columnName")

# Forward declaration for the recursive WHERE expression; it is not given
# a definition within this excerpt.
whereExpression = Forward()
and_ = Keyword("and", caseless=True)("and")
or_ = Keyword("or", caseless=True)("or")
in_ = Keyword("in", caseless=True)("in")
# The whole phrase is a single keyword string.
isnotnull = Keyword("is not null", caseless=True)("notnull")
binop = oneOf("= != < > >= <=", caseless=True)("binop")
intNum = Word(nums)
# The trailing '*' in the results name keeps every match, not just the last.
columnRval = (intNum | quotedString)("rval*")
# One condition: IS NOT NULL test, binary comparison, IN list, or a
# parenthesised sub-expression.
whereCondition = Group((columnName + isnotnull) |
                       (columnName + binop + columnRval) |
                       (columnName + in_ + "(" + delimitedList(columnRval) + ")") |
                       ("(" + whereExpression + ")"))("condition")
# Match/Transformation pattern. pattern = Forward() pattern << node.setResultsName("nodes", listAllMatches=True) + ZeroOrMore( edge.setResultsName("edges", listAllMatches=True) + pattern) ################### PREDICATE CLAUSES ####################### # Comma seperated argument pattern csv_pattern = Forward() csv_pattern << var.setResultsName("pattern", listAllMatches=True) + ZeroOrMore( Suppress(Literal(",")) + csv_pattern) # Getter/Setter Pattern. attr = (var + Literal(".") + var).setParseAction(lambda t: ''.join(t)) right = attr("value_lookup") | quotedString("value").setParseAction( removeQuotes) gttr_sttr = (var("key") + Suppress(Literal("=")) + right) pred_pattern = Forward() pred_pattern << gttr_sttr.setResultsName( "pattern", listAllMatches=True) + ZeroOrMore(Suppress(Literal(",")) + pred_pattern) ################# DELETE ##################### delete = CaselessKeyword("DELETE") delete.setParseAction(lambda t: t[0].lower()) delete_clause = delete + csv_pattern ################## SET ######################## setter = CaselessKeyword("SET")
# Give the previously declared message_body its definition: zero or more
# message lines, grouped and named.
message_body << Group(ZeroOrMore(message_line))("message_body")
message_body.setParseAction(message_body_fn)

# rpc <method> ( [request] ) returns ( [response] )
# The '-' after the leading keyword is pyparsing's error-stop operator:
# once the keyword matched, later failures are reported as syntax errors
# instead of backtracking.
method_definition= ((RPC - identifier("method") +
                     LPAR + Optional(identifier("request")) + RPAR +
                     RETURNS + LPAR + Optional(identifier("response")) + RPAR))("method_definition")
method_definition.setParseAction(method_definition_fn)

# service <name> { method_definition* }
service_definition= (SERVICE - identifier("service") + LBRACE + ZeroOrMore(Group(method_definition)) + RBRACE)("service_definition")
service_definition.setParseAction(service_definition_fn)

# package a.b.c ;   (combine=True joins the dotted parts into one token)
package_directive = (Group(PACKAGE - delimitedList(identifier, '.', combine=True) + SEMI))("package_directive")
package_directive.setParseAction(package_directive_fn)

# import "file" ;
import_directive = IMPORT - quotedString("import") + SEMI
import_directive.setParseAction(import_directive_fn)

# option <name> = <integer or quoted string> ;
# NOTE(review): unlike the other directives, no parse action is attached
# here -- confirm that is intended.
option_directive = OPTION - identifier("option") + EQ + (integer | quotedString)("value") + SEMI

# Alternatives are tried left to right.
top_level_statement = Group(message_definition| enum_definition| option_directive | import_directive | service_definition)("top_level_statement")
top_level_statement.setParseAction(top_level_statement_fn)

# syntax = "<version>" ;
syntax_directive = (SYNTAX + EQ + quotedString("syntax_version") + SEMI)("syntax_directive")

# Whole file: optional syntax directive, optional package directive, then
# any number of top-level statements.
parser = (Optional(syntax_directive) + Optional(package_directive) + ZeroOrMore(top_level_statement))("parser")
parser.setParseAction(parser_fn)

# '//' comments run to the end of the line and are skipped everywhere.
comment = '//' + restOfLine
parser.ignore(comment)
# Minimal BNF. See below for a more comprehensive BNF bnf = """ listdict [ keyvaluepairs ] 'listdict()' keyvaluepairs keyvaluepair keyvaluepairs , keyvaluepair keyvaluepair key : value """ key = pp.quotedString() colon = pp.Literal(':').suppress() value = pp.quotedString() keyvaluepair = pp.Group(key + colon + value) keyvaluepairs = pp.delimitedList(keyvaluepair, ',') lsquare = pp.Literal('[').suppress() rsquare = pp.Literal(']').suppress() empty = pp.Literal('listdict()').suppress() parser = (lsquare + keyvaluepairs + rsquare | empty) class listdict(OrderedDict): """ listdict is a Pythonic way of representing an OrderedDict.