def clear_sql(sql: str) -> str:
    ''' Remove comments from SQL.

        TODO: the current implementation does not remove /*...*/ comments
        from the middle of a statement: select a, /*comment*/ from b
    '''
    # remove /*comment*/
    ParserElement.setDefaultWhitespaceChars(" \t")
    comment = nestedExpr('/*', '*/').suppress()
    starting = ZeroOrMore(comment.suppress())
    ending = ZeroOrMore(comment | ';').suppress() + StringEnd()
    expr = starting + SkipTo(ending) + ending
    sql = expr.transformString(sql)

    # remove -- and # comments
    oracleSqlComment = Literal("--") + restOfLine
    mySqlComment = Literal("#") + restOfLine
    expr = (
        originalTextFor(QuotedString("'"))
        | originalTextFor(QuotedString('"'))
        | originalTextFor(QuotedString('`'))
        | (oracleSqlComment | mySqlComment).suppress()
    )
    sql = expr.transformString(sql)

    sql = sql.strip(' \n\t')
    return sql
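# A minimal usage sketch for clear_sql, assuming the pyparsing names used
# above (ParserElement, nestedExpr, SkipTo, QuotedString, ...) are imported.
# The leading block comment, the trailing line comment, and the final ';'
# should all be stripped, per the grammar above:
if __name__ == '__main__':
    print(clear_sql("/* header */ SELECT a, b FROM t -- trailing\n;"))
    # expected output: SELECT a, b FROM t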
def parse_as_create_predictor(self) -> dict:
    CREATE, PREDICTOR, FROM, WHERE, PREDICT, AS, ORDER, GROUP, BY, WINDOW, \
        HORIZON, USING, ASC, DESC = map(
            CaselessKeyword,
            "CREATE PREDICTOR FROM WHERE PREDICT AS ORDER GROUP BY WINDOW "
            "HORIZON USING ASC DESC".split())
    ORDER_BY = ORDER + BY
    GROUP_BY = GROUP + BY

    word = Word(alphanums + "_")
    s_int = Word(nums).setParseAction(tokenMap(int))

    predict_item = Group(word('name') + Optional(AS.suppress() + word('alias')))
    order_item = Group(word('name') + Optional(ASC | DESC)('sort'))
    using_item = Group(word('name') + Word('=').suppress()
                       + (word | QuotedString("'"))('value'))

    expr = (
        CREATE + PREDICTOR + word('predictor_name')
        + FROM + Optional(word)('integration_name')
        + originalTextFor(nestedExpr('(', ')'))('select')
        + Optional(AS + word('datasource_name'))
        + PREDICT + delimitedList(predict_item, delim=',')('predict')
        + Optional(ORDER_BY + delimitedList(order_item, delim=',')('order_by'))
        + Optional(GROUP_BY + delimitedList(word, delim=',')('group_by'))
        + Optional(WINDOW + s_int('window'))
        + Optional(HORIZON + s_int('nr_predictions'))
        + Optional(
            (USING + delimitedList(using_item, delim=',')('using'))
            | (USING + originalTextFor(nestedExpr('{', '}'))('using'))
        )
    )

    r = expr.parseString(self._sql)

    # postprocessing
    r = r.asDict()
    if r['select'].startswith('(') and r['select'].endswith(')'):
        r['select'] = r['select'][1:-1]
    r['select'] = r['select'].strip(' \n')

    using = r.get('using')
    if isinstance(using, str):
        r['using'] = json.loads(using)
    elif isinstance(using, list):
        new_using = {}
        for el in using:
            if el['name'] == 'stop_training_in_x_seconds':
                new_using['time_aim'] = el['value']
            else:
                new_using[el['name']] = el['value']
        r['using'] = new_using

    if isinstance(r.get('order_by'), list):
        r['order_by'] = [x['name'] for x in r['order_by']]

    return r
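# Hedged example of a statement the grammar above accepts via self._sql
# (illustrative names and values only):
EXAMPLE_CREATE_PREDICTOR = """
CREATE PREDICTOR home_rentals FROM db (SELECT * FROM rentals) AS ds
PREDICT price AS target
ORDER BY ts DESC
GROUP BY region
WINDOW 10 HORIZON 7
USING stop_training_in_x_seconds='30'
"""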
def parse_config_file(filepath):
    """
    Parse the NetScaler input configuration file.
    :param filepath: path of the NetScaler input configuration
    :return: list of parsed commands
    """
    EOL = LineEnd().suppress()
    comment = Suppress("#") + Suppress(restOfLine) + EOL
    SOL = LineStart().suppress()
    blank_line = SOL + EOL
    result = []
    hyphen = Literal("-")
    not_hyphen_sign = ''.join(c for c in printables if c != '-')
    text = Word(not_hyphen_sign, printables)
    key = Word('-', printables).setParseAction(
        lambda t: t[0].replace('-', '', 1))
    val = originalTextFor(Optional(ZeroOrMore(text), default=None))
    option = Group(key + val)
    multi_word_names = quotedString
    q_obj = originalTextFor(Keyword('q{') + SkipTo(Keyword("}")))
    command = Group(OneOrMore(q_obj | multi_word_names | text)
                    + ZeroOrMore(option))
    command.ignore(comment | blank_line)

    with open(filepath) as infile:
        line_no = 1
        print("Parsing Input Configuration...")
        lines = infile.readlines()
        total_lines = len(lines)
        for line in lines:
            try:
                tmp = command.parseString(line)
                tokens = tmp.asList()
                if tokens:
                    tokens[0].append(['line_no', str(line_no)])
                result += tokens
                line_no += 1
            except Exception:
                line_no += 1
                LOG.error("Parsing error: " + line)
            msg = "Parsing started..."
            if line_no <= total_lines:
                ns_util.print_progress_bar(line_no, total_lines, msg,
                                           prefix='Progress', suffix='')
    return result
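# Hedged sketch of the expected input: one NetScaler CLI command per line.
# A made-up example line and the rough shape of its parse, per the grammar
# above (words, then [key, value] option pairs, then a ['line_no', n] entry):
#
#   add lb vserver vs_http HTTP 10.0.0.10 80 -persistenceType COOKIEINSERT
#
# -> [['add', 'lb', 'vserver', 'vs_http', 'HTTP', '10.0.0.10', '80',
#      ['persistenceType', 'COOKIEINSERT'], ['line_no', '1']]]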
def _define_vs():
    KEY = Word(alphas + '_$', alphanums + '_$')\
        .setName('identifier').setResultsName('key')
    VALUE = originalTextFor(_define_json()).setResultsName('value')
    # validator name, eg: int
    NAME = Optional(Optional(Suppress('?'))
                    + pyparsing_common.identifier.setResultsName('name'))
    # refers, eg: @xx@yy
    REFERS = Group(ZeroOrMore(Suppress('@') + pyparsing_common.identifier))\
        .setResultsName('refers')
    # args, eg: (), (1), (1,2,3), ([1,2], {"key":"value"}, "Any JSON")
    ARGS = Group(Optional(
        Suppress('(') + Optional(delimitedList(VALUE)) + Suppress(')')
    )).setResultsName('args')
    # key-value, eg: key, key=True, key=[1,2,3]
    KW = Group(KEY + Optional(Suppress('=') + VALUE))
    # kwargs, eg: &key1&key2=True&key3=[1,2,3]
    KWARGS = Group(ZeroOrMore(Suppress('&') + KW)).setResultsName('kwargs')
    # a leading xxx is a key: xxx@yyy, xxx?yyy, $self&abc
    # a leading xxx other than '$self' is a validator name: xxx(1,2), xxx&abc, xxx
    SELF = Literal('$self').setResultsName('key')
    VS_KEY = Optional((KEY + FollowedBy(Word('@?'))) | SELF)
    VS_DEF = REFERS + NAME + ARGS + KWARGS
    return StringStart() + VS_KEY + VS_DEF + StringEnd()
def parse_as_create_datasource(self) -> dict:
    '''
    Parse a 'CREATE DATASOURCE' query.
    Example:
        CREATE DATASOURCE name
        FROM mysql
        WITH {"user": "******", "password": "******", "host": "127.0.0.1"}
    '''
    result = {
        'datasource_name': None,
        'database_type': None,
        'connection_args': None
    }

    expr = (CaselessKeyword("create").suppress()
            + CaselessKeyword("datasource").suppress()
            + Word(printables).setResultsName('datasource_name')
            + CaselessKeyword("from").suppress()
            + Word(printables).setResultsName('database_type')
            + CaselessKeyword("with").suppress()
            + originalTextFor(nestedExpr('{', '}'))('connection_args'))

    r = expr.parseString(self._sql).asDict()

    datasource_name = r.get('datasource_name')
    if isinstance(datasource_name, str) is False:
        raise Exception("Can't determine datasource name")
    result['datasource_name'] = datasource_name

    database_type = r.get('database_type')
    if isinstance(database_type, str) is False:
        raise Exception("Can't determine database type")
    result['database_type'] = database_type

    try:
        result['connection_args'] = json.loads(r.get('connection_args'))
    except Exception:
        raise Exception("Can't parse connection arguments.")

    return result
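# Hedged example of a statement this method accepts via self._sql
# (illustrative values only):
EXAMPLE_CREATE_DATASOURCE = (
    'CREATE DATASOURCE my_ds FROM mysql '
    'WITH {"user": "u", "password": "p", "host": "127.0.0.1"}'
)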
def _parser_piece_text():
    """
    Return PyParsing element to the text of a markdown link.
    """
    # No double line breaks in markdown links
    double_line_break = (Word("\n\r", exact=1) + Optional(Word(" \t"))
                         + Word("\n\r", exact=1))

    # We will ignore escaped square brackets when matching balanced
    # square brackets.
    ignore = Literal("\\[") | Literal("\\]")

    # The text parser will match text inside balanced brackets using the
    # nestedExpr helper function from PyParsing.
    #
    # Next we define the content that is allowed inside the brackets.
    content_character = ~FollowedBy(double_line_break) + CharsNotIn(
        "[]", exact=1)

    # Normally with nestedExpr, the content parser would be separately applied
    # to each whitespace-separated string within the nested expression.
    # However, since we set whitespaceChars to '', the content parser is
    # applied to characters one-at-a-time.
    #
    # If this ever changes, we would need to change content to something
    # like Combine(OneOrMore(~ignore + content_character))
    content = content_character

    text = originalTextFor(
        nestedExpr(
            opener="[",
            closer="]",
            content=content,
            ignoreExpr=ignore,
        )).setResultsName("text")
    text.addParseAction(lambda s, l, toks: toks[0][1:-1])
    return text
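# Hedged usage sketch, assuming the pyparsing names used above are imported.
# The parse action strips the outer brackets, so this should print the inner
# text with nested brackets preserved:
if __name__ == "__main__":
    text_parser = _parser_piece_text()
    print(text_parser.parseString("[some [nested] text]")[0])
    # expected: some [nested] text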
def func_tokens(dictionary, parse_action):
    func_name = Word(alphas + '_', alphanums + '_')
    func_ident = Combine('$' + func_name.copy()('funcname'))
    func_tok = func_ident + originalTextFor(nestedExpr())('args')
    func_tok.leaveWhitespace()
    func_tok.setParseAction(parse_action)
    func_tok.enablePackrat()

    rx_tok = Combine(Literal('$').suppress() + Word(nums)('num'))

    def replace_token(tokens):
        index = int(tokens.num)
        return dictionary.get(index, '')

    rx_tok.setParseAction(replace_token)

    strip = lambda s, l, tok: tok[0].strip()
    text_tok = CharsNotIn(',').setParseAction(strip)
    quote_tok = QuotedString('"')

    if dictionary:
        arglist = Optional(delimitedList(quote_tok | rx_tok | text_tok))
    else:
        arglist = Optional(delimitedList(quote_tok | text_tok))

    return func_tok, arglist, rx_tok
def asn1_loads(asn1_str):
    """
    Parse an ASN.1 file.

    This is currently pseudo-ASN; modify to become actual ASN.1.
    """
    # ASN.1 grammar
    identifier = pp.Word(pp.alphas + "_")
    assign = pp.Literal("::=")
    # typedef = identifier.setName("typeref") + assign + identifier.setName("basetype")
    comment1 = pp.Literal("#") + pp.originalTextFor(pp.SkipTo(pp.LineEnd()))
    # typelist = pp.OneOrMore(typedef)
    meta1 = pp.LineStart() + identifier + pp.Literal(":") \
        + pp.SkipTo(pp.LineEnd()).setDebug()
    meta2 = pp.LineStart() + pp.White() + pp.SkipTo(pp.LineEnd()).setDebug()
    metaval = meta1 + pp.ZeroOrMore(meta2)
    # metalist = pp.ZeroOrMore(comment1) + pp.Literal("/*") + pp.OneOrMore(metaval) + pp.Literal("*/")
    metalist = pp.SkipTo(pp.Literal("/*")).setDebug() + pp.Literal("/*") \
        + pp.OneOrMore(metaval).setDebug() + pp.Literal("*/")

    asn1 = metalist.parseString(asn1_str, parseAll=False)
    print(asn1)
    jaen = {"meta": {}, "types": []}
    return jaen
def postParse(self, instring, loc, tokenList):
    if self.evalfn:
        res = Expr(self.name)
        res._evalfn = MethodType(self.evalfn, res)
    else:
        res = CompValue(self.name)
        if self.name == "ServiceGraphPattern":
            # Then this must be a service graph pattern and have
            # already matched.
            # lets assume there is one, for now, then test for two later.
            sgp = originalTextFor(self.expr)
            service_string = sgp.searchString(instring)[0][0]
            res["service_string"] = service_string

    for t in tokenList:
        if isinstance(t, ParamValue):
            if t.isList:
                if t.name not in res:
                    res[t.name] = plist()
                res[t.name].append(t.tokenList)
            else:
                res[t.name] = t.tokenList
            # res.append(t.tokenList)
        # if isinstance(t, CompValue):
        #     res.update(t)
    return res
def parse_template(template_text):
    identifier = Word(alphas, alphanums + '_')
    param = Group(identifier('name') + Suppress(':')
                  + CharsNotIn(',)')('value'))
    param_list = Group(Suppress('(') + delimitedList(param, delim=',')
                       + Suppress(')'))
    benchmark_id = originalTextFor(identifier + '.' + identifier
                                   + '.' + identifier)
    measurement_id = Group(benchmark_id('benchmark')
                           + Optional(param_list('params'))
                           + Suppress('[') + identifier('local_id')
                           + Suppress(']'))
    macro = Group(Suppress('${') + measurement_id('measurement')
                  + Suppress('}'))
    raw_text_block = originalTextFor(CharsNotIn('$'))
    text = ZeroOrMore(Group(raw_text_block('text')
                            | macro('macro')))('template')
    text.leaveWhitespace()
    return text.parseString(template_text).asDict()
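# Hedged usage sketch for parse_template (names are made up; assumes the
# pyparsing imports used above). The result is roughly a 'template' list of
# groups, each holding either a 'text' chunk or a 'macro' measurement:
if __name__ == '__main__':
    parsed = parse_template('elapsed: ${suite.group.bench[run1]} seconds')
    print(parsed['template'])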
def parse_as_create_ai_table(self) -> dict:
    CREATE, AI, TABLE, VIEW, FROM, USING, AS = map(
        CaselessKeyword, "CREATE AI TABLE VIEW FROM USING AS".split())
    AI_TABLE = AI + TABLE

    word = Word(alphanums + "_")

    expr = (CREATE + (AI_TABLE | VIEW) + word('ai_table_name') + AS
            + originalTextFor(nestedExpr('(', ')'))('select'))

    r = expr.parseString(self._sql)
    r = r.asDict()

    if r['select'].startswith('(') and r['select'].endswith(')'):
        r['select'] = r['select'][1:-1]
    r['select'] = r['select'].strip(' \n')

    select = parse_sql(r['select'])
    if isinstance(select.from_table, Join) is False:
        raise Exception(
            "'from' must be like: 'from integration.table join predictor'")

    integration_name = select.from_table.left.parts[0]
    select.from_table.left.parts = select.from_table.left.parts[1:]
    integration_name_alias = select.from_table.left.alias.parts[0]

    predictor_name = select.from_table.right.parts[0]
    predictor_name_alias = select.from_table.right.alias.parts[0]

    select.from_table = select.from_table.left

    query_fields = []
    predictor_fields = []
    predictor_fields_targets = []
    integration_sql = str(select)
    for target in select.targets:
        if target.parts[0] == integration_name_alias:
            query_fields.append(target.parts[1])
            predictor_fields_targets.append(target)
        elif target.parts[0] == predictor_name_alias:
            predictor_fields.append(target.parts[1])
    select.targets = predictor_fields_targets

    res = {
        'ai_table_name': r['ai_table_name'],
        'integration_name': integration_name,
        'integration_query': integration_sql,
        'query_fields': query_fields,
        'predictor_name': predictor_name,
        'predictor_fields': predictor_fields
    }

    return res
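# Hedged example of a statement this method accepts (illustrative names; the
# inner select must also satisfy the integration-join requirements the code
# above enforces via parse_sql):
EXAMPLE_CREATE_AI_TABLE = """
CREATE AI TABLE my_ai_table AS (
    SELECT i.a, p.predicted
    FROM integration.tbl AS i JOIN predictor_name AS p
)
"""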
def cut_from_tail(self, text):
    ''' Removes 'text' from end of sql. Not case sensitive. '''
    text_arr = text.split(' ')

    ending = CaselessKeyword(text_arr[0])
    for x in text_arr[1:]:
        ending = ending + CaselessKeyword(x)
    ending = ending + StringEnd()

    expr = (originalTextFor(SkipTo(ending)))('original') \
        + (originalTextFor(ending))('ending')

    try:
        r = expr.parseString(self._sql)
    except ParseException:
        return False

    self._sql = r.asDict()['original'].strip()
    return True
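# A minimal, hypothetical harness for cut_from_tail (the enclosing class is
# not shown in this snippet; _Stub below exists only for illustration and
# assumes the pyparsing names used above are imported):
class _Stub:
    cut_from_tail = cut_from_tail

    def __init__(self, sql):
        self._sql = sql


s = _Stub("select * from t USING x")
print(s.cut_from_tail("using x"))  # expected: True
print(s._sql)                      # expected: select * from t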
def write(self, stream="/dev/stdout"):
    if isinstance(stream, str):
        with open(stream, 'w') as f:
            return self.write(f)

    sstream = io.StringIO()
    super(BbQuiz, self).write(sstream)
    text = sstream.getvalue()

    # replace $...$ with \math{...}
    text = pp.QuotedString(quoteChar='$', convertWhitespaceEscapes=False)\
        .setParseAction(lambda toks: r'\math{%s}' % toks[0])\
        .transformString(text)

    # Replace macros.
    command = pp.Word(pp.alphas)
    options = pp.originalTextFor(pp.nestedExpr('[', ']'))
    arguments = pp.originalTextFor(pp.nestedExpr('{', '}'))
    macro = pp.Combine(pp.Literal("\\") + command("command")
                       + pp.ZeroOrMore(options)("options")
                       + pp.ZeroOrMore(arguments)("arguments"))
    macro.setParseAction(self.expand_macro)

    # transform string until all macros have been expanded
    while True:
        newtext = macro.transformString(text)
        if newtext == text:
            break
        text = newtext

    # try to catch some syntax errors that will cause Bb to choke
    # 1. MC or MA questions don't have a "correct" answer
    for line in text.split('\n'):
        if line.startswith('MC') or line.startswith('MA'):
            if not re.search("\tcorrect", line):
                print("WARNING: A multiple choice/answer question does not "
                      "have a correct answer. Blackboard will not parse this.")
                print("\t", line[3:50], '...')
                print()

    stream.write(text)
def get_fragment_grammar():
    # Match header [mapping]
    header = Suppress("[") + Suppress("mapping") + Suppress("]")

    # There are three possible patterns for mapping entries:
    #       obj:symbol (scheme)
    #       obj (scheme)
    #       * (scheme)
    obj = Fragment.ENTITY.setResultsName("object")
    symbol = Suppress(":") + Fragment.IDENTIFIER.setResultsName("symbol")
    scheme = Suppress("(") + Fragment.IDENTIFIER.setResultsName("scheme") \
        + Suppress(")")

    pattern1 = Group(obj + symbol + scheme)
    pattern2 = Group(obj + scheme)
    pattern3 = Group(
        Literal(Mapping.MAPPING_ALL_OBJECTS).setResultsName("object")
        + scheme)

    mapping_entry = pattern1 | pattern2 | pattern3

    # To simplify parsing, classify groups of condition-mapping entry into
    # two types: normal and default. A normal grouping is one with a
    # non-default condition. The default grouping is one which contains the
    # default condition.
    mapping_entries = Group(
        ZeroOrMore(mapping_entry)).setResultsName("mappings")

    normal_condition = Suppress(":") + originalTextFor(
        SDKConfig.get_expression_grammar())
    default_condition = Optional(
        Suppress(":") + Literal(Mapping.DEFAULT_CONDITION))

    normal_group = Group(
        normal_condition.setResultsName("condition") + mapping_entries)
    default_group = Group(default_condition
                          + mapping_entries).setResultsName("default_group")

    normal_groups = Group(
        ZeroOrMore(normal_group)).setResultsName("normal_groups")

    # Any mapping fragment definition can have zero or more normal groups
    # and only one default group as a last entry.
    archive = Suppress("archive") + Suppress(":") \
        + Fragment.ENTITY.setResultsName("archive")
    entries = Suppress("entries") + Suppress(":") + (
        normal_groups + default_group).setResultsName("entries")

    mapping = Group(header + archive + entries)
    mapping.setParseAction(lambda t: Mapping(t[0].archive, t[0].entries))

    mapping.ignore("#" + restOfLine)
    return mapping
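# Hedged example of a fragment the grammar above accepts (names are made up;
# condition lines, when present, use SDKConfig's expression grammar, which is
# not shown here, so this minimal form has only the default group):
EXAMPLE_MAPPING_FRAGMENT = """
[mapping]
archive: libfoo.a
entries:
    * (default)
"""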
def parse_element(cls, indent_stack):
    """Set to the rule the list of producers in ``producer`` attribute."""
    producer_body = (Word(alphanums + "_") + originalTextFor(nestedExpr())
                     + Suppress(',') + PYTHON_ALLOWED_EXPR
                     ).setParseAction(lambda toks: {
                         'code': toks[0],
                         'params': eval(toks[1]),
                         'rule': eval(toks[2])
                     })
    return (Keyword('producer:').suppress()
            + indentedBlock(OneOrMore(producer_body), indent_stack)
            ).setResultsName('producer')
def bootstrap(config):
    """
    Loads unit lists for use in this instance of the measurement parser

    :param config: cahoots config
    :type config: cahoots.config.BaseConfig
    """
    units = {}
    systems = {}
    prepositions = DataHandler().get_prepositions()

    directory = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(directory, "units/*.yaml")

    for file_path in glob.glob(path):
        unit_file = open(file_path, 'r')
        unit_type = yaml.load(unit_file)
        for unit in unit_type['keywords']:
            units[unit] = unit_type['id']
        systems[unit_type['id']] = \
            (unit_type['system'], unit_type['type'])

    preposition_parser = \
        Or([CaselessLiteral(s) for s in prepositions]) + Word(alphas)

    measurement_parser = \
        originalTextFor(
            Word(nums) +
            ZeroOrMore(',' + Word(nums + ',')) +
            ZeroOrMore('.' + Word(nums)) +
            ZeroOrMore(Word(nums) + '/' + Word(nums))
        ) + \
        Or([CaselessLiteral(s) for s in units.keys()]) + \
        Optional(originalTextFor(preposition_parser))

    registry.set('MP_units', units)
    registry.set('MP_systems', systems)
    registry.set('MP_preposition_parser', preposition_parser)
    registry.set('MP_measurement_parser', measurement_parser)
def bootstrap(config):
    """
    Loads unit lists for use in this instance of the measurement parser

    :param config: cahoots config
    :type config: cahoots.config.BaseConfig
    """
    units = {}
    systems = {}
    prepositions = DataHandler().get_prepositions()

    directory = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(directory, "units/*.yaml")

    for file_path in glob.glob(path):
        unit_file = open(file_path, 'r')
        unit_type = yaml.load(unit_file)
        for unit in unit_type['keywords']:
            units[unit] = unit_type['id']
        systems[unit_type['id']] = \
            (unit_type['system'], unit_type['type'])

    preposition_parser = \
        Or([CaselessLiteral(s) for s in prepositions]) + Word(alphas)

    measurement_parser = \
        originalTextFor(
            Word(nums, max=3) +
            ZeroOrMore(',' + Word(nums, exact=3)) +
            ZeroOrMore('.' + Word(nums)) +
            ZeroOrMore(Word(nums) + '/' + Word(nums))
        ) + \
        Or([CaselessLiteral(s) for s in units.keys()]) + \
        Optional(originalTextFor(preposition_parser))

    registry.set('MP_units', units)
    registry.set('MP_systems', systems)
    registry.set('MP_preposition_parser', preposition_parser)
    registry.set('MP_measurement_parser', measurement_parser)
def parse_element(cls, indent_stack):
    """Set to the rule the list of aggregations."""
    aggregation_body = (Word(alphanums + "_") + originalTextFor(nestedExpr())
                        + Suppress(',') + PYTHON_ALLOWED_EXPR
                        ).setParseAction(lambda toks: {
                            'name': toks[0],
                            'engine': eval(toks[1]),
                            'rule': eval(toks[2])
                        })
    return (Keyword('aggregation:').suppress()
            + indentedBlock(OneOrMore(aggregation_body), indent_stack)
            ).setResultsName('aggregation')
def _parser_piece_destination_and_title():
    """
    Return PyParsing element to match the destination and title of a
    markdown link.
    """
    # Capture everything between the balanced parentheses, then parse it
    # later.
    dest_and_title = originalTextFor(
        nestedExpr(opener="(", closer=")")
    ).addParseAction(lambda s, l, toks: toks[0][1:-1])

    destination = Combine(
        # Zero or more non-space characters. But before each character
        # (exact=1) check if we have a shortcode. If we do, allow that.
        ZeroOrMore(
            originalTextFor(nestedExpr(opener=R"{{<", closer=">}}"))
            | originalTextFor(nestedExpr(opener=R"{{%", closer="%}}"))
            | CharsNotIn(" \t", exact=1))).setResultsName("destination")

    # CommonMark requires link title to be encased in single-quotes,
    # double-quotes, or wrapped in parentheses. Let's not bother with the
    # parentheses case for now.
    title = (quotedString.copy().setResultsName("title").setParseAction(
        lambda s, l, toks: unescape_quoted_string(toks[0])))

    # This will parse the contents of dest_and_title
    dest_and_title_parser = destination + Optional(White(" ") + title) \
        + StringEnd()

    def back_parse_action(_s, _l, toks):
        return dest_and_title_parser.parseString(toks[0])

    dest_and_title.addParseAction(back_parse_action)
    return dest_and_title
def substfile_bnf():
    """
    substfile_bnf()

    Defines the parser grammar for each template block within the given
    substitutions file.

    :return: PyParsing BNF expression
    """
    expr = Keyword('file') \
        + originalTextFor(Word(alphanums + '_$()/') + '.template') \
        + l_brace \
        + pattern_bnf() \
        + OneOrMore(instance_bnf()) \
        + r_brace
    # Defines comments
    expr.ignore('#' + restOfLine)
    return expr
def getToken(self):
    assert self.start is not None
    assert self.end is not None
    assert self.start_html is not None
    assert self.end_html is not None
    assert self.name is not None

    token = originalTextFor(
        nestedExpr(opener=self.start,
                   closer=self.end,
                   content=None,
                   ignoreExpr=self.ignore))
    token = token.setParseAction(
        self.convertToHTML(self.start_html, self.end_html))(self.name)
    return token
def load(self, fh=None, ass=None):
    if ass is None:
        ass = Assignment()
    fh = super().get_fh(fh)
    text = fh.read()

    # look for a pandoc-style configuration section. this will be a yaml
    # document embedded in the text between two sets of '---'.
    res = pyparsing.originalTextFor(
        pyparsing.QuotedString(quoteChar='---', multiline=True)
    ).searchString(text)
    config = None
    if len(res):
        text = text.replace(res[0][0], "")
        config = yaml.load(res[0][0].strip("-"), Loader=yaml.FullLoader)

    in_data = json.loads(self._markdown_to_json(text))

    # need to process JSON before passing to the JSON reader
    qkey = "Questions"
    for k in in_data.keys():
        if k.lower() == "questions":
            qkey = k

    out_data = dict()
    out_data['questions'] = list()
    i = 0
    N = len(in_data.get(qkey, list()))
    while i < N:
        q = dict()
        q['text'] = in_data[qkey][i]
        if i + 1 < N and isinstance(in_data[qkey][i + 1], list):
            i += 1
            q['answer'] = dict()
            q['answer']['choices'] = in_data[qkey][i]
        elif self.throw_on_missing_answers:
            raise RuntimeError(
                "A question without an answer was found. "
                "Question text '{}'".format(q['text']))
        out_data['questions'].append(q)
        i += 1

    ass = JSON().load(io.StringIO(json.dumps(out_data)))
    if config is not None:
        ass.meta.__dict__.update(config)

    return ass
def _get_value(wiki_markup, parse_actions=False):
    """Get value parser element.

    :param ParserElement wiki_markup: wiki markup

    value = wiki_markup;

    :returns: value parser element
    :rtype: ParserElement
    """
    value = pyparsing.Combine(
        pyparsing.OneOrMore(
            pyparsing.originalTextFor(wiki_markup))).setResultsName("value")
    value.setName("value")
    value.parseWithTabs()
    if parse_actions:
        pass
    return value
def grammar(append_line_break=True):
    REF = Combine(Literal('"Ref"'))
    FN = Combine(Literal('"Fn::') + Word(alphanums) + Literal('"'))

    json_val = Forward()
    json_string = dblQuotedString
    json_list_items = delimitedList(json_val)
    json_list = Literal('[') + Optional(json_list_items) + Literal(']')
    json_dict_member = json_string + Literal(':') + json_val
    json_dict_members = delimitedList(json_dict_member)
    json_dict = Literal('{') + Optional(json_dict_members) + Literal('}')
    json_val << (json_string | json_list | json_dict)

    aws_member = (REF | FN) + Literal(':') + originalTextFor(json_val)
    aws = Group(Literal('{') + aws_member
                + Literal('}')).setParseAction(translate)
    aws_start = Literal('{').leaveWhitespace() + (REF | FN)

    term = aws_start | LineEnd()
    script_stuff = Group(
        ZeroOrMore(White()) + SkipTo(term)
    ).setParseAction(translate)

    script_end = 'ScriptEnd' if append_line_break else 'ScriptEndNLB'
    script_line = script_stuff(script_end) + ~aws_start + Suppress(LineEnd())
    script_line_ending_with_aws = (
        Optional(script_stuff('Script')) + aws('AWS') + Suppress(LineEnd())
    ).setParseAction(translate)
    aws_script_start = Optional(script_stuff('Script')) + aws('AWS')
    script_line_containing_aws = aws_script_start + script_line
    script_line_containing_many_aws = (
        OneOrMore(aws_script_start) + script_line)

    line = Group(
        script_line_ending_with_aws('AWSEnd')
        | script_line_containing_many_aws
        | script_line
    ).setParseAction(process_line)

    return OneOrMore(line('Line'))
def parse_variadic_templates(txt):
    template_param_type = Word(alphas)
    template_variadic = Literal('...')
    template_id = Word(alphas)

    template_variadic_param = Group(
        template_param_type + template_variadic + template_id)
    template_param = Group(template_param_type + template_id)
    # template_params = Group(delimitedList(template_variadic_param | Optional(template_param)))
    template_params = (Optional(OneOrMore(template_param + ','))
                       + template_variadic_param
                       + Optional(OneOrMore(',' + template_param)))
    template_params_no_variadic = (
        template_param + Optional(OneOrMore(',' + template_param)))

    template_decl = Optional(
        "template" + Literal("<") + template_params_no_variadic + Literal(">")
    ) + "template" + Literal("<") + template_params + Literal(">")

    block_content = Forward()
    block = nestedExpr('{', '}', content=block_content) + Literal(';') * (0, 1)
    block_content << (CharsNotIn('{}') | block)

    decl = originalTextFor(template_decl + CharsNotIn('{') + block)

    template_file = Forward()
    code_block = decl | White() | Word(printables)
    template_file << (Optional(OneOrMore(code_block)) | template_file)

    parsed = template_file.parseString(txt)
    return parsed
def create_pre_timedelta_literal(tok):
    """
    Detects <number> <timescale> <preposition>

    :param tok: the token we want to produce a detector for
    :type tok: str
    :return: the caseless literal
    :rtype: pyparsing.And
    """
    delta = originalTextFor(Or([
        Word(nums)
        + ZeroOrMore(',' + Word(nums + ','))
        + ZeroOrMore('.' + Word(nums)),
        CaselessLiteral('an'),
        CaselessLiteral('a')
    ])) + CaselessLiteral(tok) + DateParser.get_preposition_literals()

    delta.setName('pre' + tok).\
        setParseAction(DateParser.generate_pre_timedelta)

    return delta
def parse_plus_string_to_list(plus_delimited_string):
    """
    utility function to convert '+' delimited, quoted string into a list
    of substrings.

    >>> s1 = "a"
    >>> parse_plus_string_to_list(s1)
    ['a']
    >>> s1 = "a+b"
    >>> parse_plus_string_to_list(s1)
    ['a', 'b']
    """
    delimiter = '+'
    non_delimiter_chars = pyparsing.printables.replace(delimiter, '')
    OneOrMore = pyparsing.OneOrMore
    expression = pyparsing.originalTextFor(
        OneOrMore(pyparsing.quotedString
                  | pyparsing.Word(non_delimiter_chars)))
    expressions = pyparsing.delimitedList(expression, delimiter)
    parse_result = expressions.parseString(plus_delimited_string)
    # extract the identified tokens from the pyparsing object, as a list
    results = list(parse_result)
    return results
def create_post_timedelta_literal(tok):
    """
    Detects <plus/minus> <number> <timescale>

    :param tok: the token we want to produce a detector for
    :type tok: str
    :return: the caseless literal
    :rtype: pyparsing.Or
    """
    delta = Or(
        [CaselessLiteral(t) for t in ['+', '-', 'plus', 'minus']]
    ) + originalTextFor(Or([
        Word(nums)
        + ZeroOrMore(',' + Word(nums + ','))
        + ZeroOrMore('.' + Word(nums)),
        CaselessLiteral('an'),
        CaselessLiteral('a')
    ])) + CaselessLiteral(tok) + StringEnd()

    delta.setName('post' + tok).\
        setParseAction(DateParser.generate_post_timedelta)

    return delta
def formatList(string: Optional[str], replaceSemicolons=True,
               replaceAnds=True) -> List[str]:
    """Parses a list. Guarantees that each element of the list is non-null
    and non-empty. Gracefully supports quoting: does not split items that
    are quoted (single or double quotes).

    Args:
    - string (Optional[str]): String to convert into a list, using commas
      as separators
    - replaceSemicolons (bool, optional): Also use semicolons as separators.
      Defaults to True.
    - replaceAnds (bool, optional): Also use "and" as a separator.
      Defaults to True.

    Returns:
    - List[str]: List of strings
    """
    if string is None:
        return []
    value = string.strip()
    if replaceSemicolons:
        value = value.replace(";", ", ")
    if replaceAnds:
        value = value.replace(" et ", ", ").replace(" and ", ", ")
    quotedstring = pyparsing.quotedString.copy()
    quotedstring.addParseAction(pyparsing.removeQuotes)
    element = pyparsing.originalTextFor(
        pyparsing.ZeroOrMore(
            pyparsing.Word(pyparsing.printables + pyparsing.alphas8bit,
                           excludeChars="(),")
            | pyparsing.nestedExpr()))
    expr = pyparsing.delimitedList(quotedstring | element)
    parsed = expr.parseString(value, parseAll=True)
    return [x for x in parsed.asList() if x]
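# Hedged usage sketch (expected behavior per the docstring above: commas,
# "and", and quoting are all handled):
if __name__ == "__main__":
    print(formatList('apples, "pears, ripe" and bananas'))
    # expected: ['apples', 'pears, ripe', 'bananas']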
def parser_factory(styler):
    """Builds the repr() parser."""
    squo = styler('class:string', "'")
    dquo = styler('class:string', '"')

    esc_single = pp.oneOf(r'\\ \' \" \n \r \t')
    esc_hex = pp.Literal(r'\x') + pp.Word(pp.hexnums, exact=2)
    escs = styler('class:escape', esc_single | esc_hex)

    control_chars = ''.join(map(chr, range(32))) + '\x7f'
    normal_chars_squo = pp.CharsNotIn(control_chars + r"\'")
    chars_squo = styler('class:string', normal_chars_squo) | escs
    normal_chars_dquo = pp.CharsNotIn(control_chars + r'\"')
    chars_dquo = styler('class:string', normal_chars_dquo) | escs

    skip_white = pp.Optional(pp.White())
    bytes_prefix = pp.Optional(styler('class:string_prefix', 'b'))
    string_squo = skip_white + bytes_prefix + squo \
        - pp.ZeroOrMore(chars_squo) + squo
    string_dquo = skip_white + bytes_prefix + dquo \
        - pp.ZeroOrMore(chars_dquo) + dquo
    string = string_squo | string_dquo
    string.leaveWhitespace()

    address = styler('class:address', '0x' + pp.Word(pp.hexnums))
    number = styler('class:number', ppc.number)
    const = pp.oneOf('True False None NotImplemented Ellipsis ...')
    const = styler('class:constant', const)
    kwarg = styler('class:kwarg', ppc.identifier) \
        + styler('class:operator', '=')
    call = styler('class:call', ppc.identifier) + pp.FollowedBy('(')
    magic = styler('class:magic', pp.Regex(r'__[a-zA-Z0-9_]+__'))

    token = string | address | number | const | kwarg | call | magic
    token.parseWithTabs()
    return pp.originalTextFor(token)
def parse_element(cls, indent_stack):
    """Set ``only_if_master_value`` attribute to the rule."""
    return (Keyword("@only_if_master_value").suppress()
            + originalTextFor(nestedExpr())
            ).setResultsName("only_if_master_value").setParseAction(
                lambda toks: toks[0])
def __init__(self, codeBlock, codeIndex, msg):
    ParserException.__init__(self, codeBlock.xmlElement, msg)

    self.columnNumber = col(codeIndex, codeBlock.codeString)
    self.lineNumber = codeBlock.scriptLineNumber \
        + lineno(codeIndex, codeBlock.codeString) - 1


identifier = Word(alphas + '_', alphanums + '_')
numericConstant = Regex(r'\b((0(x|X)[0-9a-fA-F]*)|(([0-9]+\.?[0-9]*)|(\.[0-9]+))((e|E)(\+|-)?[0-9]+)?)(L|l|UL|ul|u|U|F|f|ll|LL|ull|ULL)?\b')

ignoreExpr = cppStyleComment.copy() | quotedString.copy()

baseExpr = Forward()

arrayAccess = originalTextFor(nestedExpr('[', ']', baseExpr, ignoreExpr))
parenthisedExpression = originalTextFor(
    nestedExpr('(', ')', baseExpr, ignoreExpr))
functionCall = nestedExpr('(', ')', delimitedList(baseExpr), ignoreExpr)

alphaNumPlusSafePunctuation = alphanums + '!#$%&\\*+-./:;<=>@^_`{|}~'

baseExpr << OneOrMore(
    originalTextFor(identifier + functionCall) | quotedString.copy()
    | identifier | numericConstant | arrayAccess | parenthisedExpression
    | Word(alphaNumPlusSafePunctuation))
baseExpr.ignore(cppStyleComment.copy())


def targetComponentsForOperatorsInString(operatorNames, codeBlock):
    """
    Return a list of pairs of operator names and their targets that are in
    `codeString`. The valid operator names searched for are `operatorNames`.
    For example, if 'L' is in `operatorNames`, then in the code ``L[phi]``
    the return value would be
    ``('L', 'phi', slice(firstCharacterIndex, lastCharacterIndex))``.
def _create_config_parser():
    """
    Creates a parser using pyparsing that works with bibfield rule definitions

    BNF like grammar:

    rule       ::= ([persistent_identifier] json_id ["[0]" | "[n]"] "," aliases":" INDENT body UNDENT) | include
    include    ::= "include(" PATH ")"
    body       ::= [inherit_from] (creator | derived | calculated) [checker] [documentation]
    aliases    ::= json_id ["[0]" | "[n]"] ["," aliases]

    creator    ::= "creator:" INDENT creator_body+ UNDENT
    creator_body ::= [parse_first] [legacy] source_format "," source_tag "," python_allowed_expr
    source_format ::= MASTER_FORMATS
    source_tag ::= QUOTED_STRING

    derived    ::= "derived" INDENT derived_calculated_body UNDENT
    calculated ::= "calculated:" INDENT derived_calculated_body UNDENT
    derived_calculated_body ::= [parse_first] [depends_on] [only_if] [do_not_cache] "," python_allowed_exp

    persistent_identifier ::= @persistent_identifier( level )
    inherit_from ::= "@inherit_from()"
    legacy     ::= "@legacy(" correspondences+ ")"
    do_not_cache ::= "@do_not_cache"
    correspondences ::= "(" source_tag [ "," tag_name ] "," json_id ")"
    parse_first ::= "@parse_first(" jsonid+ ")"
    depends_on ::= "@depends_on(" json_id+ ")"
    only_if    ::= "@only_if(" python_condition+ ")"

    python_allowed_exp ::= ident | list_def | dict_def | list_access | dict_access | function_call

    checker    ::= "checker:" INDENT checker_function+ UNDENT

    documentation ::= INDENT doc_string subfield* UNDENT
    doc_string ::= QUOTED_STRING
    subfield   ::= "@subfield" json_id["."json_id*] ":" docstring
    """
    indent_stack = [1]

    def check_sub_indent(str, location, tokens):
        cur_col = col(location, str)
        if cur_col > indent_stack[-1]:
            indent_stack.append(cur_col)
        else:
            raise ParseException(str, location, "not a subentry")

    def check_unindent(str, location, tokens):
        if location >= len(str):
            return
        cur_col = col(location, str)
        if not (cur_col < indent_stack[-1] and cur_col <= indent_stack[-2]):
            raise ParseException(str, location, "not an unindent")

    def do_unindent():
        indent_stack.pop()

    INDENT = lineEnd.suppress() + empty + \
        empty.copy().setParseAction(check_sub_indent)
    UNDENT = FollowedBy(empty).setParseAction(check_unindent)
    UNDENT.setParseAction(do_unindent)

    json_id = (Word(alphanums + "_") + Optional(oneOf("[0] [n]")))\
        .setResultsName("json_id", listAllMatches=True)\
        .setParseAction(lambda tokens: "".join(tokens))
    aliases = delimitedList(
        (Word(alphanums + "_") + Optional(oneOf("[0] [n]")))
        .setParseAction(lambda tokens: "".join(tokens)))\
        .setResultsName("aliases")

    python_allowed_expr = Forward()
    ident = Word(alphas + "_", alphanums + "_")
    dict_def = originalTextFor(nestedExpr('{', '}'))
    list_def = originalTextFor(nestedExpr('[', ']'))
    dict_access = list_access = originalTextFor(ident + nestedExpr('[', ']'))
    function_call = originalTextFor(
        ZeroOrMore(ident + ".") + ident + nestedExpr('(', ')'))

    python_allowed_expr << (ident ^ dict_def ^ list_def ^ dict_access ^
                            list_access ^ function_call)\
        .setResultsName("value", listAllMatches=True)

    persistent_identifier = (
        Suppress("@persistent_identifier") + nestedExpr("(", ")"))\
        .setResultsName("persistent_identifier")
    inherit_from = (Suppress("@inherit_from")
                    + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("inherit_from")
    legacy = (Suppress("@legacy") + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("legacy", listAllMatches=True)
    only_if = (Suppress("@only_if") + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("only_if")
    depends_on = (Suppress("@depends_on")
                  + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("depends_on")
    parse_first = (Suppress("@parse_first")
                   + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("parse_first")
    do_not_cache = (Suppress("@") + "do_not_cache")\
        .setResultsName("do_not_cache")
    master_format = (Suppress("@master_format")
                     + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("master_format")

    derived_calculated_body = Optional(parse_first) + Optional(depends_on) \
        + Optional(only_if) + Optional(do_not_cache) + python_allowed_expr

    derived = "derived" + Suppress(":") + INDENT + derived_calculated_body \
        + UNDENT
    calculated = "calculated" + Suppress(":") + INDENT \
        + derived_calculated_body + UNDENT

    source_tag = quotedString\
        .setParseAction(removeQuotes)\
        .setResultsName("source_tag", listAllMatches=True)
    source_format = oneOf(CFG_BIBFIELD_MASTER_FORMATS)\
        .setResultsName("source_format", listAllMatches=True)
    creator_body = (Optional(parse_first) + Optional(depends_on)
                    + Optional(only_if) + Optional(legacy) + source_format
                    + Suppress(",") + source_tag + Suppress(",")
                    + python_allowed_expr)\
        .setResultsName("creator_def", listAllMatches=True)
    creator = "creator" + Suppress(":") + INDENT + OneOrMore(creator_body) \
        + UNDENT

    checker_function = (Optional(master_format) + ZeroOrMore(ident + ".")
                        + ident + originalTextFor(nestedExpr('(', ')')))\
        .setResultsName("checker_function", listAllMatches=True)
    checker = ("checker" + Suppress(":") + INDENT
               + OneOrMore(checker_function) + UNDENT)

    doc_string = QuotedString(quoteChar='"""', multiline=True) \
        | quotedString.setParseAction(removeQuotes)
    subfield = (Suppress("@subfield") + Word(alphanums + "_" + '.')
                + Suppress(":") + Optional(doc_string))\
        .setResultsName("subfields", listAllMatches=True)
    documentation = ("documentation" + Suppress(":") + INDENT
                     + Optional(doc_string).setResultsName("main_doc")
                     + ZeroOrMore(subfield) + UNDENT)\
        .setResultsName("documentation")

    field_def = (creator | derived | calculated)\
        .setResultsName("type_field", listAllMatches=True)

    body = Optional(inherit_from) + Optional(field_def) + Optional(checker) \
        + Optional(documentation)
    comment = Literal("#") + restOfLine + LineEnd()
    include = (Suppress("include") + quotedString)\
        .setResultsName("includes", listAllMatches=True)
    rule = (Optional(persistent_identifier) + json_id
            + Optional(Suppress(",") + aliases) + Suppress(":") + INDENT
            + body + UNDENT)\
        .setResultsName("rules", listAllMatches=True)

    return OneOrMore(rule | include | comment.suppress())
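# Hedged sketch of a rule in the format the parser above accepts. The shape
# is inferred from the BNF docstring; field names and tags are made up, and
# 'marc' is assumed to be among CFG_BIBFIELD_MASTER_FORMATS:
EXAMPLE_BIBFIELD_RULE = '''
title:
    creator:
        marc, "245__", {'title': value}
    documentation:
        "The title of the record"
'''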
cated_expr = arith_expr + ZeroOrMore('//' + arith_expr)
comp_op << oneOf('< > == >= <= /= .lt. .gt. .eq. .ge. .le. .ne.')
comparison = arith_expr + ZeroOrMore(comp_op + arith_expr)
not_test = Forward()
not_test << (('.not.' + not_test) | comparison)
and_test = not_test + ZeroOrMore('.and.' + not_test)
or_test = and_test + ZeroOrMore('.or.' + and_test)
logical_eq_test = or_test + ZeroOrMore(oneOf('.eqv. .neqv.') + or_test)
user_dyadic_test = logical_eq_test + ZeroOrMore(user_op + logical_eq_test)
test = user_dyadic_test
calllist << ('(' + Optional(delimitedList(test, delim=',')) + ')')
array_literal = '(/' + delimitedList(test, delim=',') + '/)'
atom << (('(' + test + ')') | array_literal | NAME | INTEGER_K | REAL
         | STRING | oneOf('.true. .false.'))

orig_test = originalTextFor(test)
orig_test.addParseAction(lambda s, loc, toks: [toks[0].strip()])

funcall = Group(NAME + '(' + delimitedList(test, delim=',') + ')')

comma = Literal(',').setParseAction(lambda s, loc, toks: [', '])
do_kwd = p.Keyword('do').setParseAction(lambda s, loc, toks: ['do '])
ivar = NAME.setResultsName('ivar')
istart = orig_test.setResultsName('istart')
eqsign = p.Literal('=').setParseAction(lambda s, loc, toks: [' = '])
comma = p.Literal(',').setParseAction(lambda s, loc, toks: [', '])
iend = orig_test.setResultsName('iend')
istep = orig_test.setResultsName('istep')
def _create_field_parser():
    """
    Creates a parser using pyparsing that works with bibfield rule definitions

    BNF like grammar:

    rule       ::= ([persistent_identifier] json_id ["[0]" | "[n]"] "," aliases":" INDENT body UNDENT) | include | python_comment
    include    ::= "include(" PATH ")"
    body       ::= [inherit_from] (creator | derived | calculated) [checker] [documentation] [producer]
    aliases    ::= json_id ["[0]" | "[n]"] ["," aliases]

    creator    ::= "creator:" INDENT creator_body+ UNDENT
    creator_body ::= [decorators] source_format "," source_tag "," python_allowed_expr
    source_format ::= MASTER_FORMATS
    source_tag ::= QUOTED_STRING

    derived    ::= "derived" INDENT derived_calculated_body UNDENT
    calculated ::= "calculated:" INDENT derived_calculated_body UNDENT
    derived_calculated_body ::= [decorators] "," python_allowed_exp

    decorators ::= (persistent_identifier | legacy | do_not_cache | parse_first | depends_on | only_if | only_if_master_value)*
    persistent_identifier ::= @persistent_identifier( level )
    legacy     ::= "@legacy(" correspondences+ ")"
    correspondences ::= "(" source_tag [ "," tag_name ] "," json_id ")"
    parse_first ::= "@parse_first(" jsonid+ ")"
    depends_on ::= "@depends_on(" json_id+ ")"
    only_if    ::= "@only_if(" python_condition+ ")"
    only_if_master_value ::= "@only_if_master_value(" python_condition+ ")"
    inherit_from ::= "@inherit_from()"

    python_allowed_exp ::= ident | list_def | dict_def | list_access | dict_access | function_call

    checker    ::= "checker:" INDENT checker_function+ UNDENT

    documentation ::= INDENT doc_string subfield* UNDENT
    doc_string ::= QUOTED_STRING
    subfield   ::= "@subfield" json_id["."json_id*] ":" docstring

    producer   ::= "producer:" INDENT producer_body UNDENT
    producer_body ::= producer_code "," python_dictionary
    producer_code ::= ident
    """
    indent_stack = [1]

    def check_sub_indent(str, location, tokens):
        cur_col = col(location, str)
        if cur_col > indent_stack[-1]:
            indent_stack.append(cur_col)
        else:
            raise ParseException(str, location, "not a subentry")

    def check_unindent(str, location, tokens):
        if location >= len(str):
            return
        cur_col = col(location, str)
        if not (cur_col < indent_stack[-1] and cur_col <= indent_stack[-2]):
            raise ParseException(str, location, "not an unindent")

    def do_unindent():
        indent_stack.pop()

    INDENT = lineEnd.suppress() + empty + \
        empty.copy().setParseAction(check_sub_indent)
    UNDENT = FollowedBy(empty).setParseAction(check_unindent)
    UNDENT.setParseAction(do_unindent)

    json_id = (Word(alphas + "_", alphanums + "_")
               + Optional(oneOf("[0] [n]")))\
        .setResultsName("json_id", listAllMatches=True)\
        .setParseAction(lambda tokens: "".join(tokens))
    aliases = delimitedList(
        (Word(alphanums + "_") + Optional(oneOf("[0] [n]")))
        .setParseAction(lambda tokens: "".join(tokens)))\
        .setResultsName("aliases")

    ident = Word(alphas + "_", alphanums + "_")
    dict_def = originalTextFor(nestedExpr('{', '}'))
    list_def = originalTextFor(nestedExpr('[', ']'))
    dict_access = list_access = originalTextFor(ident + nestedExpr('[', ']'))
    function_call = originalTextFor(
        ZeroOrMore(ident + ".") + ident + nestedExpr('(', ')'))
    python_allowed_expr = (dict_def ^ list_def ^ dict_access ^
                           list_access ^ function_call ^ restOfLine)\
        .setResultsName("value", listAllMatches=True)

    persistent_identifier = (Suppress("@persistent_identifier")
                             + nestedExpr("(", ")"))\
        .setResultsName("persistent_identifier")
    legacy = (Suppress("@legacy") + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("legacy", listAllMatches=True)
    only_if = (Suppress("@only_if") + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("only_if")
    only_if_master_value = (Suppress("@only_if_value")
                            + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("only_if_master_value")
    depends_on = (Suppress("@depends_on")
                  + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("depends_on")
    parse_first = (Suppress("@parse_first")
                   + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("parse_first")
    memoize = (Suppress("@memoize") + nestedExpr("(", ")"))\
        .setResultsName("memoize")

    field_decorator = parse_first ^ depends_on ^ only_if ^ \
        only_if_master_value ^ memoize ^ legacy

    # Independent decorators
    inherit_from = (Suppress("@inherit_from")
                    + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("inherit_from")
    override = (Suppress("@") + "override")\
        .setResultsName("override")
    extend = (Suppress("@") + "extend")\
        .setResultsName("extend")
    master_format = (Suppress("@master_format")
                     + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("master_format")\
        .setParseAction(lambda toks: toks[0])

    derived_calculated_body = (ZeroOrMore(field_decorator)
                               + python_allowed_expr)\
        .setResultsName('derived_calculated_def')
    derived = "derived" + Suppress(":") + INDENT + derived_calculated_body \
        + UNDENT
    calculated = "calculated" + Suppress(":") + INDENT \
        + derived_calculated_body + UNDENT

    source_tag = quotedString\
        .setParseAction(removeQuotes)\
        .setResultsName("source_tag", listAllMatches=True)
    source_format = Word(alphas, alphanums + "_")\
        .setResultsName("source_format", listAllMatches=True)
    creator_body = (ZeroOrMore(field_decorator) + source_format
                    + Suppress(",") + source_tag + Suppress(",")
                    + python_allowed_expr)\
        .setResultsName("creator_def", listAllMatches=True)
    creator = "creator" + Suppress(":") + INDENT + OneOrMore(creator_body) \
        + UNDENT
    field_def = (creator | derived | calculated)\
        .setResultsName("type_field", listAllMatches=True)

    # JsonExtra
    json_dumps = (Suppress('dumps') + Suppress(',') + python_allowed_expr)\
        .setResultsName("dumps")\
        .setParseAction(lambda toks: toks.value[0])
    json_loads = (Suppress("loads") + Suppress(",") + python_allowed_expr)\
        .setResultsName("loads")\
        .setParseAction(lambda toks: toks.value[0])
    json_extra = (Suppress('json:')
                  + INDENT + Each((json_dumps, json_loads)) + UNDENT)\
        .setResultsName('json_ext')

    # Checker
    checker_function = (Optional(master_format) + ZeroOrMore(ident + ".")
                        + ident + originalTextFor(nestedExpr('(', ')')))\
        .setResultsName("checker", listAllMatches=True)
    checker = ("checker" + Suppress(":") + INDENT
               + OneOrMore(checker_function) + UNDENT)

    # Description/Documentation
    doc_double = QuotedString(quoteChar='"""', multiline=True)
    doc_single = QuotedString(quoteChar="'''", multiline=True)
    doc_string = INDENT + (doc_double | doc_single) + UNDENT
    description_body = (Suppress('description:') + doc_string)\
        .setParseAction(lambda toks: toks[0][0])
    description = (description_body | doc_double | doc_single)\
        .setResultsName('description')

    # Producer
    producer_code = (Word(alphas, alphanums + "_")
                     + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName('producer_code', listAllMatches=True)
    producer_body = (producer_code + Suppress(",") + python_allowed_expr)\
        .setResultsName("producer_rule", listAllMatches=True)
    producer = Suppress("producer:") + INDENT + OneOrMore(producer_body) \
        + UNDENT

    schema = (Suppress('schema:') + INDENT + dict_def + UNDENT)\
        .setParseAction(lambda toks: toks[0])\
        .setResultsName('schema')

    body = Optional(field_def) & Optional(checker) & Optional(json_extra) \
        & Optional(description) & Optional(producer) & Optional(schema)
    comment = Literal("#") + restOfLine + LineEnd()
    include = (Suppress("include") + quotedString)\
        .setResultsName("includes", listAllMatches=True)
    rule = (Optional(persistent_identifier) + Optional(inherit_from)
            + Optional(override) + Optional(extend) + json_id
            + Optional(Suppress(",") + aliases) + Suppress(":")
            + INDENT + body + UNDENT)\
        .setResultsName("rules", listAllMatches=True)

    return OneOrMore(rule | include | comment.suppress())
date = Combine(integer + '/' + integer + '/' + integer)

# Define the line definitions
gender_line = gender("sex") + NL
dob_line = date("DOB") + NL
name_line = restOfLine("name") + NL
id_line = Word(alphanums + '-')("ID") + NL
recnum_line = integer("recnum") + NL

# Define forms of address lines
first_addr_line = Suppress('.') + empty + restOfLine + NL
# Subsequent address line is not gender
subsq_addr_line = ~(gender_line) + restOfLine + NL

# a line with a name and a recnum combined, if no ID
name_recnum_line = originalTextFor(OneOrMore(Word(alphas + ',')))("name") + \
    integer("recnum") + NL

# Defining the form of an overall record, either with or without an ID
record = Group((first_addr_line + ZeroOrMore(subsq_addr_line))("address") +
               gender_line + dob_line +
               ((name_line + id_line + recnum_line) | name_recnum_line))

# Parse Data
records = OneOrMore(record).parseString(data)

# output the desired results (note that address is actually a list of lines)
for rec in records:
    if rec.ID:
        fout.write("%(name)s, %(ID)s, %(address)s, %(sex)s, %(DOB)s\n" % rec)
'it', 'itself', 'its', 'one', 'oneself', 'they', 'them', 'themself',
             'themselves', 'theirs', 'their']

ARTICLES = ['the', 'a', 'an']

NUMBERS = ["zero", "oh", "zip", "zilch", "nada", "bupkis", "one", "two",
           "three", "four", "five", "six", "seven", "eight", "nine", "ten",
           "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen",
           "seventeen", "eighteen", "nineteen", "ten", "twenty", "thirty",
           "forty", "fifty", "sixty", "seventy", "eighty", "ninety",
           "thousand", "million", "billion", "trillion", "quadrillion",
           "quintillion"]

COPULA = ["be", "am", "is", "are", "being", "was", "were", "been"]

CLOSED_WORD = set(PREPOSITIONS) | set(CONJUNTIONS) | set(PRONONS) | \
    set(ARTICLES) | set(NUMBERS) | set(COPULA)

bibtexChars = alphas + nums + "\\\\.-':,"
bracedWord = originalTextFor(nestedExpr("{", "}"))
bracedWord.addParseAction(removeQuotes)
WORDS = Word(bibtexChars) | bracedWord('braced')


def caps(text):
    """
    Capitalizes the first letter of the text and keeps the rest of the
    text intact.
    """
    if text:
        return text[0].upper() + text[1:]
    return ''


def abbrev(text, sep=' '):
    """
    Abbreviates a text.
import six

from pyparsing import ParseException, ParserElement, FollowedBy, Suppress, \
    OneOrMore, Word, LineEnd, ZeroOrMore, Optional, Literal, alphas, \
    alphanums, originalTextFor, nestedExpr, quotedString, removeQuotes, \
    lineEnd, empty, col, restOfLine, delimitedList, Each, Keyword, \
    commaSeparatedList, Group

from .errors import FieldParserException, ModelParserException

ParserElement.setDefaultWhitespaceChars(' \r\t')

COMMENT = (Literal("#") + restOfLine + LineEnd()).suppress()
IDENT = Word(alphanums + '_')
DICT_DEF = originalTextFor(nestedExpr('{', '}'))
LIST_DEF = originalTextFor(nestedExpr('[', ']'))
DICT_ACCESS = LIST_ACCESS = originalTextFor(IDENT + nestedExpr('[', ']'))
PYTHON_ALLOWED_EXPR = (DICT_DEF ^ LIST_DEF ^ DICT_ACCESS ^ LIST_ACCESS ^
                       restOfLine).setParseAction(lambda toks: toks[0])


def indentedBlock(expr, indent_stack, indent=True):
    """Define space-delimited indentation blocks.

    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    There is also a version in pyparsing, but it doesn't seem to work well
    with JSONAlchemy cfg files.
from pyparsing import (Regex, Or, Keyword, Literal, Word, LineEnd, SkipTo,
                       Optional, OneOrMore, ZeroOrMore, originalTextFor, nums)
from pyparsing_utils import LemmatizedWord
import wordlists

in_parens = Regex(r"\([^)]+\)")
modifier = Or(LemmatizedWord(w) for w in wordlists.food_adjectives if w) | \
    in_parens | Keyword("to taste")
base_ingredient = Regex(r"[^-(),][^ (),]+") + SkipTo(
    Keyword("to taste") | Literal(",") | Word("-") | in_parens | LineEnd()
)
unit = Optional(in_parens) + \
    Or(LemmatizedWord(w) for w in wordlists.units_of_measure if w)
quantity = OneOrMore(Word(nums + "-/"))

ingredient_line = (
    originalTextFor(Optional(quantity)).setResultsName("quantity")
    + originalTextFor(Optional(unit)).setResultsName("unit")
    + originalTextFor(ZeroOrMore(modifier + Optional(","))).setResultsName("pre_modifiers")
    + originalTextFor(base_ingredient).setResultsName("base_ingredient")
    + Optional(",")
    + Optional("-")
    + originalTextFor(SkipTo(LineEnd(), True)).setResultsName("post_modifiers")
)
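# Quick check of the self-contained `quantity` piece above (sample input
# is made up):
print(quantity.parseString("1 1/2"))  # -> ['1', '1/2']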
def __init__(self, fragment_file, sdkconfig):
    try:
        fragment_file = open(fragment_file, "r")
    except TypeError:
        pass

    path = os.path.realpath(fragment_file.name)

    indent_stack = [1]

    class parse_ctx:
        fragment = None  # current fragment
        key = ""  # current key
        keys = list()  # list of keys parsed
        key_grammar = None  # current key grammar

        @staticmethod
        def reset():
            # clear all state between fragments
            parse_ctx.fragment = None
            parse_ctx.key = ""
            parse_ctx.keys = list()
            parse_ctx.key_grammar = None

    def fragment_type_parse_action(toks):
        parse_ctx.reset()
        # create an instance of the fragment
        parse_ctx.fragment = FRAGMENT_TYPES[toks[0]]()
        return None

    def expand_conditionals(toks, stmts):
        try:
            stmt = toks["value"]
            stmts.append(stmt)
        except KeyError:
            try:
                conditions = toks["conditional"]
                for condition in conditions:
                    try:
                        _toks = condition[1]
                        _cond = condition[0]
                        if sdkconfig.evaluate_expression(_cond):
                            expand_conditionals(_toks, stmts)
                            break
                    except IndexError:
                        expand_conditionals(condition[0], stmts)
            except KeyError:
                for tok in toks:
                    expand_conditionals(tok, stmts)

    def key_body_parsed(pstr, loc, toks):
        stmts = list()
        expand_conditionals(toks, stmts)

        if parse_ctx.key_grammar.min and len(stmts) < parse_ctx.key_grammar.min:
            raise ParseFatalException(
                pstr, loc, "fragment requires at least %d values for key '%s'" %
                (parse_ctx.key_grammar.min, parse_ctx.key))

        if parse_ctx.key_grammar.max and len(stmts) > parse_ctx.key_grammar.max:
            raise ParseFatalException(
                pstr, loc, "fragment requires at most %d values for key '%s'" %
                (parse_ctx.key_grammar.max, parse_ctx.key))

        try:
            parse_ctx.fragment.set_key_value(parse_ctx.key, stmts)
        except Exception as e:
            raise ParseFatalException(
                pstr, loc, "unable to add key '%s'; %s" % (parse_ctx.key, str(e)))
        return None

    key = Word(alphanums + "_") + Suppress(":")
    key_stmt = Forward()

    condition_block = indentedBlock(key_stmt, indent_stack)
    key_stmts = OneOrMore(condition_block)
    key_body = Suppress(key) + key_stmts
    key_body.setParseAction(key_body_parsed)

    condition = originalTextFor(
        SDKConfig.get_expression_grammar()).setResultsName("condition")
    if_condition = Group(
        Suppress("if") + condition + Suppress(":") + condition_block)
    elif_condition = Group(
        Suppress("elif") + condition + Suppress(":") + condition_block)
    else_condition = Group(
        Suppress("else") + Suppress(":") + condition_block)
    conditional = (if_condition + Optional(OneOrMore(elif_condition)) +
                   Optional(else_condition)).setResultsName("conditional")

    def key_parse_action(pstr, loc, toks):
        key = toks[0]

        if key in parse_ctx.keys:
            raise ParseFatalException(
                pstr, loc, "duplicate key '%s' value definition" % key)

        parse_ctx.key = key
        parse_ctx.keys.append(key)

        try:
            parse_ctx.key_grammar = parse_ctx.fragment.get_key_grammars()[key]
            key_grammar = parse_ctx.key_grammar.grammar
        except KeyError:
            raise ParseFatalException(
                pstr, loc, "key '%s' is not supported by fragment" % key)
        except Exception as e:
            raise ParseFatalException(
                pstr, loc, "unable to parse key '%s'; %s" % (key, str(e)))

        key_stmt << (conditional | Group(key_grammar).setResultsName("value"))
        return None

    def name_parse_action(pstr, loc, toks):
        parse_ctx.fragment.name = toks[0]

    key.setParseAction(key_parse_action)

    ftype = Word(alphas).setParseAction(fragment_type_parse_action)
    fid = Suppress(":") + Word(alphanums + "_.").setResultsName("name")
    fid.setParseAction(name_parse_action)
    header = Suppress("[") + ftype + fid + Suppress("]")

    def fragment_parse_action(pstr, loc, toks):
        key_grammars = parse_ctx.fragment.get_key_grammars()
        required_keys = set(k for (k, v) in key_grammars.items() if v.required)
        present_keys = required_keys.intersection(set(parse_ctx.keys))
        if present_keys != required_keys:
            raise ParseFatalException(
                pstr, loc, "required keys %s for fragment not found" %
                list(required_keys - present_keys))
        return parse_ctx.fragment

    fragment_stmt = Forward()
    fragment_block = indentedBlock(fragment_stmt, indent_stack)

    fragment_if_condition = Group(
        Suppress("if") + condition + Suppress(":") + fragment_block)
    fragment_elif_condition = Group(
        Suppress("elif") + condition + Suppress(":") + fragment_block)
    fragment_else_condition = Group(
        Suppress("else") + Suppress(":") + fragment_block)
    fragment_conditional = (
        fragment_if_condition + Optional(OneOrMore(fragment_elif_condition)) +
        Optional(fragment_else_condition)).setResultsName("conditional")

    fragment = (header + OneOrMore(indentedBlock(
        key_body, indent_stack, False))).setResultsName("value")
    fragment.setParseAction(fragment_parse_action)
    fragment.ignore("#" + restOfLine)

    deprecated_mapping = DeprecatedMapping.get_fragment_grammar(
        sdkconfig, fragment_file.name).setResultsName("value")

    fragment_stmt << (Group(deprecated_mapping) | Group(fragment) |
                      Group(fragment_conditional))

    def fragment_stmt_parsed(pstr, loc, toks):
        stmts = list()
        expand_conditionals(toks, stmts)
        return stmts

    parser = ZeroOrMore(fragment_stmt)
    parser.setParseAction(fragment_stmt_parsed)

    self.fragments = parser.parseFile(fragment_file, parseAll=True)

    for fragment in self.fragments:
        fragment.path = path
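# For reference, a fragment file in the grammar above looks roughly like
# the following (section/archive names and the condition are made up):
#
#   [sections:text]
#   entries:
#       .text+
#       .literal+
#
#   [mapping:main]
#   archive: libmain.a
#   entries:
#       if MY_OPTION = y:
#           obj (noflash)
#       else:
#           * (default)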
from Orange.core import \
    BasketFeeder, FileExampleGenerator, BasketExampleGenerator, \
    C45ExampleGenerator, TabDelimExampleGenerator, \
    registerFileType as register_file_type

import Orange.feature as variable
from Orange.feature import Descriptor

MakeStatus = Orange.feature.Descriptor.MakeStatus
make = Orange.feature.Descriptor.make

from pyparsing import (printables, originalTextFor, OneOrMore,
                       quotedString, Word, delimitedList)

# unquoted words can contain anything but a comma
printables_no_comma = printables.replace(',', '')
content = originalTextFor(OneOrMore(quotedString | Word(printables_no_comma)))


def loadARFF(filename, create_on_new=MakeStatus.Incompatible, **kwargs):
    """Return class:`Orange.data.Table` containing data from file in Weka
    ARFF format if there exists no .xml file with the same name. If it does,
    a multi-label dataset is read and returned.
    """
    if filename[-5:] == ".arff":
        filename = filename[:-5]
    if os.path.exists(filename + ".xml") and os.path.exists(filename + ".arff"):
        xml_name = filename + ".xml"
        arff_name = filename + ".arff"
        return Orange.multilabel.mulan.trans_mulan_data(xml_name, arff_name, create_on_new)
    else:
        return loadARFF_Weka(filename, create_on_new)
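# A quick check of the `content` expression above: quoted chunks keep their
# embedded commas, and originalTextFor returns the raw span (sample value
# is made up):
print(content.parseString('red "light, blue" green'))
# -> ['red "light, blue" green']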
def get_fragment_grammar(sdkconfig, fragment_file):
    # Match header [mapping]
    header = Suppress("[") + Suppress("mapping") + Suppress("]")

    # There are three possible patterns for mapping entries:
    #       obj:symbol (scheme)
    #       obj (scheme)
    #       * (scheme)
    obj = Fragment.ENTITY.setResultsName("object")
    symbol = Suppress(":") + Fragment.IDENTIFIER.setResultsName("symbol")
    scheme = Suppress("(") + Fragment.IDENTIFIER.setResultsName("scheme") + Suppress(")")

    pattern1 = Group(obj + symbol + scheme)
    pattern2 = Group(obj + scheme)
    pattern3 = Group(
        Literal(Mapping.MAPPING_ALL_OBJECTS).setResultsName("object") + scheme)

    mapping_entry = pattern1 | pattern2 | pattern3

    # To simplify parsing, classify each group of condition/mapping entries
    # as either normal or default: a normal group has a non-default
    # condition, while the default group is the one containing the default
    # condition.
    mapping_entries = Group(
        ZeroOrMore(mapping_entry)).setResultsName("mappings")

    normal_condition = Suppress(":") + originalTextFor(
        SDKConfig.get_expression_grammar())
    default_condition = Optional(
        Suppress(":") + Literal(DeprecatedMapping.DEFAULT_CONDITION))

    normal_group = Group(
        normal_condition.setResultsName("condition") + mapping_entries)
    default_group = Group(default_condition +
                          mapping_entries).setResultsName("default_group")

    normal_groups = Group(
        ZeroOrMore(normal_group)).setResultsName("normal_groups")

    # Any mapping fragment definition can have zero or more normal groups
    # and a single default group as its last entry.
    archive = Suppress("archive") + Suppress(":") + \
        Fragment.ENTITY.setResultsName("archive")
    entries = Suppress("entries") + Suppress(":") + (
        normal_groups + default_group).setResultsName("entries")

    mapping = Group(header + archive + entries)
    mapping.ignore("#" + restOfLine)

    def parsed_deprecated_mapping(pstr, loc, toks):
        fragment = Mapping()
        fragment.archive = toks[0].archive
        fragment.name = re.sub(r"[^0-9a-zA-Z]+", "_", fragment.archive)
        fragment.deprecated = True

        fragment.entries = set()
        condition_true = False
        for entries in toks[0].entries[0]:
            condition = next(iter(entries.condition.asList())).strip()
            condition_val = sdkconfig.evaluate_expression(condition)

            if condition_val:
                for entry in entries[1]:
                    fragment.entries.add(
                        (entry.object,
                         None if entry.symbol == '' else entry.symbol,
                         entry.scheme))
                condition_true = True
                break

        if not fragment.entries and not condition_true:
            try:
                entries = toks[0].entries[1][1]
            except IndexError:
                entries = toks[0].entries[1][0]
            for entry in entries:
                fragment.entries.add(
                    (entry.object,
                     None if entry.symbol == '' else entry.symbol,
                     entry.scheme))

        if not fragment.entries:
            fragment.entries.add(("*", None, "default"))

        dep_warning = str(
            ParseFatalException(
                pstr, loc,
                "Warning: Deprecated old-style mapping fragment parsed in file %s."
                % fragment_file))

        print(dep_warning)
        return fragment

    mapping.setParseAction(parsed_deprecated_mapping)
    return mapping
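# For reference, the deprecated (old-style) mapping syntax parsed above
# looks roughly like this (archive and condition are made up):
#
#   [mapping]
#   archive: libmain.a
#   entries:
#       : MY_OPTION = y
#       obj (noflash)
#       : default
#       * (default)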
def parse_element(cls, indent_stack): return (Keyword("@only_if").suppress() + originalTextFor(nestedExpr()) ).setResultsName("only_if").setParseAction(lambda toks: toks[0])
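# A minimal sketch of what parse_element matches (the condition text is
# made up); originalTextFor keeps the raw parenthesized expression:
from pyparsing import Keyword, nestedExpr, originalTextFor

only_if = (Keyword("@only_if").suppress() + originalTextFor(nestedExpr())
           ).setResultsName("only_if").setParseAction(lambda toks: toks[0])
print(only_if.parseString("@only_if (ENV == 'prod')"))
# -> ["(ENV == 'prod')"]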
emoji = Regex(':[\\S]+:').setResultsName('emoji') message = OneOrMore(Word(alphanums + "#")).setResultsName('message') def tail(name): return Suppress(White(max=1)) + CharsNotIn('').setResultsName(name) channel_name = Word(alphanums + '-').setResultsName('channel') user_name = Word(alphanums + '-_.') link = Word(printables) int_num = Word(nums) dumb_single_quotes = QuotedString("‘", endQuoteChar="’", escChar="\\") dumb_double_quotes = QuotedString("“", endQuoteChar="”", escChar="\\") quotedString.addParseAction(removeQuotes) comma_list = delimitedList((dumb_single_quotes | dumb_double_quotes | quotedString | originalTextFor(OneOrMore(Word(printables, excludeChars=","))))).setResultsName('comma_list') def flag(name): dashes = '--' if len(name) > 1 else '-' return CaselessLiteral(dashes + name).setResultsName(name) def flag_with_arg(name, argtype): dashes = '--' if len(name) > 1 else '-' return CaselessLiteral(dashes + name) + argtype.setResultsName(name)
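# Quick check of the flag helpers above (the flag names are made up):
from pyparsing import Word, nums

print(flag("force").parseString("--force"))  # -> ['--force']
print(flag_with_arg("count", Word(nums)).parseString("--count 3"))
# -> ['--count', '3']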
def __init__(self):
    # Bibtex keywords
    string_def_start = pp.CaselessKeyword("@string")
    preamble_start = pp.CaselessKeyword("@preamble")
    comment_line_start = pp.CaselessKeyword('@comment')

    # String names
    string_name = pp.Word(pp.alphanums + '_')('StringName')
    self.set_string_name_parse_action(lambda s, l, t: None)
    string_name.addParseAction(self._string_name_parse_action)

    # Values inside bibtex fields
    # Values can be integer or string expressions. The latter may use
    # quoted or braced values.

    # Integer values
    integer = pp.Word(pp.nums)('Integer')

    # Braced values: braced values can contain nested (but balanced) braces
    braced_value_content = pp.CharsNotIn('{}')
    braced_value = pp.Forward()  # Recursive definition for nested braces
    braced_value <<= pp.originalTextFor(
        '{' + pp.ZeroOrMore(braced_value | braced_value_content) + '}'
    )('BracedValue')
    braced_value.setParseAction(remove_braces)
    # TODO add ignore for "\}" and "\{" ?
    # TODO @ are not parsed by bibtex in braces

    # Quoted values: may contain braced content with balanced braces
    brace_in_quoted = pp.nestedExpr('{', '}')
    text_in_quoted = pp.CharsNotIn('"{}')
    # (quotes should be escaped in quoted value)
    quoted_value = pp.originalTextFor(
        '"' + pp.ZeroOrMore(text_in_quoted | brace_in_quoted) + '"'
    )('QuotedValue')
    quoted_value.addParseAction(pp.removeQuotes)

    # String expressions
    string_expr = pp.delimitedList(
        (quoted_value | braced_value | string_name), delim='#'
    )('StringExpression')
    self.set_string_expression_parse_action(lambda s, l, t: None)
    string_expr.addParseAction(self._string_expr_parse_action)

    value = (integer | string_expr)('Value')

    # Entries

    # @EntryType { ...
    entry_type = (pp.Suppress('@') + pp.Word(pp.alphas))('EntryType')
    entry_type.setParseAction(first_token)

    # Entry key: any character up to a ',' without leading and trailing
    # spaces.
    key = pp.SkipTo(',')('Key')  # Exclude @',\#}{~%
    key.setParseAction(lambda s, l, t: first_token(s, l, t).strip())

    # Field name: word of letters and underscores
    field_name = pp.Word(pp.alphas + '_')('FieldName')
    field_name.setParseAction(first_token)

    # Field: field_name = value
    field = pp.Group(field_name + pp.Suppress('=') + value)('Field')
    field.setParseAction(field_to_pair)

    # List of fields: comma-separated fields
    field_list = (pp.delimitedList(field) + pp.Suppress(pp.Optional(','))
                  )('Fields')
    field_list.setParseAction(
        lambda s, l, t: {k: v for (k, v) in reversed(t.get('Fields'))})

    # Entry: type, key, and fields
    self.entry = (entry_type +
                  in_braces_or_pars(key + pp.Suppress(',') + field_list)
                  )('Entry')

    # Other stuff: comments, string definitions, and preamble declarations

    # Explicit comments: @comment + everything up to the next valid
    # declaration starting on a new line.
    not_an_implicit_comment = (pp.LineStart() + pp.Literal('@')
                               ) | pp.stringEnd()
    self.explicit_comment = (
        pp.Suppress(comment_line_start) +
        pp.originalTextFor(pp.SkipTo(not_an_implicit_comment),
                           asString=True))('ExplicitComment')
    self.explicit_comment.addParseAction(remove_trailing_newlines)
    self.explicit_comment.addParseAction(remove_braces)
    # A previous implementation included the comment up to the next '}'.
    # That is not in line with bibtex behavior, which is to ignore only
    # until EOL. Brace stripping is arbitrary here, but avoids duplication
    # on bibtex write.
    # An empty implicit comment would make the enclosing ZeroOrMore loop
    # forever, so reject empty matches explicitly.
    def mustNotBeEmpty(t):
        if not t[0]:
            raise pp.ParseException("Match must not be empty.")

    # Implicit comments: anything that is not one of the other constructs
    self.implicit_comment = pp.originalTextFor(
        pp.SkipTo(not_an_implicit_comment).setParseAction(mustNotBeEmpty),
        asString=True)('ImplicitComment')
    self.implicit_comment.addParseAction(remove_trailing_newlines)

    # String definition
    self.string_def = (pp.Suppress(string_def_start) + in_braces_or_pars(
        string_name +
        pp.Suppress('=') +
        string_expr('StringValue')
    ))('StringDefinition')

    # Preamble declaration
    self.preamble_decl = (pp.Suppress(preamble_start) +
                          in_braces_or_pars(value))('PreambleDeclaration')

    # Main bibtex expression
    self.main_expression = pp.ZeroOrMore(
        self.string_def |
        self.preamble_decl |
        self.explicit_comment |
        self.entry |
        self.implicit_comment)
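# For reference, the constructs this grammar handles look like the
# following (all values are made up):
#
#   @string{jmlr = "Journal of Machine Learning Research"}
#   @preamble{"\newcommand{\noop}[1]{}"}
#   @article{smith2020,
#     author  = {Smith, Jane},
#     journal = jmlr # ", Vol.~21",
#     year    = 2020,
#   }
#   @comment{ignored until the end of the line}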
@author: luca Submitted by Luca DallOlio, September, 2010 (Minor updates by Paul McGuire, June, 2012) ''' from pyparsing import Word, ZeroOrMore, printables, Suppress, OneOrMore, Group, \ LineEnd, Optional, White, originalTextFor, hexnums, nums, Combine, Literal, Keyword, \ cStyleComment, Regex, Forward, MatchFirst, And, srange, oneOf, alphas, alphanums, \ delimitedList # http://www.antlr.org/grammar/ANTLR/ANTLRv3.g # Tokens EOL = Suppress(LineEnd()) # $ singleTextString = originalTextFor(ZeroOrMore(~EOL + (White(" \t") | Word(printables)))).leaveWhitespace() XDIGIT = hexnums INT = Word(nums) ESC = Literal('\\') + (oneOf(list(r'nrtbf\">'+"'")) | ('u' + Word(hexnums, exact=4)) | Word(printables, exact=1)) LITERAL_CHAR = ESC | ~(Literal("'") | Literal('\\')) + Word(printables, exact=1) CHAR_LITERAL = Suppress("'") + LITERAL_CHAR + Suppress("'") STRING_LITERAL = Suppress("'") + Combine(OneOrMore(LITERAL_CHAR)) + Suppress("'") DOUBLE_QUOTE_STRING_LITERAL = '"' + ZeroOrMore(LITERAL_CHAR) + '"' DOUBLE_ANGLE_STRING_LITERAL = '<<' + ZeroOrMore(Word(printables, exact=1)) + '>>' TOKEN_REF = Word(alphas.upper(), alphanums+'_') RULE_REF = Word(alphas.lower(), alphanums+'_') ACTION_ESC = (Suppress("\\") + Suppress("'")) | Suppress('\\"') | Suppress('\\') + (~(Literal("'") | Literal('"')) + Word(printables, exact=1)) ACTION_CHAR_LITERAL = Suppress("'") + (ACTION_ESC | ~(Literal('\\') | Literal("'")) + Word(printables, exact=1)) + Suppress("'") ACTION_STRING_LITERAL = Suppress('"') + ZeroOrMore(ACTION_ESC | ~(Literal('\\') | Literal('"')) + Word(printables, exact=1)) + Suppress('"') SRC = Suppress('src') + ACTION_STRING_LITERAL("file") + INT("line") id = TOKEN_REF | RULE_REF
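# Quick check of the literal tokens above (sample inputs are made up):
print(CHAR_LITERAL.parseString("'a'"))      # -> ['a']
print(STRING_LITERAL.parseString("'abc'"))  # -> ['abc']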
URI = Regex(r'[^ ]+')("url")
URL = (AT + URI)

EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA)
EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras")

VERSION_PEP440 = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE)
VERSION_LEGACY = Regex(LegacySpecifier._regex_str, re.VERBOSE | re.IGNORECASE)

VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY
VERSION_MANY = Combine(VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE),
                       joinString=",", adjacent=False)("_raw_spec")
_VERSION_SPEC = Optional(((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY))
_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or '')

VERSION_SPEC = originalTextFor(_VERSION_SPEC)("specifier")
VERSION_SPEC.setParseAction(lambda s, l, t: t[1])

MARKER_EXPR = originalTextFor(MARKER_EXPR())("marker")
MARKER_EXPR.setParseAction(
    lambda s, l, t: Marker(s[t._original_start:t._original_end])
)
MARKER_SEPARATOR = SEMICOLON
MARKER = MARKER_SEPARATOR + MARKER_EXPR

VERSION_AND_MARKER = VERSION_SPEC + Optional(MARKER)
URL_AND_MARKER = URL + Optional(MARKER)

NAMED_REQUIREMENT = \
    NAME + Optional(EXTRAS) + (URL_AND_MARKER | VERSION_AND_MARKER)
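# For reference, NAMED_REQUIREMENT targets PEP 508-style requirement
# strings such as (made-up examples):
#
#   requests[security,tests] >= 2.8.1, == 2.8.* ; python_version < "2.7"
#   pip @ https://github.com/pypa/pip/archive/1.3.1.zip ; sys_platform == "win32"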
def __parse_variable(to_parse): return ( pp.originalTextFor(parse.VARIABLE)('result') .leaveWhitespace() .parseWithTabs() .parseString(to_parse))['result']
notDigLower = notDigname.copy().setParseAction(lambda t: t[0].lower()) macroDef = notDigLower.copy() macroRef = notDigLower.copy().setParseAction(MacroReference.fromParseResult) fieldName = notDigLower.copy() entryType = notDigLower.setResultsName("entry type") citeKey = anyName.setResultsName("cite key") string = number | macroRef | quotedString | curlyString # There can be hash concatenation fieldValue = string + ZeroOrMore(HASH + string) namePart = Regex(r"(?!\band\b)[^\s\.,{}]+\.?") | curlyString nobility = Regex(r"[a-z]\w+\.?(\s[a-z]\w+\.?)*").setResultsName("nobility") # "van" etc. spacedNames = originalTextFor(OneOrMore(namePart)) firstNames = spacedNames.copy().setResultsName("firstname") lastNames = spacedNames.copy().setResultsName("lastname") nameSuffix = namePart.copy().setResultsName("suffix") # a name in "comma separated" style, like "Helmling, Michael" csName = Optional(nobility) + lastNames + COMMA + Optional(nameSuffix + COMMA) + firstNames def labelLiteralName(toks): """In case of a literal name, we cannot distinguish between first and middle names, or recognize multi-part last names. Hence it is assumed that the last part is the last name, anything else is stored as first names. """ toks["lastname"] = toks[-1] if len(toks) > 1: