def indentedBlock(expr, indent_stack, indent=True):
    """Build a parser for a block of ``expr`` indented below the current level.

    The returned expression matches a line end, then ``expr`` starting at a
    column deeper than the one on top of ``indent_stack``, then a zero-width
    "undent" marker.  ``indent_stack`` is mutated while parsing: the column of
    the block is pushed when the block opens and popped when it closes.

    pyparsing ships a helper with the same name, but it does not seem to work
    well with JSONAlchemy cfg files, hence this local variant.

    :param expr: parser element matching the content of the block.
    :param indent_stack: list holding the columns of the enclosing blocks.
    :param indent: never read by this implementation.
    :returns: the combined ``indent + expr + undent`` expression.
    """

    def check_sub_indent(string, location, tokens):
        """Push the current column if it is deeper than the enclosing one."""
        column = col(location, string)
        if column <= indent_stack[-1]:
            raise ParseException(string, location, "not a subentry")
        indent_stack.append(column)

    def check_unindent(string, location, tokens):
        """Fail unless the current column closes the innermost block."""
        if location >= len(string):
            # End of input closes whatever is still open.
            return
        column = col(location, string)
        if column >= indent_stack[-1] or column > indent_stack[-2]:
            raise ParseException(string, location, "not an unindent")

    def do_unindent():
        """Drop the innermost indentation level."""
        indent_stack.pop()

    block_start = (
        lineEnd.suppress()
        + empty
        + empty.copy().setParseAction(check_sub_indent)
    )
    block_end = FollowedBy(empty).setParseAction(check_unindent)
    # NOTE(review): setParseAction replaces the actions set before it, so
    # after this call only do_unindent is attached and check_unindent never
    # runs. Kept as is to preserve behaviour -- confirm whether
    # addParseAction was intended.
    block_end.setParseAction(do_unindent)
    return block_start + expr + block_end
def indentedBlock(expr, indent_stack, indent=True):
    """Define a space-delimited indentation block around ``expr``.

    Helper for grammars whose structure is given by indentation, like block
    statements in Python source code.  pyparsing has its own version, but it
    does not seem to work well with JSONAlchemy cfg files.

    While parsing, the column at which the block starts is appended to
    ``indent_stack``; it is removed again when the block ends.

    :param expr: parser element for the body of the block.
    :param indent_stack: list of the columns of the currently open blocks.
    :param indent: never read by this implementation.
    :returns: ``indent + expr + undent`` as a single parser element.
    """

    def check_sub_indent(string, location, tokens):
        """Record a new, deeper indentation level or reject the match."""
        here = col(location, string)
        if not here > indent_stack[-1]:
            raise ParseException(string, location, "not a subentry")
        indent_stack.append(here)

    def check_unindent(string, location, tokens):
        """Reject positions that do not step out of the innermost block."""
        at_end_of_input = location >= len(string)
        if at_end_of_input:
            return
        here = col(location, string)
        shallower_than_block = here < indent_stack[-1]
        within_parent = here <= indent_stack[-2]
        if not (shallower_than_block and within_parent):
            raise ParseException(string, location, "not an unindent")

    def do_unindent():
        """Forget the innermost indentation level."""
        indent_stack.pop()

    sub_indent_marker = empty.copy().setParseAction(check_sub_indent)
    enter = lineEnd.suppress() + empty + sub_indent_marker

    leave = FollowedBy(empty).setParseAction(check_unindent)
    # NOTE(review): this second setParseAction call replaces check_unindent,
    # so only do_unindent is active on the marker. Behaviour is preserved
    # here; confirm whether both actions were meant to run.
    leave.setParseAction(do_unindent)

    return enter + expr + leave
def __defineDictGrammar(self):
    """Define the grammar that turns a string into values, dicts and lists.

    Three mutually recursive constructs are built:

    1. Value -- the first matching alternative among the unknown marker
       (``self.unknown``), a number (``self.floatNumber``), a boolean
       (``self.boolean``), a double-quoted string, an alphanumeric word,
       a dictionary, or a list.
    2. Dictionary -- key/value pairs separated by ','.  A key is a
       double-quoted string or an alphabetic word followed by ':';
       ``self.KDELIM`` sits between key and value.  The pairs are matched
       either bare or wrapped by ``self.quoteit`` with '{' and '}'.
    3. List -- values separated by ',', wrapped by ``self.quoteit`` with
       '[' and ']'.

    Values, dictionaries and lists may be nested in each other to any depth.

    Typical use: scraping web pages and extracting a mix of JSON and
    JavaScript-like HTML attributes into Python data structures; simpler
    uses include pulling simple values out of HTML tables.

    Side effects: sets ``self.value``, ``self.dictDefn``, ``self.listDefn``
    and ``self.topElement``, and registers the default and dictionary
    parsers in ``self.parseTypes``.
    """
    dict_forward = Forward()
    list_forward = Forward()

    key = (QuotedString('"') | Word(alphas)) + FollowedBy(Literal(":"))
    key.setName("key")

    self.value = MatchFirst([
        self.unknown,
        self.floatNumber,
        self.boolean,
        QuotedString('"'),
        Word(alphanums),
        dict_forward,
        list_forward,
    ])
    self.value.setName("value")

    # A pair only counts when it is followed by ',' (another pair) or '}'.
    pair_end = FollowedBy(Or([Literal(","), Literal("}")]))
    pair = Group(key + self.KDELIM + self.value) + pair_end
    pairs = Group(Dict(delimitedList(pair)))

    dict_forward << (self.quoteit(pairs, '{', '}') | pairs)
    self.dictDefn = dict_forward
    self.dictDefn.setName("Dictionary")

    values = Group(delimitedList(self.value))
    list_forward << self.quoteit(values, '[', ']')
    self.listDefn = list_forward
    self.listDefn.setName("List")

    self.topElement = Or([self.dictDefn, self.listDefn, self.value])

    self.parseTypes[WebParser.PSTYPE_DEFAULT] = self.topElement
    self.parseTypes[WebParser.PSTYPE_DICT] = self.dictDefn
def expr(self) -> ParserElement:
    """Return the parser for a ``{code...}`` ... ``{code}`` block.

    The opening tag may name a language (``{code:lang}`` or
    ``{code:lang|...}``), exposed under the results name ``lang``.
    Everything up to the closing ``{code}`` is exposed as ``text``.
    ``self.action`` is attached as the parse action.
    """
    language = Word(alphanums + "#+").setResultsName("lang")
    # The language only counts when '}' or '|' comes right after it.
    language_spec = Optional(
        ":" + language + FollowedBy(Literal("}") | Literal("|")),
    )
    body = SkipTo("{code}").setResultsName("text")

    # ``...`` skips whatever remains of the opening tag up to its '}'.
    block = "{code" + language_spec + ... + "}" + body + "{code}"
    return Combine(block).setParseAction(self.action)
def _define_vs():
    """Build the grammar for a validator string.

    A validator string has the shape
    ``[key]{@refer}[?][name][(args)]{&kw[=value]}`` and must span the whole
    input.  Results names: ``key``, ``refers``, ``name``, ``args``,
    ``kwargs``.

    :returns: the pyparsing expression for a complete validator string.
    """
    identifier = pyparsing_common.identifier

    key = (
        Word(alphas + '_$', alphanums + '_$')
        .setName('identifier')
        .setResultsName('key')
    )
    # Values are kept as the original JSON text.
    json_text = originalTextFor(_define_json()).setResultsName('value')

    # validator name, eg: int
    name = Optional(
        Optional(Suppress('?')) + identifier.setResultsName('name')
    )

    # refers, eg: @xx@yy
    refers = Group(
        ZeroOrMore(Suppress('@') + identifier)
    ).setResultsName('refers')

    # args, eg: (), (1), (1,2,3), ([1,2], {"key":"value"}, "Any JSON")
    arg_list = Suppress('(') + Optional(delimitedList(json_text)) + Suppress(')')
    args = Group(Optional(arg_list)).setResultsName('args')

    # key-value, eg: key, key=True, key=[1,2,3]
    keyword = Group(key + Optional(Suppress('=') + json_text))

    # kwargs, eg: &key1&key2=True&key3=[1,2,3]
    kwargs = Group(ZeroOrMore(Suppress('&') + keyword)).setResultsName('kwargs')

    # lead xxx is key: xxx@yyy, xxx?yyy, $self&abc
    # lead xxx except '$self' is validator name: xxx(1,2), xxx&abc, xxx
    self_key = Literal('$self').setResultsName('key')
    leading_key = Optional((key + FollowedBy(Word('@?'))) | self_key)

    definition = refers + name + args + kwargs
    return StringStart() + leading_key + definition + StringEnd()
def parse_connection_str(connstr):
    """Parse a node-connection string and return the pyparsing results.

    Syntax: ``head(outputs) -> [middle(inputs[|outputs]) ->] tail(inputs)``,
    with several such chains separated by ';'.  The parsed result is passed
    to ``check_numconnections`` before being returned.

    :param connstr: the connection description to parse.
    :returns: the ``ParseResults`` produced by the grammar.
    """
    # Identifiers: letters, digits and the characters '_', '.', '@'.
    identifier = Word(alphas + "0123456789" + "_.@")

    nodename = identifier.setResultsName('nodename')
    outputnames = delimitedList(identifier).setResultsName('outputnames')
    inputnames = delimitedList(identifier).setResultsName('inputnames')

    # first node has only outputs
    headnode = (
        nodename + Suppress("(") + outputnames + Suppress(")")
    ).setResultsName('headnode')

    # middle nodes have both inputs and outputs
    middlenode = Group(
        nodename
        + Suppress('(')
        + inputnames
        + Optional("|" + outputnames)
        + Suppress(")")
    ).setResultsName('middlenode')

    # last node has only inputs
    tailnode = (
        nodename + Suppress("(") + inputnames + Suppress(")")
    ).setResultsName('tailnode')

    # A node is a middle node only if another '->' follows it.
    middlenodes = Group(
        ZeroOrMore(Suppress("->") + middlenode + FollowedBy("->"))
    ).setResultsName('middlenodes')

    # connect head -> [middle ->] tail
    connect = Group(
        headnode + middlenodes + Suppress("->") + tailnode
    ).setResultsName('nodes')

    connectlist = Group(
        connect + ZeroOrMore(Suppress(";") + connect)
    ).setResultsName('connects')

    parsed = connectlist.parseString(connstr)
    check_numconnections(parsed)
    return parsed
def __defineBasicTypes(self):
    """Define the parsers for the simple value types.

    Sets ``self.KDELIM`` (suppressed ':'), ``self.unknown`` ('?', converted
    to NaN), ``self.floatNumber`` (converted with ``float``),
    ``self.boolean`` (looked up in ``WebParser.boolMaps``) and
    ``self.ratings`` (looked up in ``Ratings.ratingMaps``, with '--'
    converted to NaN).  The ratings parser is also registered in
    ``self.parseTypes``.
    """
    self.KDELIM = Suppress(":")

    # A sign counts only when digits follow it.
    sign = Word("+-", max=1) + FollowedBy(Word(nums))
    # Digit groups separated by ',' with an optional fractional part.
    currency = (
        Word(nums)
        + ZeroOrMore(Suppress(",") + Word(nums))
        + Optional(Literal(".") + Word(nums))
    )

    unknown_marker = Keyword("?")
    self.unknown = self.completeType(
        unknown_marker, "UNKNOWN_VAL", lambda t: np.nan)

    magnitude = Or([
        Word(nums),
        currency,
        Regex(r'[0-9]+(\.\d*)?([eE]\d+)?'),
    ])
    # A trailing '%' is accepted and dropped.
    number = Combine(Optional(sign) + magnitude) + Optional(Suppress("%"))
    self.floatNumber = self.completeType(
        number, "float", lambda t: float(t[0]))

    bool_word = Or([
        CaselessKeyword("false"),
        CaselessKeyword("true"),
        CaselessKeyword("yes"),
        CaselessKeyword("no"),
    ])
    self.boolean = self.completeType(
        bool_word, "bool", lambda t: WebParser.boolMaps[t[0]])

    rating_words = []
    for rating in Ratings.ratingMaps:
        rating_word = CaselessKeyword(rating)
        rating_word.setParseAction(
            lambda t: Ratings.ratingMaps[t[0].lower()])
        rating_words.append(rating_word)
    rating_words.append(Keyword("--").setParseAction(lambda t: np.nan))
    self.ratings = self.completeType(Or(rating_words), "ratings")

    self.parseTypes[WebParser.PSTYPE_RATINGS] = self.ratings
def _parser_piece_text():
    """Return the pyparsing element for the text part of a markdown link.

    The element matches a balanced ``[...]`` group, ignoring escaped square
    brackets while balancing and refusing any character that starts a double
    line break.  The matched source text, without the outer brackets, is
    exposed under the results name ``text``.
    """
    # Markdown links may not contain a double line break: a line break,
    # optional spaces/tabs, then another line break.
    blank_line = (
        Word("\n\r", exact=1)
        + Optional(Word(" \t"))
        + Word("\n\r", exact=1)
    )

    # Escaped square brackets do not take part in bracket balancing.
    escaped_bracket = Literal("\\[") | Literal("\\]")

    # One character of content: anything except a bracket, and not the
    # start of a double line break.
    #
    # nestedExpr would normally apply the content parser to each
    # whitespace-separated string inside the brackets.  Because the
    # whitespace characters are set to '' elsewhere, it is applied one
    # character at a time instead.  If that ever changes, the content
    # would need to become something like
    # Combine(OneOrMore(~escaped_bracket + one_char)).
    one_char = ~FollowedBy(blank_line) + CharsNotIn("[]", exact=1)

    balanced = nestedExpr(
        opener="[",
        closer="]",
        content=one_char,
        ignoreExpr=escaped_bracket,
    )
    text = originalTextFor(balanced).setResultsName("text")
    # Strip the enclosing '[' and ']' from the matched text.
    text.addParseAction(lambda s, l, toks: toks[0][1:-1])
    return text
def __init__(self):
    """Build the search-query grammar and store it in ``self._grammar``.

    Nothing is built when ``ParserElement`` is falsy.
    NOTE(review): presumably that means pyparsing could not be imported --
    confirm against the import code of this module.
    """
    if not ParserElement:
        return

    with warnings.catch_warnings():
        # In Python 2.6, pyparsing throws warnings on its own code.
        warnings.simplefilter("ignore")

        or_operator = Suppress(
            CaselessLiteral("OR")).setResultsName("OR_OPERATOR")

        # "quoted phrase": everything except the double quote itself,
        # with whitespace kept.
        quote_contents = Group(Word(ALLCHARS.replace("\"", "")))
        quote_contents.leaveWhitespace()
        quoted_word = Group(
            Suppress('"') + quote_contents + Suppress('"')
        ).setResultsName("QUOTES")

        # A plain word may not start with '-' and may not be "OR".
        plain_word = Group(
            NotAny(CaselessLiteral("OR"))
            + Word(WORDCHARS.replace("-", ""), WORDCHARS)
        ).setResultsName("PLAINWORD")

        any_word = Group(
            NotAny('(') + ~FollowedBy(')') + Word(ALLWORDCHARS)
        ).setResultsName("ANYWORD")

        # key:value, optionally negated with a leading '-'.
        key_word = Group(
            Combine(
                Optional("-")
                + Word(string.ascii_letters)
                + Literal(":")
                + (Word(WORDCHARS) | quoted_word)
            )
        ).setResultsName("KEYWORD")

        # -word or -"quoted phrase" (no whitespace after the '-').
        not_expr = Group(
            Suppress("-")
            + NotAny(string.whitespace)
            + (quoted_word | plain_word)
        ).setResultsName("NOT")

        word = Group(
            key_word | not_expr | quoted_word | plain_word
        ).setResultsName("WORD")

        grammar = Forward()
        parens = Forward()

        or_operand = Group(
            word | parens | not_expr | any_word
        ).setResultsName("OR_OPERAND")

        or_lookahead = FollowedBy(or_operand + or_operator + or_operand)
        or_chain = Group(or_operand + OneOrMore(or_operator + or_operand))
        or_expr = Group(
            or_lookahead + or_chain).setResultsName("OR_EXPRESSION")

        one_expr = Group(
            or_expr | parens | word | any_word
        ).setResultsName("ONE EXPRESSION")

        negation = Group(Optional("-")).setResultsName("NOT_PARENTHESIS")
        parens <<= Group(
            negation
            + Suppress("(")
            + ZeroOrMore(parens | grammar)
            + Suppress(")")
        ).setResultsName("PARENTHESIS")

        grammar <<= (
            (one_expr + grammar) | one_expr).setResultsName("GRAMMAR")

        self._grammar = grammar
def _construct_parser(self):
    '''Construct and return the parser for filter expressions.

    A condition is ``field operator value``.  Conditions and parenthesised
    expressions are combined by layering ``not``, then ``and``, then ``or``,
    each layer built on top of the previous one.  The result carries the
    results name ``expression``.
    '''
    field = Word(alphanums + '_.')
    operator = oneOf(list(self._operators.keys()))
    value = Word(alphanums + '-_,./*@+')
    quoted_value = quotedString('quoted_value').setParseAction(removeQuotes)

    condition = Group(
        field + operator + (quoted_value | value)
    )('condition')

    not_ = Optional(Suppress(CaselessKeyword('not')))('not')
    and_ = Suppress(CaselessKeyword('and'))('and')
    or_ = Suppress(CaselessKeyword('or'))('or')

    expression = Forward()
    parenthesis = Suppress('(') + expression + Suppress(')')

    # ``operand`` is the layer built so far; each conjunction wraps it in a
    # new layer that falls back to the plain operand.
    operand = condition | parenthesis
    for conjunction in (not_, and_, or_):
        layer = Forward()

        if conjunction in (and_, or_):
            # Only match when at least two operands are joined.
            lookahead = FollowedBy(operand + conjunction + operand)
            joined = Group(operand + OneOrMore(conjunction + operand))
            conjunction_expression = (
                lookahead + joined(conjunction.resultsName)
            )
        elif conjunction in (not_, ):
            # ``conjunction.expr`` is the keyword without the Optional
            # wrapper, so the lookahead requires an actual 'not'.
            lookahead = FollowedBy(conjunction.expr + layer)
            negated = Group(conjunction + layer)
            conjunction_expression = (
                lookahead + negated(conjunction.resultsName)
            )
        else:  # pragma: no cover
            raise ValueError('Unrecognised conjunction.')

        layer <<= (conjunction_expression | operand)
        operand = layer

    expression <<= operand
    return expression('expression')
def __build_grammar():
    """Build and return the grammar of the instance query language.

    Shape of a query::

        SELECT [nav[, nav...] FROM] [environment '..'] [lc '..']
               [offer '..'] [[implementation] '..'] INSTANCES
               [WHERE nav = '..'|(subquery) [AND ...]] [WITH COMPUTATIONS]

    :returns: a ``Forward`` bound to the select expression, so that
        sub-queries can refer to the grammar recursively.
    """
    query = Forward()

    kw_select = CaselessLiteral("SELECT")
    kw_from = CaselessLiteral("FROM")
    kw_where = CaselessLiteral("WHERE")
    kw_and = CaselessLiteral("AND")
    kw_instances = CaselessLiteral("INSTANCES")

    # Single-quoted string; '' is an escaped quote.
    quoted = QuotedString("'", escQuote="''")

    identifier = Combine(
        Word(alphas + "_", exact=1) + Optional(Word(nums + alphas + "_"))
    )("identifier")
    # Dotted path: a.b.c
    navigation = Group(
        identifier + ZeroOrMore(Suppress(".") + identifier)
    )("navigation")

    subquery = Suppress('(') + query('subquery') + Suppress(')')
    predicate = Group(
        navigation + Suppress("=") + (quoted('value') | subquery)
    )('predicate')
    where_clause = Group(
        Suppress(kw_where)
        + predicate
        + ZeroOrMore(Suppress(kw_and) + predicate)
    )('where')

    # Pre filters
    implementation = (
        Optional(Suppress(CaselessLiteral("implementation")))
        + quoted('impl')
    )
    offer = Suppress(CaselessLiteral("offer")) + quoted('cic')
    lifecycle = Suppress(CaselessLiteral("lc")) + quoted('lc')
    environment = Suppress(CaselessLiteral("environment")) + quoted('envt')
    pre_filter = (
        Optional(environment)
        + Optional(lifecycle)
        + Optional(offer)
        + Optional(implementation)
        + FollowedBy(kw_instances)
    )

    # Dict query (only select some elements and navigate)
    selector = Group(
        navigation
        + ZeroOrMore(Suppress(',') + navigation)
        + FollowedBy(kw_from)
    )('selector')

    with_computations = CaselessLiteral('WITH COMPUTATIONS')('compute')
    select = Group(
        Suppress(kw_select)
        + Optional(selector + Suppress(kw_from))
        + pre_filter
        + Suppress(kw_instances)
        + Optional(where_clause)
        + Optional(with_computations)
    )('select')

    query << select
    return query
def expr(self) -> ParserElement:
    """Return the parser for a user mention such as ``[~accountid:abc]``.

    The mention may carry link text before a '|' (``[text|~accountid:abc]``)
    and the ``accountid:`` prefix is optional.  The account id is exposed
    under the results name ``accountid`` and ``self.action`` is attached to
    the mention.  Around the mention, the parser accepts the start/end of
    the string or an optional whitespace-based boundary that defaults to a
    single space.
    """
    link_text = Optional(
        SkipTo("|", failOn="]") + Suppress("|"),
        default="",
    )
    account_id = Word(alphanums + ":-").setResultsName("accountid")
    MENTION = Combine(
        "["
        + link_text
        + "~"
        + Optional(CaselessLiteral("accountid:"))
        + account_id
        + "]",
    )

    before = StringStart() | Optional(
        PrecededBy(White(), retreat=1), default=" ")
    mention = MENTION.setParseAction(self.action)
    # After the mention: whitespace, punctuation other than '[', or
    # another mention.
    boundary = White() | Char(punctuation, excludeChars="[") | MENTION
    after = StringEnd() | Optional(FollowedBy(boundary), default=" ")

    return before + mention + after
def expr(self) -> ParserElement:
    """Return the parser for text wrapped in ``self.TOKEN`` markers.

    The element must start at the beginning of the string or right after a
    non-word character, and its closing token must be followed by a
    non-word character or the end of the string.  The text between the
    tokens may not start with whitespace or with the token character, and
    may not contain a line break.  ``self.action`` is attached to the
    element.
    """
    NON_ALPHANUMS = Regex(r"\W", flags=re.UNICODE)
    TOKEN = Suppress(self.TOKEN)

    # Skipped while searching for the closing token: a token preceded by
    # whitespace, plus whatever get_ignore_expr() supplies.
    IGNORE = (White() + TOKEN) | self.get_ignore_expr()

    not_blank_or_token = ~White() & ~Char(self.TOKEN)
    content = SkipTo(TOKEN, ignore=IGNORE, failOn="\n")
    closing_lookahead = FollowedBy(NON_ALPHANUMS | StringEnd())
    ELEMENT = Combine(
        TOKEN + not_blank_or_token + content + TOKEN + closing_lookahead,
    )

    leading = StringStart() | PrecededBy(NON_ALPHANUMS, retreat=1)
    element_with_action = ELEMENT.setParseAction(self.action)
    return leading + Combine(
        element_with_action + Optional(~ELEMENT, default=" "),
    )
def _define_grammar(self):
    """Return the parsers for the sections of a record, keyed by name.

    Keys of the returned dict: ``comments``, ``code``, ``title`` and
    ``copies``.
    """
    label = Literal('Contents') | Literal('Caption title') | \
        Literal('Sub-caption') | Literal('Half-title') | \
        Literal('Footline') | Literal('Comments') | \
        Literal('Modificatons') | Literal('Errors') | \
        Literal('DMF') | Literal('ADF')
    copies_label = LineStart() + Literal('Copies')

    # Every character below 65536 whose Unicode category is a letter.
    letters = u''.join(
        unichr(code_point)
        for code_point in xrange(65536)
        if unicodedata.category(unichr(code_point)).startswith('L')
    )

    # A section runs up to the line end that precedes the next label, the
    # copies label, or the end of the input.
    section_separator = LineEnd() + FollowedBy(
        label | copies_label | StringEnd())
    section = SkipTo(section_separator)

    # Library code: letters, a hyphen, letters.
    library = Combine(Word(letters) + Literal(u'-') + Word(letters))
    copy_separator = (
        (LineEnd() + FollowedBy(library))
        | (LineEnd() + StringEnd())
        | StringEnd()
    )
    copy_entry = library + SkipTo(copy_separator) + Suppress(copy_separator)

    comments = Suppress('Comments') + SkipTo(section_separator)
    code = StringStart() + SkipTo(LineEnd()) + Suppress(LineEnd())
    title = Suppress(code) + Suppress(LineEnd()) + section
    copies = Suppress(copies_label) + OneOrMore(Group(copy_entry))

    return {
        'comments': comments,
        'code': code,
        'title': title,
        'copies': copies,
    }
def parse_pabl(self, raw_pabl):
    """Parse PABL source text and return the resulting parse tree.

    The grammar is indentation based: an ``@item name:`` or
    ``@permissions a, b:`` header is followed by an indented suite of
    statements; a statement is an item definition, a permission definition
    or a ';'-terminated field list (``variable`` or ``variable as name``).
    Comments start with '#' and run to the end of the line.

    :param raw_pabl: the PABL text to parse.
    :returns: the ``ParseResults`` for the top-level suite.
    """
    block_start = lineEnd.suppress() + empty + empty.copy().setParseAction(
        self.check_sub_indent)
    block_end = FollowedBy(empty).setParseAction(self.check_unindent)
    # NOTE(review): setParseAction replaces the action set on the line
    # above, so only self.unindent is attached. Kept to preserve behaviour.
    block_end.setParseAction(self.unindent)

    terminator = Literal(';').suppress()
    comment = Literal('#') + restOfLine

    item_name = Word(alphas, alphanums + '_')
    variable = Word(alphas, alphanums + '_.')
    variable_as = (variable + 'as' + item_name)

    statement = Forward()
    suite = Group(
        OneOrMore(empty + statement.setParseAction(self.check_peer_indent)))
    suite.ignore(comment)

    item_start = Literal('@item').suppress()
    header_end = Literal(':').suppress()
    permission_start = Literal('@permissions')

    item_header = item_start + item_name.setResultsName('item') + header_end
    item_definition = Group(item_header + block_start + suite + block_end)

    permission_names = Group(
        delimitedList(item_name).setResultsName('permissions'))
    permission_header = permission_start + permission_names + header_end
    permission_definition = Group(
        permission_header + block_start + suite + block_end)

    field_list = delimitedList(
        Group(variable_as) | variable
    ).setResultsName('fields') + terminator

    statement << (item_definition | field_list | Group(permission_definition))

    return suite.parseString(raw_pabl)
def pythonVar(self):
    """Return the parser for a plain ``$name`` variable, building it once.

    The parser matches '$' followed by an identifier, provided the
    identifier is not followed by ``.name`` or '('.  The '$' is suppressed
    and ``self.onPythonVar`` is attached as the parse action.  The result
    is cached in ``self._pythonVar``.

    Building the parser sets pyparsing's default whitespace characters to
    space and tab.
    """
    if self._pythonVar:
        return self._pythonVar

    from pyparsing import (ParserElement, Word, alphas, alphanums, Literal,
                           Suppress, FollowedBy)

    ParserElement.setDefaultWhitespaceChars(' \t')

    identifier = Word(alphas + "_", alphanums + "_")
    open_paren = Literal("(")
    period = Literal(".")
    dollar_sign = Literal("$")

    # Attribute access and calls are not plain variables.
    attribute_or_call = (period + identifier) | open_paren
    variable = Suppress(dollar_sign) + identifier + ~FollowedBy(
        attribute_or_call)

    self._pythonVar = variable
    self._pythonVar.setParseAction(self.onPythonVar)
    return self._pythonVar
def create_parser(self):
    """Return the parser for a configuration made of sections and commands.

    Recognised lines:

    * ``/...``: section start; the rest of the line goes to
      ``self.on_begin``.
    * ``add key=value ...``: the key/value pairs go to ``self.on_add``.
    * ``set [target] key=value ...``: the optional target and the pairs go
      to ``self.on_set``.  The target is ``[find default-name=value]``, a
      bare key that is not followed by '=', or a double-quoted string.
    """
    left_bracket = Suppress("[")
    right_bracket = Suppress("]")
    equals = Suppress("=")
    slash = Suppress("/")

    key = Word(alphanums + "-")
    value = Word(alphanums + "-/.:_+") | QuotedString('"')
    pair = Group(key + equals + value)

    find = left_bracket + Group(
        Literal("find") + Literal("default-name") + equals + value
    ) + right_bracket

    begin = LineStart() + slash + restOfLine.setParseAction(self.on_begin)

    add_pairs = ZeroOrMore(pair).setParseAction(self.on_add)
    add_op = LineStart() + Literal("add") + add_pairs

    # A bare key is a target only when it is not the key of a pair.
    set_target = find | (key + ~FollowedBy(equals)) | QuotedString('"')
    set_args = (Optional(set_target) + ZeroOrMore(pair)).setParseAction(
        self.on_set)
    set_op = LineStart() + Literal("set") + set_args

    return ZeroOrMore(begin | add_op | set_op)
def _make_arabic_parser():
    """Build the Arabic/Latin search-query grammar.

    The grammar recognises plain words, wildcards, ranges, synonyms,
    antonyms, derivations, spell-error terms, tashkil (vocalised) terms,
    tuples, quoted phrases, parenthesised groups, boosts (``^``), fielded
    units (``field:unit``) and the NOT / AND / OR / ANDNOT operators, each
    with Arabic and Latin spellings where both are listed below.

    :returns: the bound ``parseString`` method of the top-level expression.
    """
    # Character sequence that escapes the character following it.
    escapechar = "//"

    # wordchars = printables
    # for specialchar in '*?^():"{}[] ' + escapechar:
    #    wordchars = wordchars.replace(specialchar, "")
    # wordtext = Word(wordchars)

    # Characters accepted in Arabic field names (see fieldedUnit below).
    alephba = u""" abcdefghijklmnopqrstuvwxyz_ األآإـتنمكطدجحخهعغفقثصضشسيبئءؤرىةوزظذ """

    # Word text is anything except the characters with a special meaning.
    wordtext = CharsNotIn(u'//*؟^():"{}[]$><%~#،,\' +-|')
    # An escape is the escape sequence followed by exactly one printable
    # or whitespace character.
    escape = Suppress( escapechar ) \
             + ( Word( printables, exact = 1 ) | White( exact = 1 ) )
    wordtoken = Combine(OneOrMore(wordtext | escape))

    # A plain old word.
    plainWord = Group(wordtoken).setResultsName("Word")

    # A wildcard word containing * or ? (Latin or Arabic question mark).
    wildchars = Word(u"؟?*")
    # Start with word chars and then have wild chars mixed in
    wildmixed = wordtoken + OneOrMore(wildchars + Optional(wordtoken))
    # Or, start with wildchars, and then either a mixture of word and wild
    # chars, or the next token
    wildstart = wildchars \
                + ( OneOrMore( wordtoken + Optional( wildchars ) ) \
                    | FollowedBy( White() \
                                  | StringEnd() ) )
    wildcard = Group(Combine(wildmixed | wildstart)).setResultsName("Wildcard")

    # A range of terms
    startfence = Literal("[")
    endfence = Literal("]")
    rangeitem = QuotedString('"') | wordtoken
    to = Keyword( u"الى" ) \
         | Keyword( u"إلى" ) \
         | Keyword( "To" ) \
         | Keyword( "to" ) \
         | Keyword( "TO" )

    # Ranges with a missing start or end yield an empty group on that side.
    openstartrange = Group( Empty() ) \
                     + Suppress( to + White() ) \
                     + Group( rangeitem )

    openendrange = Group( rangeitem ) \
                   + Suppress( White() + to ) \
                   + Group( Empty() )
    normalrange = Group( rangeitem ) \
                  + Suppress( White() + to + White() ) \
                  + Group( rangeitem )
    # NOTE(review): this local name shadows the builtin ``range`` for the
    # rest of the function.
    range = Group( startfence \
                   + ( normalrange | openstartrange | openendrange ) \
                   + endfence ).setResultsName( "Range" )

    # synonyms
    syn_symbol = Literal("~")
    synonym = Group(syn_symbol + wordtoken).setResultsName("Synonyms")

    # antonyms
    ant_symbol = Literal("#")
    antonym = Group(ant_symbol + wordtoken).setResultsName("Antonyms")

    # derivation level 1,2
    derive_symbole = Literal(u"<") | Literal(u">")
    derivation = Group(OneOrMore(derive_symbole) + wordtoken).setResultsName("Derivation")

    # spellerrors
    # spellerrors=Group(QuotedString('\'')).setResultsName("Errors")
    spellerrors_symbole = Literal(u"%")
    spellerrors = Group(spellerrors_symbole + wordtoken).setResultsName("SpellErrors")

    # shakl (tashkil): must uplevel to boostable
    tashkil_symbol = Literal("'")
    tashkil = Group( tashkil_symbol + \
                     ZeroOrMore( wordtoken | White() ) + \
                     tashkil_symbol
                     ).setResultsName( "Tashkil" )

    # tuple search (root,pattern,type)
    starttuple = Literal("{")
    endtuple = Literal("}")
    bettuple = Literal(u"،") | Literal(",")
    wordtuple = Group(Optional(wordtoken))
    # NOTE(review): this local name shadows the builtin ``tuple`` for the
    # rest of the function.
    tuple = Group( starttuple + \
                   wordtuple + \
                   ZeroOrMore( bettuple + wordtuple ) + \
                   endtuple
                   ).setResultsName( "Tuple" )

    # A word-like thing; alternatives are tried in the order listed.
    generalWord = range | wildcard | plainWord | tuple | antonym | synonym | \
        derivation | tashkil | spellerrors

    # A quoted phrase
    quotedPhrase = Group(QuotedString('"')).setResultsName("Quotes")

    expression = Forward()

    # Parentheses can enclose (group) any expression
    parenthetical = Group(
        (Suppress("(") + expression + Suppress(")"))).setResultsName("Group")

    boostableUnit = generalWord | quotedPhrase
    # unit^number
    boostedUnit = Group( boostableUnit + \
                         Suppress( "^" ) + \
                         Word( "0123456789", ".0123456789" )
                         ).setResultsName( "Boost" )

    # The user can flag that a parenthetical group, quoted phrase, or word
    # should be searched in a particular field by prepending 'fn:', where fn is
    # the name of the field.
    fieldableUnit = parenthetical | boostedUnit | boostableUnit
    fieldedUnit = Group( ( Word( alephba + "_" ) | Word( alphanums + "_" ) ) + \
                         Suppress( ':' ) + \
                         fieldableUnit
                         ).setResultsName( "Field" )

    # Units of content
    unit = fieldedUnit | fieldableUnit

    # A unit may be "not"-ed.
    operatorNot = Group( Suppress( Keyword( u"ليس" ) | Keyword( u"NOT" ) ) + \
                         Suppress( White() ) + \
                         unit
                         ).setResultsName( "Not" )
    generalUnit = operatorNot | unit

    andToken = Keyword(u"و") | Keyword(u"AND")
    orToken = Keyword(u"أو") | Keyword(u"او") | Keyword(u"OR")
    andNotToken = Keyword(u"وليس") | Keyword(u"ANDNOT")

    # Each binary operator has a keyword form surrounded by whitespace and
    # a symbol form ('+', '|' and '-' respectively).
    operatorAnd = Group( ( generalUnit + \
                           Suppress( White() ) + \
                           Suppress( andToken ) + \
                           Suppress( White() ) + \
                           expression ) | \
                         ( generalUnit + \
                           Suppress( Literal( u"+" ) ) + \
                           expression )
                         ).setResultsName( "And" )

    operatorOr = Group( ( generalUnit + \
                          Suppress( White() ) + \
                          Suppress( orToken ) + \
                          Suppress( White() ) + \
                          expression ) | \
                        ( generalUnit + \
                          Suppress( Literal( u"|" ) ) + \
                          expression )
                        ).setResultsName( "Or" )

    operatorAndNot = Group( ( unit + \
                              Suppress( White() ) + \
                              Suppress( andNotToken ) + \
                              Suppress( White() ) + \
                              expression ) | \
                            ( unit + \
                              Suppress( Literal( u"-" ) ) + \
                              expression )
                            ).setResultsName( "AndNot" )

    # An expression is a run of operators/units (whitespace between them is
    # dropped), or nothing at all.
    expression <<= ( OneOrMore( operatorAnd | operatorOr | operatorAndNot | \
                                generalUnit | Suppress( White() ) ) | Empty() )

    # The whole input must be consumed.
    toplevel = Group(expression).setResultsName("Toplevel") + StringEnd()

    return toplevel.parseString
Word, WordEnd, WordStart, ZeroOrMore, ) from data_lists import tlds, schemes alphanum_word_start = WordStart(wordChars=alphanums) alphanum_word_end = WordEnd(wordChars=alphanums) # the label definition ignores the fact that labels should not end in an hyphen label = Word(initChars=alphanums, bodyChars=alphanums + '-', max=63) domain_tld = Or(tlds) domain_name = (alphanum_word_start + Combine( Combine(OneOrMore(label + ('.' + FollowedBy(Word(alphanums + '-'))))) ('domain_labels') + domain_tld('tld')) + alphanum_word_end).setParseAction(downcaseTokens) ipv4_section = (Word( nums, asKeyword=True, max=3).setParseAction(lambda x: str(int(x[0]))).addCondition( lambda tokens: int(tokens[0]) < 256)) # basically, the grammar below says: start any words that start with a '.' or a number; I want to match words that start with a '.' because this will fail later in the grammar and I do not want to match anything that start with a '.' ipv4_address = (alphanum_word_start + WordStart('.' + nums) + Combine((ipv4_section + '.') * 3 + ipv4_section) + NotAny(Regex('\.\S')) + alphanum_word_end) hexadectet = Word(hexnums, min=1, max=4) ipv6_address_full = alphanum_word_start + Combine((hexadectet + ":") * 7 + hexadectet)
yes = "Yes, You can!" print(grammar.parseString(yes)) print("length : ", len(grammar.parseString(yes))) # http://pythonhosted.org/pyparsing/pyparsing.OneOrMore-class.html # Class OneOrMore # Repetition of one or more of the given expression. # Parameters: # expr - expression that must match one or more times # stopOn - (default=None) - expression for a terminating sentinel (only required if the sentinel would ordinarily match the repetition expression) data_word = Word(alphas) label = data_word + FollowedBy(':') attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join)) text = "shape: SQUARE posn: upper left color: BLACK" print(text) OneOrMore(attr_expr).parseString(text).pprint( ) # Fail! read 'posn' as data instead of next label -> [['shape', 'SQUARE posn']] # use stopOn attribute for OneOrMore to avoid reading label string as part of the data attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) OneOrMore(attr_expr).parseString(text).pprint( ) # [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']] # could also be written as (attr_expr * (1, )).parseString(text).pprint(
tokens = list(result) node = tokens[0] for token in tokens[1:]: if isinstance(token, Identifier): node = Get(node, token) elif isinstance(token, FunCall): assert isinstance(token.function, Identifier) node = FunCall(Get(node, token.function), token.arguments) return node orphan_function_call_paren = Forward().setName("orphan_function_call_paren") member_access_token = dot + (orphan_function_call_paren | identifier_keyword) leading_member_access_token = member_access_token + FollowedBy( member_access_token) member_access = ( (orphan_function_call_paren + FollowedBy(member_access_token) | identifier) + pOptional((leading_member_access_token)[...] + dot + identifier_keyword) ).setParseAction(__build_recursive_member_access) # Operator def __build_unary_operator(expr, pos, result): tokens = result[0].asList() assert len(tokens) == 2 operator_symbol = tokens[0] operand = tokens[1] tree = UnOp(operator_symbol, operand)
Optional(OneOrMore(STATEMENT), default=None)(PROP_BODY)) ELSE = Group( Suppress(SYN_ELSE) + Optional(OneOrMore(STATEMENT), default=None)(PROP_BODY)) CONDITIONAL = Group( Group( Group( Group( IF(TYPE_IF) + ZeroOrMore(ELSEIF)(TYPE_ELSEIF) + Optional(ELSE(TYPE_ELSE)) # StringEnd() is because EOF can end a conditional. # SYN_CLOSE_BRACE is because a closed block can end a conditional. + (Literal(SYN_ENDIF)(TYPE_ENDIF) | StringEnd() | FollowedBy(SYN_CLOSE_BRACE))))(PROP_BODY))( TYPE_CONDITIONAL)) ANON_BLOCK = Group( Literal(SYN_OPEN_BRACE)(TYPE_ANON_BLOCK) + Optional(OneOrMore(ROOT), default=None)(PROP_BODY) # StringEnd() is because EOF can end a block. + (Suppress(SYN_CLOSE_BRACE) | StringEnd())) NAMED_BLOCK = Group( SYN_BLOCKS(TYPE_BLOCK) + ~SYN_KEYWORDS + Optional(Word(TOKEN)(PROP_VALUE)) + Literal(SYN_OPEN_BRACE) + Optional(OneOrMore(STATEMENT), default=None)(PROP_BODY) + (Suppress(SYN_CLOSE_BRACE) | StringEnd())) EMPTY_BLOCK = Group(
from regparser.grammar import atomic from regparser.grammar.utils import keep_pos, Marker, QuickSearchable period_section = Suppress(".") + atomic.section part_section = atomic.part + period_section marker_part_section = ( keep_pos(atomic.section_marker).setResultsName("marker") + part_section) depth6_p = atomic.em_roman_p | atomic.plaintext_level6_p depth5_p = ((atomic.em_digit_p | atomic.plaintext_level5_p) + Optional(depth6_p)) depth4_p = atomic.upper_p + Optional(depth5_p) depth3_p = atomic.roman_p + Optional(depth4_p) depth2_p = atomic.digit_p + Optional(depth3_p) depth1_p = atomic.lower_p + ~FollowedBy(atomic.upper_p) + Optional(depth2_p) any_depth_p = QuickSearchable(depth1_p | depth2_p | depth3_p | depth4_p | depth5_p | depth6_p) depth3_c = atomic.upper_c + Optional(atomic.em_digit_c) depth2_c = atomic.roman_c + Optional(depth3_c) depth1_c = atomic.digit_c + Optional(depth2_c) any_a = atomic.upper_a | atomic.digit_a section_comment = atomic.section + depth1_c section_paragraph = QuickSearchable(atomic.section + depth1_p) mps_paragraph = QuickSearchable(marker_part_section + Optional(depth1_p)) ps_paragraph = part_section + Optional(depth1_p) part_section_paragraph = QuickSearchable(atomic.part + Suppress(".") +
def __init__(self):
    """Set up keyword tables, parse buffers and the PC-Axis grammar.

    Attributes set here:

    * ``filename`` / ``basedir``: empty strings.
    * ``possibleKeywords``, ``mandatoryKeywords``,
      ``languageAllowedKeywords``: lists of keyword names.
    * ``buffers``: mutable state, initialised empty.
    * ``grammar``: dict of named pyparsing expressions; ``pcaxisFile`` is
      the expression for a whole file (keyword lines, line ends, then the
      DATA section).

    Parse actions are bound to methods of this object
    (``convertToNumber``, ``handleKeyword``, ``isValidLanguageCode``,
    ``isLanguageAllowedKeyword``, ``handleKeywordValue``,
    ``setTimeFormat``, ``convertTimeList``).
    """
    self.filename = ""
    self.basedir = ""

    #############
    # Constants #
    #############

    self.possibleKeywords = [
        "AGGREGALLOWED", "AUTOPEN", "AXIS-VERSION", "BASEPERIOD",
        "CELLNOTE", "CELLNOTEX", "CFPRICES", "CHARSET", "CODEPAGE",
        "CODES", "CONFIDENTIAL", "CONTACT", "CONTENTS", "CONTVARIABLE",
        "COPYRIGHT", "CREATION-DATE", "DATA", "DATABASE", "DATANOTECELL",
        "DATANOTESUM", "DATASYMBOL1", "DATASYMBOL2", "DATASYMBOL3",
        "DATASYMBOL4", "DATASYMBOL5", "DATASYMBOL6", "DATASYMBOLNIL",
        "DATASYMBOLSUM", "DAYADJ", "DECIMAL", "DEFAULT-GRAPH",
        "DESCRIPTION", "DESCRIPTIONDEFAULT", "DIRECTORY-PATH", "DOMAIN",
        "DOUBLECOLUMN", "ELIMINATION", "HEADING", "HIERARCHIES",
        "HIERARCHYLEVELS", "HIERARCHYLEVELSOPEN", "HIERARCHYNAMES",
        "INFO", "INFOFILE", "KEYS", "LANGUAGE", "LANGUAGES",
        "LAST-UPDATED", "LINK", "MAP", "MATRIX", "NEXT-UPDATE", "NOTE",
        "NOTEX", "PARTITIONED", "PRECISION", "PRESTEXT", "PX-SERVER",
        "REFPERIOD", "ROUNDING", "SEASADJ", "SHOWDECIMALS", "SOURCE",
        "STOCKFA", "STUB", "SUBJECT-AREA", "SUBJECT-CODE", "SURVEY",
        "SYNONYMS", "TABLEID", "TIMEVAL", "TITLE", "UNITS",
        "UPDATE-FREQUENCY", "VALUENOTE", "VALUENOTEX", "VALUES",
        "VARIABLE-TYPE"
    ]

    self.mandatoryKeywords = [
        "CONTENTS", "DATA", "DECIMAL", "HEADING", "MATRIX", "STUB",
        "SUBJECT-AREA", "SUBJECT-CODE", "TITLE", "UNITS", "VALUES"
    ]

    self.languageAllowedKeywords = [
        "BASEPERIOD", "CELLNOTE", "CELLNOTEX", "CFPRICES", "CODES",
        "CONTACT", "CONTENTS", "CONTVARIABLE", "DATABASE", "DATANOTECELL",
        "DATANOTESUM", "DATASYMBOL1", "DATASYMBOL2", "DATASYMBOL3",
        "DATASYMBOL4", "DATASYMBOL5", "DATASYMBOL6", "DATASYMBOLNIL",
        "DATASYMBOLSUM", "DAYADJ", "DESCRIPTION", "DOMAIN", "DOUBLECOLUMN",
        "ELIMINATION", "HEADING", "HIERARCHIES", "HIERARCHYLEVELS",
        "HIERARCHYLEVELSOPEN", "HIERARCHYNAMES", "INFO", "INFOFILE",
        "KEYS", "LAST-UPDATED", "LINK", "MAP", "NOTE", "NOTEX",
        "PARTITIONED", "PRECISION", "PRESTEXT", "REFPERIOD", "SEASADJ",
        "SOURCE", "STOCKFA", "STUB", "SUBJECT-AREA", "SURVEY", "TIMEVAL",
        "TITLE", "UNITS", "VALUENOTE", "VALUENOTEX", "VALUES",
        "VARIABLE-TYPE"
    ]

    ###########
    # Buffers #
    ###########

    self.buffers = {
        "foundKeywords": [],
        "currentKeyword": "",
        "validLanguageCodes": [],
        "languageCode": "",
        "timeFormat": "",
        "rounding": "",
        "results": {},
    }

    ###########
    # Grammar #
    ###########

    # ``g`` is the same dict object as ``self.grammar``; the alias only
    # shortens the definitions below.
    g = self.grammar = {}

    # Utilities
    g["EOL"] = LineEnd().suppress()
    # A single or double quote character (dropped from the results).
    g["quote"] = Suppress(Regex("\"|'"))
    g["number"] = Word(nums).setParseAction(self.convertToNumber)
    # Inside quoted strings '#' stands for a line break.
    g["quotedString"] = (QuotedString('"') | QuotedString("'"))\
        .setParseAction(lambda tokens: tokens[0].replace("#", "\n"))
    g["quotedNumber"] = g["quote"] + g["number"] + g["quote"]
    g["lparen"], g["rparen"], g["lbracket"], g["rbracket"] = \
        map(Suppress, "()[]")
    # Raw string: "\." in a plain literal is an invalid escape sequence
    # and triggers a warning on recent Python versions.
    g["dots"] = Regex(r"\.{1,6}")
    g["quotedDots"] = g["quote"] + g["dots"] + g["quote"]
    g["dataNumber"] = Combine(
        Optional("-") + Word(nums) + Optional(Literal(".") + Word(nums))
    ).setParseAction(self.convertToNumber)

    # Keywords
    g["baseKeyword"] = Word(alphanums.upper() + "-")\
        .setParseAction(self.handleKeyword)("keyword")
    # KEYWORD=
    g["tableSpecificKeyword"] = g["baseKeyword"] + FollowedBy("=")
    # KEYWORD("variable")=
    g["variableSpecificKeyword"] = (
        g["baseKeyword"]
        + g["lparen"]
        + g["quotedString"]("variable")
        + g["rparen"]
        + FollowedBy("=")
    )
    # KEYWORD("variable","value")=
    g["valueSpecificKeyword"] = (
        g["baseKeyword"]
        + g["lparen"]
        + Group(
            g["quotedString"]("variable")
            + Suppress(",")
            + g["quotedString"]("value")
        )
        + g["rparen"]
        + FollowedBy("=")
    )
    # KEYWORD[language]=
    g["languageSpecificKeyword"] = (
        g["baseKeyword"]
        + g["lbracket"]
        + Word(alphas).setParseAction(self.isValidLanguageCode)("language")
        + g["rbracket"]
        + FollowedBy("=")
    ).setParseAction(self.isLanguageAllowedKeyword)
    g["keyword"] = (
        g["tableSpecificKeyword"]
        | g["variableSpecificKeyword"]
        | g["valueSpecificKeyword"]
        | g["languageSpecificKeyword"]
    )

    # Keyword values
    # Adjacent quoted strings form one value, joined with a space.
    g["keywordValue"] = OneOrMore(g["quotedString"])\
        .setParseAction(lambda tokens: " ".join(tokens))
    g["keywordValues"] = Group(
        delimitedList(
            (g["number"] | g["keywordValue"])
            .setParseAction(self.handleKeywordValue)("keywordValue")
        )
    )("keywordValues") + FollowedBy(";")

    # Time list values
    g["timeFormat"] = Regex("[AHQMW]1").setParseAction(
        self.setTimeFormat)("timeFormat")
    g["timeValues"] = Group(
        delimitedList(g["quotedNumber"])("timeValue"))
    g["timeSpan"] = Group(
        delimitedList(g["quotedNumber"]("timeValue"), delim="-"))
    # TLIST(fmt),"v1","v2",...   or   TLIST(fmt,"first"-"last")
    explicit_times = g["rparen"] + Suppress(",") + g["timeValues"]
    spanned_times = Suppress(",") + g["timeSpan"] + g["rparen"]
    g["TLIST"] = (
        Literal("TLIST")
        + g["lparen"]
        + (g["timeFormat"] + (explicit_times | spanned_times))
        .setParseAction(self.convertTimeList)
        + FollowedBy(";")
    )

    g["keywordLine"] = Group(
        g["keyword"]
        + Suppress("=")
        + (g["keywordValues"] | g["TLIST"])
        + Suppress(";")
    )

    # Data values
    g["observation"] = (g["quotedDots"] | g["dataNumber"])
    g["observationSeparator"] = White(" \t").suppress()
    g["observationLine"] = Group(
        delimitedList(
            g["observation"], delim=g["observationSeparator"]
        ).leaveWhitespace()
    )
    g["observationLines"] = OneOrMore(
        g["observationLine"] + Optional(Suppress(";"))
    )("keywordValues")
    g["data"] = Group(
        Literal("DATA")("keyword")
        + Suppress("=")
        + Optional(OneOrMore(g["EOL"]))
        + g["observationLines"]
    )

    # Whole file
    g["pcaxisFile"] = (
        OneOrMore(g["keywordLine"]) + OneOrMore(g["EOL"]) + g["data"]
    )
from rdflib.py3compat import bytestype ParserElement.setDefaultWhitespaceChars(" \n") String = STRING_LITERAL1 | STRING_LITERAL2 RDFLITERAL = Comp('literal', Param('string', String) + Optional( Param('lang', LANGTAG.leaveWhitespace() ) | Literal('^^').leaveWhitespace( ) + Param('datatype', IRIREF).leaveWhitespace())) NONE_VALUE = object() EMPTY = FollowedBy(LineEnd()) | FollowedBy("\t") EMPTY.setParseAction(lambda x: NONE_VALUE) TERM = RDFLITERAL | IRIREF | BLANK_NODE_LABEL | NumericLiteral | BooleanLiteral ROW = (EMPTY | TERM) + ZeroOrMore(Suppress("\t") + (EMPTY | TERM)) ROW.parseWithTabs() HEADER = Var + ZeroOrMore(Suppress("\t") + Var) HEADER.parseWithTabs() class TSVResultParser(ResultParser): def parse(self, source): if isinstance(source.read(0), bytestype):
if curCol > indentStack[-1]: indentStack.append( curCol ) else: raise ParseException(s,l,"not a subentry") def checkUnindent(s,l,t): if l >= len(s): return curCol = col(l,s) if not(curCol < indentStack[-1] and curCol <= indentStack[-2]): raise ParseException(s,l,"not an unindent") def doUnindent(): indentStack.pop() INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(checkSubIndent) UNDENT = FollowedBy(empty).setParseAction(checkUnindent) UNDENT.setParseAction(doUnindent) stmt = Forward() suite = Group( OneOrMore( empty + stmt.setParseAction( checkPeerIndent ) ) ) identifier = Word(alphas, alphanums) funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":") funcDef = Group( funcDecl + INDENT + suite + UNDENT ) rvalue = Forward() funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")") rvalue << (funcCall | identifier | Word(nums)) assignment = Group(identifier + "=" + rvalue) stmt << ( funcDef | assignment | identifier )
Keyword("archive") + qualified_identifier("name") ) resource_type = Group( raw_data("raw_data") | vector("vector") | multivector("multivector") | archive_resource("archive") | single_object("object") ) def _combine_list(t): return "".join(t[0].asList()) explicit_field_reference_prefix = Group( OneOrMore((Optional(".") + identifier + ~FollowedBy(','))) ).setParseAction(_combine_list) explicit_reference = Group( Keyword("@explicit_reference") - "(" + explicit_field_reference_prefix("source_type") + "." + identifier("source_field") + "," + qualified_identifier("destination") + ")" ) bound_implicitly = Group( Keyword("@bound_implicitly") - "(" + identifier("name") + ":" +
def get_grammar(self):
    """
    Build and return the pyparsing grammar for mathematical expressions.

    The grammar is assembled bottom-up: numbers, functions, variables,
    parenthesized groups and arrays are the atoms, which are then combined
    by operators in order of precedence (power, negation, parallel,
    product, sum). The returned parser is the full expression followed by
    ``stringEnd``, so it only succeeds when the whole input is consumed.

    Possibly helpful:
        - BNF form of context-free grammar
          https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form
        - Some pyparsing docs
          http://infohost.nmt.edu/~shipman/soft/pyparsing/web/index.html
    """
    # Signs. A unicode em dash is accepted and reported as an ASCII "-".
    plus_sign = Literal("+")
    em_dash = Literal("\u2014").setParseAction(lambda: "-")
    minus_sign = Literal("-") | em_dash
    any_sign = plus_sign | minus_sign

    # Mantissa: "1", "1.", "1.0" or ".1".
    # Combine() joins the matched pieces into a single token and requires
    # them to be contiguous (no spaces in between).
    digits = Word(nums)
    digits_then_fraction = digits + Optional("." + Optional(digits))
    bare_fraction = "." + digits
    mantissa = Combine(digits_then_fraction | bare_fraction)

    # Suffix: letters and/or "%", post-processed by suffix_parse_action.
    unit_suffix = Word(alphas + '%').setParseAction(self.suffix_parse_action)

    # A number is a Group (its own ParseResults) holding:
    #   "num":    mantissa plus optional exponent, as one text token
    #   "suffix": the optional suffix
    # Calling a parser with ("name") is shorthand for setResultsName, which
    # lets that piece be looked up by name in the parse result.
    exponent = CaselessLiteral("E") + Optional(any_sign) + digits
    number = Group(
        Combine(mantissa + Optional(exponent))("num")
        + Optional(unit_suffix)("suffix")
    )("number")

    # Object names take one of two forms:
    #   1. head + subscripts + primes
    #   2. head + lower indices + upper indices + primes
    # where
    #   head (required):     starts with a letter, then alphanumerics
    #   subscripts:          alphanumerics and underscores, not followed by "{"
    #   lower indices:       "_{(-)<alphanumeric>}"
    #   upper indices:       "^{(-)<alphanumeric>}"
    #   primes:              any number of "'"
    head = Word(alphas, alphanums)
    plain_subscripts = Word(alphanums + '_') + ~FollowedBy('{')
    braced_lower = Literal("_{") + Optional("-") + Word(alphanums) + Literal("}")
    braced_upper = Literal("^{") + Optional("-") + Word(alphanums) + Literal("}")
    braced_indices = Optional(braced_lower) + Optional(braced_upper)
    primes = ZeroOrMore("'")
    name = Combine(head + Optional(plain_subscripts | braced_indices) + primes)

    # A variable is a group containing exactly one object name.
    variable = Group(name("varname"))("variable")
    variable.setParseAction(self.variable_parse_action)

    # Placeholder for the recursive grammar; bound at the bottom.
    expression = Forward()

    # funcname(arguments): a comma-separated list with at least one argument.
    call_arguments = Group(delimitedList(expression))("arguments")
    function = Group(
        name("funcname") + Suppress("(") + call_arguments + Suppress(")")
    )("function")
    function.setParseAction(self.function_parse_action)

    parentheses = Group(Suppress("(") + expression + Suppress(")"))('parentheses')

    array = Group(
        Suppress("[") + delimitedList(expression) + Suppress("]")
    )("array")

    # Atoms evaluate to a number or array without any binary operation.
    atom = number | function | variable | parentheses | array

    # Operators, tightest binding first.

    # Exponentiation; a minus directly after "^" is captured as "op".
    signed_exponent = Suppress("^") + Optional(minus_sign)("op") + atom
    power = atom + ZeroOrMore(signed_exponent)
    power.addParseAction(self.group_if_multiple('power'))

    # Leading minus (e.g. the -3 in 5*-3). Negation inside powers is
    # handled above; the precedence relative to parallel is arbitrary.
    negation = Optional(minus_sign)("op") + power
    negation.addParseAction(self.group_if_multiple('negation'))

    # Parallel operator: 1 || 5 == 1/(1/1 + 1/5)
    double_pipe = Literal('|') + Literal('|')
    parallel = negation + ZeroOrMore(Suppress(double_pipe) + negation)
    parallel.addParseAction(self.group_if_multiple('parallel'))

    # Multiplication and division.
    times_or_over = Literal('*') | Literal('/')
    product = parallel + ZeroOrMore(times_or_over("op") + parallel)
    product.addParseAction(self.group_if_multiple('product'))

    # Sums and differences; leading "-" is already covered by negation.
    sumdiff = Optional(plus_sign) + product + ZeroOrMore(any_sign("op") + product)
    sumdiff.addParseAction(self.group_if_multiple('sum'))

    # Close the recursion.
    expression << sumdiff

    return expression + stringEnd
def _create_config_parser():
    """
    Build the pyparsing parser for bibfield rule definitions.

    Grammar accepted by the returned parser (BNF-like)::

        config                  ::= (rule | include | comment)+
        comment                 ::= "#" REST_OF_LINE EOL
        include                 ::= "include" QUOTED_STRING
        rule                    ::= [persistent_identifier] json_id
                                    ["," aliases] ":" INDENT body UNDENT
        json_id                 ::= NAME ["[0]" | "[n]"]
        aliases                 ::= json_id ("," json_id)*
        body                    ::= [inherit_from]
                                    [creator | derived | calculated]
                                    [checker] [documentation]
        creator                 ::= "creator" ":" INDENT creator_body+ UNDENT
        creator_body            ::= [parse_first] [depends_on] [only_if]
                                    [legacy] source_format "," source_tag
                                    "," python_allowed_expr
        derived                 ::= "derived" ":" INDENT
                                    derived_calculated_body UNDENT
        calculated              ::= "calculated" ":" INDENT
                                    derived_calculated_body UNDENT
        derived_calculated_body ::= [parse_first] [depends_on] [only_if]
                                    [do_not_cache] python_allowed_expr
        checker                 ::= "checker" ":" INDENT
                                    checker_function+ UNDENT
        checker_function        ::= [master_format] (ident ".")* ident
                                    "(" ... ")"
        documentation           ::= "documentation" ":" INDENT [doc_string]
                                    subfield* UNDENT
        subfield                ::= "@subfield" SUBFIELD_NAME ":" [doc_string]
        doc_string              ::= TRIPLE_QUOTED_STRING | QUOTED_STRING
        source_format           ::= one of CFG_BIBFIELD_MASTER_FORMATS
        source_tag              ::= QUOTED_STRING
        python_allowed_expr     ::= ident | dict_def | list_def | dict_access
                                    | list_access | function_call
        persistent_identifier   ::= "@persistent_identifier" "(" ... ")"
        inherit_from            ::= "@inherit_from" "(" ... ")"
        legacy                  ::= "@legacy" "(" ... ")"
        only_if                 ::= "@only_if" "(" ... ")"
        depends_on              ::= "@depends_on" "(" ... ")"
        parse_first             ::= "@parse_first" "(" ... ")"
        master_format           ::= "@master_format" "(" ... ")"
        do_not_cache            ::= "@" "do_not_cache"
    """
    # Column of each currently open indentation level, innermost last.
    open_columns = [1]

    def check_sub_indent(text, location, tokens):
        """Open a new level if the text is indented deeper than the last."""
        column = col(location, text)
        if column <= open_columns[-1]:
            raise ParseException(text, location, "not a subentry")
        open_columns.append(column)

    def check_unindent(text, location, tokens):
        """Reject a position that is not to the left of the open level."""
        if location >= len(text):
            return
        column = col(location, text)
        if column >= open_columns[-1] or column > open_columns[-2]:
            raise ParseException(text, location, "not an unindent")

    def do_unindent():
        """Close the innermost indentation level."""
        open_columns.pop()

    def join_tokens(tokens):
        """Concatenate the matched tokens into one string."""
        return "".join(tokens)

    INDENT = lineEnd.suppress() + empty + \
        empty.copy().setParseAction(check_sub_indent)
    UNDENT = FollowedBy(empty).setParseAction(check_unindent)
    # NOTE(review): setParseAction replaces any previously set actions, so
    # after this call UNDENT only runs do_unindent (check_unindent is
    # dropped). Kept as in the original to preserve behaviour.
    UNDENT.setParseAction(do_unindent)

    def decorator(name, **results_options):
        """Build '@<name>(...)' keeping the parenthesized text verbatim."""
        call = Suppress("@" + name) + originalTextFor(nestedExpr("(", ")"))
        return call.setResultsName(name, **results_options)

    def section(keyword, content):
        """Build '<keyword>: INDENT <content> UNDENT'."""
        return keyword + Suppress(":") + INDENT + content + UNDENT

    # Field names, optionally suffixed with "[0]" or "[n]".
    json_id = Word(alphanums + "_") + Optional(oneOf("[0] [n]"))
    json_id = json_id.setResultsName("json_id", listAllMatches=True)
    json_id.setParseAction(join_tokens)

    alias = Word(alphanums + "_") + Optional(oneOf("[0] [n]"))
    alias.setParseAction(join_tokens)
    aliases = delimitedList(alias).setResultsName("aliases")

    # Restricted set of Python expressions, captured as source text.
    python_allowed_expr = Forward()
    ident = Word(alphas + "_", alphanums + "_")
    dict_def = originalTextFor(nestedExpr('{', '}'))
    list_def = originalTextFor(nestedExpr('[', ']'))
    dict_access = list_access = originalTextFor(ident + nestedExpr('[', ']'))
    function_call = originalTextFor(
        ZeroOrMore(ident + ".") + ident + nestedExpr('(', ')'))

    any_python_expr = (ident ^ dict_def ^ list_def ^ dict_access ^
                       list_access ^ function_call)
    python_allowed_expr << any_python_expr.setResultsName(
        "value", listAllMatches=True)

    # Decorators.
    persistent_identifier = (
        Suppress("@persistent_identifier") + nestedExpr("(", ")")
    ).setResultsName("persistent_identifier")
    inherit_from = decorator("inherit_from")
    legacy = decorator("legacy", listAllMatches=True)
    only_if = decorator("only_if")
    depends_on = decorator("depends_on")
    parse_first = decorator("parse_first")
    do_not_cache = (Suppress("@") + "do_not_cache").setResultsName(
        "do_not_cache")
    master_format = decorator("master_format")

    # Derived and calculated fields share the same body.
    derived_calculated_body = (Optional(parse_first) + Optional(depends_on) +
                               Optional(only_if) + Optional(do_not_cache) +
                               python_allowed_expr)
    derived = section("derived", derived_calculated_body)
    calculated = section("calculated", derived_calculated_body)

    # Creator fields. setParseAction modifies the shared pyparsing
    # quotedString element in place before the named copy is taken.
    source_tag = quotedString.setParseAction(removeQuotes)
    source_tag = source_tag.setResultsName("source_tag", listAllMatches=True)
    source_format = oneOf(CFG_BIBFIELD_MASTER_FORMATS).setResultsName(
        "source_format", listAllMatches=True)
    creator_body = (
        Optional(parse_first) + Optional(depends_on) + Optional(only_if) +
        Optional(legacy) + source_format + Suppress(",") + source_tag +
        Suppress(",") + python_allowed_expr
    ).setResultsName("creator_def", listAllMatches=True)
    creator = section("creator", OneOrMore(creator_body))

    # Checkers.
    checker_function = (
        Optional(master_format) + ZeroOrMore(ident + ".") + ident +
        originalTextFor(nestedExpr('(', ')'))
    ).setResultsName("checker_function", listAllMatches=True)
    checker = section("checker", OneOrMore(checker_function))

    # Documentation.
    doc_string = QuotedString(quoteChar='"""', multiline=True) | \
        quotedString.setParseAction(removeQuotes)
    subfield = (
        Suppress("@subfield") + Word(alphanums + "_" + '.') + Suppress(":") +
        Optional(doc_string)
    ).setResultsName("subfields", listAllMatches=True)
    documentation = (
        "documentation" + Suppress(":") + INDENT +
        Optional(doc_string).setResultsName("main_doc") +
        ZeroOrMore(subfield) + UNDENT
    ).setResultsName("documentation")

    # Whole rule and top-level entries.
    field_def = (creator | derived | calculated).setResultsName(
        "type_field", listAllMatches=True)
    body = (Optional(inherit_from) + Optional(field_def) +
            Optional(checker) + Optional(documentation))
    comment = Literal("#") + restOfLine + LineEnd()
    include = (Suppress("include") + quotedString).setResultsName(
        "includes", listAllMatches=True)
    rule = (
        Optional(persistent_identifier) + json_id +
        Optional(Suppress(",") + aliases) + Suppress(":") +
        INDENT + body + UNDENT
    ).setResultsName("rules", listAllMatches=True)

    return OneOrMore(rule | include | comment.suppress())
def Verilog_BNF():
    """
    Return the pyparsing grammar for Verilog source text.

    The grammar is built on the first call and stored in the module-level
    ``verilogbnf``; later calls return that same object. The top-level
    parser matches one or more ``module`` or ``primitive`` (UDP)
    definitions up to the end of the input, ignoring C++-style comments
    and compiler directives.

    Ordering of alternatives matters throughout: pyparsing's ``|`` tries
    alternatives left to right and takes the first that matches.
    """
    global verilogbnf

    if verilogbnf is None:

        # compiler directives
        compilerDirective = Combine(
            "`" +
            oneOf("define undef ifdef else endif default_nettype "
                  "include resetall timescale unconnected_drive "
                  "nounconnected_drive celldefine endcelldefine") +
            restOfLine).setName("compilerDirective")

        # primitives
        SEMI, COLON, LPAR, RPAR, LBRACE, RBRACE, LBRACK, RBRACK, DOT, COMMA, EQ = \
            map(Literal, ";:(){}[].,=")

        identLead = alphas + "$_"
        identBody = alphanums + "$_"
        # plain identifier, optionally hierarchical (a.b.c) or with leading "."
        identifier1 = Regex(
            r"\.?[" + identLead + "][" + identBody + r"]*(\.[" + identLead +
            "][" + identBody + "]*)*"
        ).setName("baseIdent")
        # escaped identifier: backslash followed by non-space characters;
        # the parse action strips the leading backslash
        identifier2 = Regex(r"\\\S+").setParseAction(
            lambda t: t[0][1:]).setName("escapedIdent")
        identifier = identifier1 | identifier2
        # self-check: comparing a parser with a string tests for a match
        assert(identifier2 == r'\abc')

        hexnums = nums + "abcdefABCDEF" + "_?"
        base = Regex("'[bBoOdDhH]").setName("base")
        basedNumber = Combine(
            Optional(Word(nums + "_")) + base + Word(hexnums + "xXzZ"),
            joinString=" ", adjacent=False).setName("basedNumber")
        number = (basedNumber |
                  Regex(r"[+-]?[0-9_]+(\.[0-9_]*)?([Ee][+-]?[0-9_]+)?")
                  ).setName("numeric")

        # expressions
        expr = Forward().setName("expr")
        concat = Group(LBRACE + delimitedList(expr) + RBRACE)
        multiConcat = Group("{" + expr + concat + "}").setName("multiConcat")
        funcCall = Group(
            identifier + LPAR + Optional(delimitedList(expr)) + RPAR
        ).setName("funcCall")

        subscrRef = Group(LBRACK + delimitedList(expr, COLON) + RBRACK)
        subscrIdentifier = Group(identifier + Optional(subscrRef))
        scalarConst = Regex("0|1('[Bb][01xX])?")
        mintypmaxExpr = Group(
            expr + COLON + expr + COLON + expr).setName("mintypmax")
        primary = (
            number |
            (LPAR + mintypmaxExpr + RPAR) |
            (LPAR + Group(expr) + RPAR).setName("nestedExpr") |
            multiConcat |
            concat |
            dblQuotedString |
            funcCall |
            subscrIdentifier
        )

        unop = oneOf("+ - ! ~ & ~& | ^| ^ ~^").setName("unop")
        binop = oneOf("+ - * / % == != === !== && "
                      "|| < <= > >= & | ^ ^~ >> << ** <<< >>>").setName("binop")

        expr << (
            (unop + expr) |  # must be first!
            (primary + "?" + expr + COLON + expr) |
            (primary + Optional(binop + expr))
        )

        lvalue = subscrIdentifier | concat

        # keywords
        if_ = Keyword("if")
        else_ = Keyword("else")
        edge = Keyword("edge")
        posedge = Keyword("posedge")
        negedge = Keyword("negedge")
        specify = Keyword("specify")
        endspecify = Keyword("endspecify")
        fork = Keyword("fork")
        join = Keyword("join")
        begin = Keyword("begin")
        end = Keyword("end")
        default = Keyword("default")
        forever = Keyword("forever")
        repeat = Keyword("repeat")
        while_ = Keyword("while")
        for_ = Keyword("for")
        case = oneOf("case casez casex")
        endcase = Keyword("endcase")
        wait = Keyword("wait")
        disable = Keyword("disable")
        deassign = Keyword("deassign")
        force = Keyword("force")
        release = Keyword("release")
        assign = Keyword("assign")

        # event and delay controls
        eventExpr = Forward()
        eventTerm = ((posedge + expr) | (negedge + expr) | expr |
                     (LPAR + eventExpr + RPAR))
        eventExpr << (
            Group(delimitedList(eventTerm, Keyword("or")))
        )
        eventControl = Group(
            "@" + ((LPAR + eventExpr + RPAR) | identifier | "*")
        ).setName("eventCtrl")

        delayArg = (
            number |
            Word(alphanums + "$_") |
            (LPAR + Group(delimitedList(mintypmaxExpr | expr)) + RPAR)
        ).setName("delayArg")
        delay = Group("#" + delayArg).setName("delay")
        delayOrEventControl = delay | eventControl

        # assignments
        assgnmt = Group(
            lvalue + EQ + Optional(delayOrEventControl) + expr
        ).setName("assgnmt")
        nbAssgnmt = Group(
            (lvalue + "<=" + Optional(delay) + expr) |
            (lvalue + "<=" + Optional(eventControl) + expr)
        ).setName("nbassgnmt")

        # NOTE: this local name hides the builtin range() inside this
        # function; the builtin is not used below.
        range = LBRACK + expr + COLON + expr + RBRACK

        # declarations
        paramAssgnmt = Group(identifier + EQ + expr).setName("paramAssgnmt")
        parameterDecl = Group(
            "parameter" + Optional(range) + delimitedList(paramAssgnmt) + SEMI
        ).setName("paramDecl")

        inputDecl = Group("input" + Optional(range) + delimitedList(identifier) + SEMI)
        outputDecl = Group("output" + Optional(range) + delimitedList(identifier) + SEMI)
        inoutDecl = Group("inout" + Optional(range) + delimitedList(identifier) + SEMI)

        regIdentifier = Group(
            identifier + Optional(LBRACK + expr + COLON + expr + RBRACK))
        regDecl = Group(
            "reg" + Optional("signed") + Optional(range) +
            delimitedList(regIdentifier) + SEMI
        ).setName("regDecl")
        timeDecl = Group("time" + delimitedList(regIdentifier) + SEMI)
        integerDecl = Group("integer" + delimitedList(regIdentifier) + SEMI)

        strength0 = oneOf("supply0 strong0 pull0 weak0 highz0")
        strength1 = oneOf("supply1 strong1 pull1 weak1 highz1")
        driveStrength = Group(
            LPAR + ((strength0 + COMMA + strength1) |
                    (strength1 + COMMA + strength0)) + RPAR
        ).setName("driveStrength")
        nettype = oneOf("wire tri tri1 supply0 wand triand tri0 supply1 wor trior trireg")
        expandRange = Optional(oneOf("scalared vectored")) + range
        realDecl = Group("real" + delimitedList(identifier) + SEMI)

        eventDecl = Group("event" + delimitedList(identifier) + SEMI)

        blockDecl = (
            parameterDecl |
            regDecl |
            integerDecl |
            realDecl |
            timeDecl |
            eventDecl
        )

        # statements
        #
        # Statement forms from the Verilog grammar spec:
        #   <blocking_assignment> ;
        #   <non_blocking_assignment> ;
        #   if ( <expression> ) <statement_or_null>
        #   if ( <expression> ) <statement_or_null> else <statement_or_null>
        #   case ( <expression> ) <case_item>+ endcase
        #   casez ( <expression> ) <case_item>+ endcase
        #   casex ( <expression> ) <case_item>+ endcase
        #   forever <statement>
        #   repeat ( <expression> ) <statement>
        #   while ( <expression> ) <statement>
        #   for ( <assignment> ; <expression> ; <assignment> ) <statement>
        #   <delay_or_event_control> <statement_or_null>
        #   wait ( <expression> ) <statement_or_null>
        #   -> <name_of_event> ;
        #   <seq_block>
        #   <par_block>
        #   <task_enable>
        #   <system_task_enable>
        #   disable <name_of_task> ;
        #   disable <name_of_block> ;
        #   assign <assignment> ;
        #   deassign <lvalue> ;
        #   force <assignment> ;
        #   release <lvalue> ;
        stmt = Forward().setName("stmt")
        stmtOrNull = stmt | SEMI
        caseItem = ((delimitedList(expr) + COLON + stmtOrNull) |
                    (default + Optional(":") + stmtOrNull))
        stmt << Group(
            (begin + Group(ZeroOrMore(stmt)) + end).setName("begin-end") |
            (if_ + Group(LPAR + expr + RPAR) + stmtOrNull +
             Optional(else_ + stmtOrNull)).setName("if") |
            (delayOrEventControl + stmtOrNull) |
            (case + LPAR + expr + RPAR + OneOrMore(caseItem) + endcase) |
            (forever + stmt) |
            (repeat + LPAR + expr + RPAR + stmt) |
            (while_ + LPAR + expr + RPAR + stmt) |
            (for_ + LPAR + assgnmt + SEMI + Group(expr) + SEMI + assgnmt +
             RPAR + stmt) |
            (fork + ZeroOrMore(stmt) + join) |
            # A labelled parallel block is closed by "join", like the
            # unlabelled form above (previously this wrongly required "end").
            (fork + COLON + identifier + ZeroOrMore(blockDecl) +
             ZeroOrMore(stmt) + join) |
            (wait + LPAR + expr + RPAR + stmtOrNull) |
            ("->" + identifier + SEMI) |
            (disable + identifier + SEMI) |
            (assign + assgnmt + SEMI) |
            (deassign + lvalue + SEMI) |
            (force + assgnmt + SEMI) |
            (release + lvalue + SEMI) |
            (begin + COLON + identifier + ZeroOrMore(blockDecl) +
             ZeroOrMore(stmt) + end).setName("begin:label-end") |
            # these *have* to go at the end of the list!!!
            (assgnmt + SEMI) |
            (nbAssgnmt + SEMI) |
            (Combine(Optional("$") + identifier) +
             Optional(LPAR + delimitedList(expr | empty) + RPAR) + SEMI)
        ).setName("stmtBody")

        alwaysStmt = Group(
            "always" + Optional(eventControl) + stmt).setName("alwaysStmt")
        initialStmt = Group("initial" + stmt).setName("initialStmt")

        chargeStrength = Group(
            LPAR + oneOf("small medium large") + RPAR
        ).setName("chargeStrength")

        continuousAssign = Group(
            assign + Optional(driveStrength) + Optional(delay) +
            delimitedList(assgnmt) + SEMI
        ).setName("continuousAssign")

        # tasks and functions
        tfDecl = (
            parameterDecl |
            inputDecl |
            outputDecl |
            inoutDecl |
            regDecl |
            timeDecl |
            integerDecl |
            realDecl
        )

        functionDecl = Group(
            "function" + Optional(range | "integer" | "real") + identifier + SEMI +
            Group(OneOrMore(tfDecl)) +
            Group(ZeroOrMore(stmt)) +
            "endfunction"
        )

        # nets
        inputOutput = oneOf("input output")
        netDecl1Arg = (nettype +
                       Optional(expandRange) +
                       Optional(delay) +
                       Group(delimitedList(~inputOutput + identifier)))
        netDecl2Arg = ("trireg" +
                       Optional(chargeStrength) +
                       Optional(expandRange) +
                       Optional(delay) +
                       Group(delimitedList(~inputOutput + identifier)))
        netDecl3Arg = (nettype +
                       Optional(driveStrength) +
                       Optional(expandRange) +
                       Optional(delay) +
                       Group(delimitedList(assgnmt)))
        netDecl1 = Group(netDecl1Arg + SEMI).setName("netDecl1")
        netDecl2 = Group(netDecl2Arg + SEMI).setName("netDecl2")
        netDecl3 = Group(netDecl3Arg + SEMI).setName("netDecl3")

        # gates, UDP instances and module instances
        gateType = oneOf("and nand or nor xor xnor buf bufif0 bufif1 "
                         "not notif0 notif1 pulldown pullup nmos rnmos "
                         "pmos rpmos cmos rcmos tran rtran tranif0 "
                         "rtranif0 tranif1 rtranif1")
        gateInstance = (Optional(Group(identifier + Optional(range))) +
                        LPAR + Group(delimitedList(expr)) + RPAR)
        gateDecl = Group(
            gateType +
            Optional(driveStrength) +
            Optional(delay) +
            delimitedList(gateInstance) +
            SEMI)

        udpInstance = Group(
            Group(identifier + Optional(range | subscrRef)) +
            LPAR + Group(delimitedList(expr)) + RPAR)
        udpInstantiation = Group(
            identifier -
            Optional(driveStrength) +
            Optional(delay) +
            delimitedList(udpInstance) +
            SEMI
        ).setName("udpInstantiation")

        parameterValueAssignment = Group(
            Literal("#") + LPAR + Group(delimitedList(expr)) + RPAR)
        namedPortConnection = Group(
            DOT + identifier + LPAR + expr + RPAR
        ).setName("namedPortConnection")
        # self-check: an escaped identifier works as a named port
        assert(r'.\abc (abc )' == namedPortConnection)
        modulePortConnection = expr | empty
        inst_args = Group(
            LPAR + (delimitedList(namedPortConnection) |
                    delimitedList(modulePortConnection)) + RPAR
        ).setName("inst_args")
        moduleInstance = Group(
            Group(identifier + Optional(range)) + inst_args
        ).setName("moduleInstance")

        moduleInstantiation = Group(
            identifier +
            Optional(parameterValueAssignment) +
            delimitedList(moduleInstance).setName("moduleInstanceList") +
            SEMI
        ).setName("moduleInstantiation")

        parameterOverride = Group(
            "defparam" + delimitedList(paramAssgnmt) + SEMI)
        task = Group(
            "task" + identifier + SEMI +
            ZeroOrMore(tfDecl) +
            stmtOrNull +
            "endtask")

        # specify blocks
        specparamDecl = Group("specparam" + delimitedList(paramAssgnmt) + SEMI)

        pathDescr1 = Group(
            LPAR + subscrIdentifier + "=>" + subscrIdentifier + RPAR)
        pathDescr2 = Group(
            LPAR + Group(delimitedList(subscrIdentifier)) + "*>" +
            Group(delimitedList(subscrIdentifier)) + RPAR)
        pathDescr3 = Group(
            LPAR + Group(delimitedList(subscrIdentifier)) + "=>" +
            Group(delimitedList(subscrIdentifier)) + RPAR)
        pathDelayValue = Group(
            (LPAR + Group(delimitedList(mintypmaxExpr | expr)) + RPAR) |
            mintypmaxExpr |
            expr)
        pathDecl = Group(
            (pathDescr1 | pathDescr2 | pathDescr3) + EQ + pathDelayValue + SEMI
        ).setName("pathDecl")

        portConditionExpr = Forward()
        portConditionTerm = Optional(unop) + subscrIdentifier
        portConditionExpr << portConditionTerm + Optional(binop + portConditionExpr)
        polarityOp = oneOf("+ -")
        levelSensitivePathDecl1 = Group(
            if_ + Group(LPAR + portConditionExpr + RPAR) +
            subscrIdentifier + Optional(polarityOp) + "=>" + subscrIdentifier + EQ +
            pathDelayValue +
            SEMI)
        levelSensitivePathDecl2 = Group(
            if_ + Group(LPAR + portConditionExpr + RPAR) +
            LPAR + Group(delimitedList(subscrIdentifier)) + Optional(polarityOp) + "*>" +
            Group(delimitedList(subscrIdentifier)) + RPAR + EQ +
            pathDelayValue +
            SEMI)
        levelSensitivePathDecl = levelSensitivePathDecl1 | levelSensitivePathDecl2

        edgeIdentifier = posedge | negedge
        edgeSensitivePathDecl1 = Group(
            Optional(if_ + Group(LPAR + expr + RPAR)) +
            LPAR + Optional(edgeIdentifier) +
            subscrIdentifier + "=>" +
            LPAR + subscrIdentifier + Optional(polarityOp) + COLON + expr + RPAR + RPAR +
            EQ +
            pathDelayValue +
            SEMI)
        edgeSensitivePathDecl2 = Group(
            Optional(if_ + Group(LPAR + expr + RPAR)) +
            LPAR + Optional(edgeIdentifier) +
            subscrIdentifier + "*>" +
            LPAR + delimitedList(subscrIdentifier) + Optional(polarityOp) + COLON + expr + RPAR + RPAR +
            EQ +
            pathDelayValue +
            SEMI)
        edgeSensitivePathDecl = edgeSensitivePathDecl1 | edgeSensitivePathDecl2

        edgeDescr = oneOf("01 10 0x x1 1x x0").setName("edgeDescr")

        timCheckEventControl = Group(
            posedge | negedge |
            (edge + LBRACK + delimitedList(edgeDescr) + RBRACK))
        timCheckCond = Forward()
        timCondBinop = oneOf("== === != !==")
        timCheckCondTerm = ((expr + timCondBinop + scalarConst) |
                            (Optional("~") + expr))
        timCheckCond << ((LPAR + timCheckCond + RPAR) | timCheckCondTerm)
        timCheckEvent = Group(
            Optional(timCheckEventControl) +
            subscrIdentifier +
            Optional("&&&" + timCheckCond))
        timCheckLimit = expr
        controlledTimingCheckEvent = Group(
            timCheckEventControl + subscrIdentifier +
            Optional("&&&" + timCheckCond))
        notifyRegister = identifier

        systemTimingCheck1 = Group(
            "$setup" +
            LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit +
            Optional(COMMA + notifyRegister) + RPAR +
            SEMI)
        systemTimingCheck2 = Group(
            "$hold" +
            LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit +
            Optional(COMMA + notifyRegister) + RPAR +
            SEMI)
        systemTimingCheck3 = Group(
            "$period" +
            LPAR + controlledTimingCheckEvent + COMMA + timCheckLimit +
            Optional(COMMA + notifyRegister) + RPAR +
            SEMI)
        systemTimingCheck4 = Group(
            "$width" +
            LPAR + controlledTimingCheckEvent + COMMA + timCheckLimit +
            Optional(COMMA + expr + COMMA + notifyRegister) + RPAR +
            SEMI)
        systemTimingCheck5 = Group(
            "$skew" +
            LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit +
            Optional(COMMA + notifyRegister) + RPAR +
            SEMI)
        systemTimingCheck6 = Group(
            "$recovery" +
            LPAR + controlledTimingCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit +
            Optional(COMMA + notifyRegister) + RPAR +
            SEMI)
        systemTimingCheck7 = Group(
            "$setuphold" +
            LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit + COMMA + timCheckLimit +
            Optional(COMMA + notifyRegister) + RPAR +
            SEMI)
        # the lookahead skips all seven alternatives unless a "$" is next
        systemTimingCheck = (FollowedBy('$') + (
            systemTimingCheck1 | systemTimingCheck2 | systemTimingCheck3 |
            systemTimingCheck4 | systemTimingCheck5 | systemTimingCheck6 |
            systemTimingCheck7)).setName("systemTimingCheck")
        sdpd = (if_ + Group(LPAR + expr + RPAR) +
                (pathDescr1 | pathDescr2) + EQ + pathDelayValue + SEMI)

        # Specify items from the Verilog grammar spec:
        #   <specparam_declaration>
        #   <path_declaration>
        #   <level_sensitive_path_declaration>
        #   <edge_sensitive_path_declaration>
        #   <system_timing_check>
        #   <sdpd>
        specifyItem = ~Keyword("endspecify") + (
            specparamDecl |
            pathDecl |
            levelSensitivePathDecl |
            edgeSensitivePathDecl |
            systemTimingCheck |
            sdpd
        )
        specifyBlock = Group(
            "specify" + ZeroOrMore(specifyItem) + "endspecify"
        ).setName("specifyBlock")

        # All possible module items, from the Verilog grammar spec:
        #   <parameter_declaration>
        #   <input_declaration>
        #   <output_declaration>
        #   <inout_declaration>
        #   <net_declaration>  (spec does not seem consistent for this item)
        #   <reg_declaration>
        #   <time_declaration>
        #   <integer_declaration>
        #   <real_declaration>
        #   <event_declaration>
        #   <gate_declaration>
        #   <UDP_instantiation>
        #   <module_instantiation>
        #   <parameter_override>
        #   <continuous_assign>
        #   <specify_block>
        #   <initial_statement>
        #   <always_statement>
        #   <task>
        #   <function>
        moduleItem = ~Keyword("endmodule") + (
            parameterDecl |
            inputDecl |
            outputDecl |
            inoutDecl |
            regDecl |
            netDecl3 |
            netDecl1 |
            netDecl2 |
            timeDecl |
            integerDecl |
            realDecl |
            eventDecl |
            gateDecl |
            parameterOverride |
            continuousAssign |
            specifyBlock |
            initialStmt |
            alwaysStmt |
            task |
            functionDecl |
            # these have to be at the end - they start with identifiers
            moduleInstantiation |
            udpInstantiation
        )

        # modules
        portRef = subscrIdentifier
        portExpr = portRef | Group(LBRACE + delimitedList(portRef) + RBRACE)
        port = portExpr | Group((DOT + identifier + LPAR + portExpr + RPAR))

        moduleHdr = Group(
            oneOf("module macromodule") + identifier +
            Optional(
                LPAR +
                Group(Optional(delimitedList(
                    Group(oneOf("input output") +
                          (netDecl1Arg | netDecl2Arg | netDecl3Arg)) |
                    port))) +
                RPAR) +
            SEMI
        ).setName("moduleHdr")

        module = Group(
            moduleHdr +
            Group(ZeroOrMore(moduleItem)) +
            "endmodule"
        ).setName("module")

        # user-defined primitives
        udpDecl = outputDecl | inputDecl | regDecl
        udpInitVal = (Regex("1'[bB][01xX]") | Regex("[01xX]")).setName("udpInitVal")
        udpInitialStmt = Group(
            "initial" + identifier + EQ + udpInitVal + SEMI
        ).setName("udpInitialStmt")

        levelSymbol = oneOf("0 1 x X ? b B")
        levelInputList = Group(OneOrMore(levelSymbol).setName("levelInpList"))
        outputSymbol = oneOf("0 1 x X")
        combEntry = Group(levelInputList + COLON + outputSymbol + SEMI)
        edgeSymbol = oneOf("r R f F p P n N *")
        # Rebinds the local name "edge": from here on it is the UDP table
        # edge entry, not the "edge" keyword used by the timing checks
        # above (which were already built with the keyword).
        edge = (Group(LPAR + levelSymbol + levelSymbol + RPAR) |
                Group(edgeSymbol))
        edgeInputList = Group(
            ZeroOrMore(levelSymbol) + edge + ZeroOrMore(levelSymbol))
        inputList = levelInputList | edgeInputList
        seqEntry = Group(
            inputList + COLON + levelSymbol + COLON +
            (outputSymbol | "-") + SEMI
        ).setName("seqEntry")
        udpTableDefn = Group(
            "table" + OneOrMore(combEntry | seqEntry) + "endtable"
        ).setName("table")

        # <UDP>
        #   ::= primitive <name_of_UDP> ( <name_of_variable> <,<name_of_variable>>* ) ;
        #       <UDP_declaration>+
        #       <UDP_initial_statement>?
        #       <table_definition>
        #       endprimitive
        udp = Group(
            "primitive" + identifier +
            LPAR + Group(delimitedList(identifier)) + RPAR + SEMI +
            OneOrMore(udpDecl) +
            Optional(udpInitialStmt) +
            udpTableDefn +
            "endprimitive")

        verilogbnf = OneOrMore(module | udp) + StringEnd()

        verilogbnf.ignore(cppStyleComment)
        verilogbnf.ignore(compilerDirective)

    return verilogbnf
# Placeholder for the recursive expression grammar; bound elsewhere.
expression = Forward()

# Literals
intNumber = Regex(r'-?\d+').setResultsName('integer')
floatNumber = Regex(r'-?\d+\.\d+').setResultsName('float')
sciNumber = Combine(
    (floatNumber | intNumber) + CaselessLiteral('e') + intNumber
).setResultsName('scientific')
aString = quotedString.setResultsName('string')

# A numeric literal only counts as a number when it is immediately followed
# by ",", ")" or the end of the line (lookahead only, nothing is consumed).
# The original author did not record why this restriction is needed.
afterNumber = FollowedBy(",") ^ FollowedBy(")") ^ FollowedBy(LineEnd())
number = Group(
    (sciNumber + afterNumber) |
    (floatNumber + afterNumber) |
    (intNumber + afterNumber)
).setResultsName('number')

boolean = Group(
    CaselessKeyword("true") | CaselessKeyword("false")
).setResultsName('boolean')
none = Group(CaselessKeyword('none')).setResultsName('none')

# Identifiers: a letter or underscore, then letters, digits or underscores.
argname = Word(alphas + '_', alphanums + '_').setResultsName('argname')
funcname = Word(alphas + '_', alphanums + '_').setResultsName('funcname')

# Symbols (matched but dropped from the parse results)
leftParen, rightParen, comma, equal = (
    Literal(symbol).suppress() for symbol in '(),='
)
from rdflib.py3compat import bytestype ParserElement.setDefaultWhitespaceChars(" \n") String = STRING_LITERAL1 | STRING_LITERAL2 RDFLITERAL = Comp( 'literal', Param('string', String) + Optional( Param('lang', LANGTAG.leaveWhitespace()) | Literal('^^').leaveWhitespace() + Param('datatype', IRIREF).leaveWhitespace())) NONE_VALUE = object() EMPTY = FollowedBy(LineEnd()) | FollowedBy("\t") EMPTY.setParseAction(lambda x: NONE_VALUE) TERM = RDFLITERAL | IRIREF | BLANK_NODE_LABEL | NumericLiteral | BooleanLiteral ROW = (EMPTY | TERM) + ZeroOrMore(Suppress("\t") + (EMPTY | TERM)) ROW.parseWithTabs() HEADER = Var + ZeroOrMore(Suppress("\t") + Var) HEADER.parseWithTabs() class TSVResultParser(ResultParser): def parse(self, source): if isinstance(source.read(0), bytestype):
from pyparsing import FollowedBy, Literal, Word


def parenthesize(characters, name):
    """Build a grammar for a marker wrapped in literal parentheses.

    The marker is a run of ``characters`` and is stored in the parse
    results under ``name``; the surrounding "(" and ")" are matched as
    ordinary literals.
    """
    marker = Word(characters).setResultsName(name)
    return Literal("(") + marker + Literal(")")


def decimalize(characters, name):
    """Build a grammar for a marker directly followed by a period.

    The marker is a run of ``characters`` stored under ``name``; the "."
    must follow immediately because whitespace skipping is disabled for it.
    """
    marker = Word(characters).setResultsName(name)
    period = Literal(".").leaveWhitespace()
    return marker + period


# Only used as the top of the appendix hierarchy
a1 = Word(string.digits).setResultsName("a1")
aI = Word("IVXLCDM").setResultsName("aI")

# Catches the A in 12A but not in 12Awesome
markerless_upper = (
    Word(string.ascii_uppercase).setResultsName('markerless_upper')
    + ~FollowedBy(Word(string.ascii_lowercase))
)

# Parenthesised markers: (A), (a), (1)
paren_upper = parenthesize(string.ascii_uppercase, "paren_upper")
paren_lower = parenthesize(string.ascii_lowercase, "paren_lower")
paren_digit = parenthesize(string.digits, "paren_digit")

# Period-terminated markers: A., a., 1., IV.
period_upper = decimalize(string.ascii_uppercase, "period_upper")
period_lower = decimalize(string.ascii_lowercase, "period_lower")
period_digit = decimalize(string.digits, "period_digit")
roman_upper = decimalize('IVXLCDM', "roman_upper")
# Only spaces and tabs are skipped as whitespace; line ends are matched
# explicitly through NL below.
ParserElement.setDefaultWhitespaceChars(" \t")

# End of line, dropped from the results.
NL = LineEnd().suppress()

integer = Word(nums)
# Plan line "1..<n>"; the number is stored as "ubound".
plan = '1..' + integer.setResultsName("ubound")

OK = Literal('ok')
NOT_OK = Literal('not ok')
testStatus = (OK | NOT_OK)

# Free text up to a "#" or the end of the line, with leading dashes and
# spaces stripped.
description = Regex("[^#\n]+").setParseAction(
    lambda toks: toks[0].lstrip('- '))

TODO = CaselessLiteral('TODO')
SKIP = CaselessLiteral('SKIP')
# "# TODO <text>" yields [TODO, text].  For "# SKIP..." the keyword is only
# looked ahead at, and the whole remainder of the line is reported as
# ['SKIP', <rest of line>].
directive = Group(
    Suppress('#')
    + (
        (TODO + restOfLine)
        | (
            FollowedBy(SKIP)
            + restOfLine.copy().setParseAction(
                lambda toks: ['SKIP', toks[0]])
        )
    )
)

commentLine = Suppress("#") + empty + restOfLine

# One test result line, optionally preceded by comment lines.
testLine = Group(
    Optional(OneOrMore(commentLine + NL)).setResultsName("comments")
    + testStatus.setResultsName("passed")
    + Optional(integer).setResultsName("testNumber")
    + Optional(description).setResultsName("description")
    + Optional(directive).setResultsName("directive")
)

bailLine = Group(
    Literal("Bail out!").setResultsName("BAIL")
    + empty
    + Optional(restOfLine).setResultsName("reason")
)

# The optional plan line and the block of test lines may come in either
# order ("&" builds an Each expression).
tapOutputParser = (
    Optional(Group(plan).setResultsName("plan") + NL)
    & Group(OneOrMore((testLine | bailLine) + NL)).setResultsName("tests")
)
# Factorial: a function term followed by one or more postfix operators.
fact_expr = Group(
    func_term + OneOrMore(factop)
).setParseAction(operators.PostfixSymbol.process)
fact_term = (fact_expr | func_term)

# Exponent: refers to itself on the right-hand side through the Forward.
exp_term = Forward()
# 'signop' in exponent is handled in process_expop().
exp_expr = Group(
    fact_term + expop + ZeroOrMore(signop) + exp_term
).setParseAction(operators.Exponent.process)
exp_term <<= (exp_expr | fact_term)

# Sign.
sign_term = Forward()
_signop = Optional(signop)
# The FollowedBy lookahead requires an actual sign operator (the expression
# wrapped by the Optional) before the grouped alternative is attempted.
# The original comment only said "try to avoid LR"; the exact motivation is
# not documented.
sign_expr = (
    FollowedBy(_signop.expr + sign_term) + Group(_signop + sign_term)
).setParseAction(operators.PrefixSymbol.process)
sign_term <<= (sign_expr | exp_term)

# Multiplication without sign has precedence so that 2km / 3h means
# 2/3 km/h. Multiplication without sign is possible if RHS is a
# variable/constant/unit.
sm_exp_expr = Group(
    variable + expop + ZeroOrMore(signop) + exp_term
).setParseAction(operators.Exponent.process)
signless_mult_expr = Group(
    sign_term + OneOrMore(sm_exp_expr | variable)
).setParseAction(operators.InfixLeftSymbol.process)
signless_mult_term = (signless_mult_expr | sign_term)

# Multiplication with an explicit operator.
mult_expr = Group(
    signless_mult_term + OneOrMore(multop + signless_mult_term)
).setParseAction(operators.InfixLeftSymbol.process)
if n == 0: return Empty() else: return Group((Suppress(funOrbNumber(n)) + funCoefficients(n)).setResultsName("lastCoeffs")) # ====================> Basis File <========================== comment = Literal("#") + restOfLine parseAtomLabel = Word(srange("[A-Z]"), max=1) + Optional( Word(srange("[a-z]"), max=1)) parserBasisName = Word(alphanums + "-") + Suppress(restOfLine) parserFormat = OneOrMore(natural + NotAny(FollowedBy(point))) parserKey = (parseAtomLabel.setResultsName("atom") + parserBasisName.setResultsName("basisName") + Suppress(Literal("1"))) parserBasisData = OneOrMore(floatNumber) parserBasis = (parserKey + parserFormat.setResultsName("format") + parserBasisData.setResultsName("coeffs")) topParseBasis = OneOrMore(Suppress(comment)) + OneOrMore( Group(parserBasis + Suppress(Optional(OneOrMore(comment))))) # ===============================<>==================================== # Parsing From File
def _create_field_parser():
    """Create the pyparsing parser for bibfield rule definitions.

    The grammar below describes what the code in this function actually
    builds (BNF-like notation; INDENT/UNDENT are the indentation markers
    defined at the top of the function body)::

        config  ::= (rule | include | comment)+
        rule    ::= [persistent_identifier] [inherit_from] [override]
                    [extend] json_id ["," aliases] ":" INDENT body UNDENT
        include ::= "include" QUOTED_STRING
        comment ::= "#" REST_OF_LINE

        json_id ::= IDENT ["[0]" | "[n]"]
        aliases ::= alias ("," alias)*
        alias   ::= ALNUM_WORD ["[0]" | "[n]"]

        body    ::= any order, each at most once, of:
                    field_def, checker, json_extra, description,
                    producer, schema
        field_def ::= creator | derived | calculated

        creator      ::= "creator" ":" INDENT creator_body+ UNDENT
        creator_body ::= field_decorator* source_format ","
                         source_tag "," python_allowed_expr
        source_format ::= WORD
        source_tag    ::= QUOTED_STRING

        derived    ::= "derived" ":" INDENT derived_calculated_body UNDENT
        calculated ::= "calculated" ":" INDENT derived_calculated_body
                       UNDENT
        derived_calculated_body ::= field_decorator* python_allowed_expr

        field_decorator ::= parse_first | depends_on | only_if
                          | only_if_master_value | memoize | legacy
        parse_first          ::= "@parse_first" "(" ... ")"
        depends_on           ::= "@depends_on" "(" ... ")"
        only_if              ::= "@only_if" "(" ... ")"
        only_if_master_value ::= "@only_if_value" "(" ... ")"
        memoize              ::= "@memoize" "(" ... ")"
        legacy               ::= "@legacy" "(" ... ")"

        persistent_identifier ::= "@persistent_identifier" "(" ... ")"
        inherit_from          ::= "@inherit_from" "(" ... ")"
        override              ::= "@" "override"
        extend                ::= "@" "extend"
        master_format         ::= "@master_format" "(" ... ")"

        python_allowed_expr ::= dict_def | list_def | dict_access
                              | list_access | function_call
                              | REST_OF_LINE

        checker          ::= "checker" ":" INDENT checker_function+ UNDENT
        checker_function ::= [master_format] (IDENT ".")* IDENT "(" ... ")"

        json_extra ::= "json:" INDENT (dumps & loads) UNDENT
        dumps      ::= "dumps" "," python_allowed_expr
        loads      ::= "loads" "," python_allowed_expr

        description ::= "description:" INDENT doc_string UNDENT
                      | doc_string
        doc_string  ::= triple-quoted string (double or single quotes)

        producer      ::= "producer:" INDENT producer_body+ UNDENT
        producer_body ::= producer_code "," python_allowed_expr
        producer_code ::= WORD "(" ... ")"

        schema ::= "schema:" INDENT dict_def UNDENT

    :return: a pyparsing expression matching one or more rules, includes
        or comments.  The expression shares a private indentation stack
        through the INDENT/UNDENT parse actions.
    """
    # Stack of the columns at which the currently open blocks start.
    indent_stack = [1]

    def check_sub_indent(string, location, tokens):
        """Open a new block if the current column is deeper than the last."""
        cur_col = col(location, string)
        if cur_col > indent_stack[-1]:
            indent_stack.append(cur_col)
        else:
            raise ParseException(string, location, "not a subentry")

    def check_unindent(string, location, tokens):
        """Reject positions that do not dedent out of the current block."""
        if location >= len(string):
            return
        cur_col = col(location, string)
        if not (cur_col < indent_stack[-1] and cur_col <= indent_stack[-2]):
            raise ParseException(string, location, "not an unindent")

    def do_unindent():
        """Close the innermost block."""
        indent_stack.pop()

    INDENT = (lineEnd.suppress() + empty
              + empty.copy().setParseAction(check_sub_indent))
    UNDENT = FollowedBy(empty).setParseAction(check_unindent)
    # NOTE(review): setParseAction replaces previously set actions (only
    # addParseAction appends), so this call discards check_unindent and
    # UNDENT merely pops the stack.  Kept as-is because existing
    # configuration files may depend on this lenient behaviour.
    UNDENT.setParseAction(do_unindent)

    json_id = (
        (Word(alphas + "_", alphanums + "_") + Optional(oneOf("[0] [n]")))
        .setResultsName("json_id", listAllMatches=True)
        .setParseAction(lambda tokens: "".join(tokens))
    )
    aliases = delimitedList(
        (Word(alphanums + "_") + Optional(oneOf("[0] [n]")))
        .setParseAction(lambda tokens: "".join(tokens))
    ).setResultsName("aliases")

    ident = Word(alphas + "_", alphanums + "_")
    dict_def = originalTextFor(nestedExpr('{', '}'))
    list_def = originalTextFor(nestedExpr('[', ']'))
    dict_access = list_access = originalTextFor(ident + nestedExpr('[', ']'))
    function_call = originalTextFor(
        ZeroOrMore(ident + ".") + ident + nestedExpr('(', ')'))

    # "^" tries every alternative and keeps the longest match.
    python_allowed_expr = (
        dict_def ^ list_def ^ dict_access
        ^ list_access ^ function_call ^ restOfLine
    ).setResultsName("value", listAllMatches=True)

    persistent_identifier = (
        Suppress("@persistent_identifier") + nestedExpr("(", ")")
    ).setResultsName("persistent_identifier")
    legacy = (
        Suppress("@legacy") + originalTextFor(nestedExpr("(", ")"))
    ).setResultsName("legacy", listAllMatches=True)
    only_if = (
        Suppress("@only_if") + originalTextFor(nestedExpr("(", ")"))
    ).setResultsName("only_if")
    # The literal matched is "@only_if_value" even though the result is
    # named "only_if_master_value".
    only_if_master_value = (
        Suppress("@only_if_value") + originalTextFor(nestedExpr("(", ")"))
    ).setResultsName("only_if_master_value")
    depends_on = (
        Suppress("@depends_on") + originalTextFor(nestedExpr("(", ")"))
    ).setResultsName("depends_on")
    parse_first = (
        Suppress("@parse_first") + originalTextFor(nestedExpr("(", ")"))
    ).setResultsName("parse_first")
    memoize = (
        Suppress("@memoize") + nestedExpr("(", ")")
    ).setResultsName("memoize")
    field_decorator = (parse_first ^ depends_on ^ only_if
                       ^ only_if_master_value ^ memoize ^ legacy)

    # Independent decorators
    inherit_from = (
        Suppress("@inherit_from") + originalTextFor(nestedExpr("(", ")"))
    ).setResultsName("inherit_from")
    override = (Suppress("@") + "override").setResultsName("override")
    extend = (Suppress("@") + "extend").setResultsName("extend")
    master_format = (
        (Suppress("@master_format") + originalTextFor(nestedExpr("(", ")")))
        .setResultsName("master_format")
        .setParseAction(lambda toks: toks[0])
    )

    derived_calculated_body = (
        ZeroOrMore(field_decorator) + python_allowed_expr
    ).setResultsName('derived_calculated_def')

    derived = ("derived" + Suppress(":")
               + INDENT + derived_calculated_body + UNDENT)
    calculated = ("calculated" + Suppress(":")
                  + INDENT + derived_calculated_body + UNDENT)

    # NOTE(review): setParseAction is applied to pyparsing's shared
    # quotedString object itself (no copy), so removeQuotes also takes
    # effect wherever else that object is used, including ``include``
    # below.  Left unchanged because other code may rely on it.
    source_tag = (
        quotedString
        .setParseAction(removeQuotes)
        .setResultsName("source_tag", listAllMatches=True)
    )
    source_format = Word(alphas, alphanums + "_").setResultsName(
        "source_format", listAllMatches=True)
    creator_body = (
        ZeroOrMore(field_decorator) + source_format
        + Suppress(",") + source_tag + Suppress(",") + python_allowed_expr
    ).setResultsName("creator_def", listAllMatches=True)
    creator = ("creator" + Suppress(":")
               + INDENT + OneOrMore(creator_body) + UNDENT)

    field_def = (creator | derived | calculated).setResultsName(
        "type_field", listAllMatches=True)

    # JsonExtra
    json_dumps = (
        (Suppress('dumps') + Suppress(',') + python_allowed_expr)
        .setResultsName("dumps")
        .setParseAction(lambda toks: toks.value[0])
    )
    json_loads = (
        (Suppress("loads") + Suppress(",") + python_allowed_expr)
        .setResultsName("loads")
        .setParseAction(lambda toks: toks.value[0])
    )
    json_extra = (
        Suppress('json:') + INDENT + Each((json_dumps, json_loads)) + UNDENT
    ).setResultsName('json_ext')

    # Checker
    checker_function = (
        Optional(master_format) + ZeroOrMore(ident + ".") + ident
        + originalTextFor(nestedExpr('(', ')'))
    ).setResultsName("checker", listAllMatches=True)
    checker = ("checker" + Suppress(":")
               + INDENT + OneOrMore(checker_function) + UNDENT)

    # Description/Documentation
    doc_double = QuotedString(quoteChar='"""', multiline=True)
    doc_single = QuotedString(quoteChar="'''", multiline=True)
    doc_string = INDENT + (doc_double | doc_single) + UNDENT
    # NOTE(review): if toks[0] is the quoted string itself, toks[0][0] is
    # only its first character -- confirm the intended result before
    # relying on the "description:" form.
    description_body = (
        Suppress('description:') + doc_string
    ).setParseAction(lambda toks: toks[0][0])
    description = (
        description_body | doc_double | doc_single
    ).setResultsName('description')

    # Producer
    producer_code = (
        Word(alphas, alphanums + "_") + originalTextFor(nestedExpr("(", ")"))
    ).setResultsName('producer_code', listAllMatches=True)
    producer_body = (
        producer_code + Suppress(",") + python_allowed_expr
    ).setResultsName("producer_rule", listAllMatches=True)
    producer = (Suppress("producer:")
                + INDENT + OneOrMore(producer_body) + UNDENT)

    schema = (
        (Suppress('schema:') + INDENT + dict_def + UNDENT)
        .setParseAction(lambda toks: toks[0])
        .setResultsName('schema')
    )

    # "&" builds an Each: the optional sections may appear in any order.
    body = (Optional(field_def) & Optional(checker) & Optional(json_extra)
            & Optional(description) & Optional(producer) & Optional(schema))
    comment = Literal("#") + restOfLine + LineEnd()
    include = (Suppress("include") + quotedString).setResultsName(
        "includes", listAllMatches=True)
    rule = (
        Optional(persistent_identifier) + Optional(inherit_from)
        + Optional(override) + Optional(extend) + json_id
        + Optional(Suppress(",") + aliases) + Suppress(":")
        + INDENT + body + UNDENT
    ).setResultsName("rules", listAllMatches=True)

    return OneOrMore(rule | include | comment.suppress())