def define_identifier(self):
    """Build and return the pyparsing expression for an identifier."""
    # Collect every BMP code point the Unicode database flags as a digit,
    # so identifiers can be forbidden from starting with one.
    digit_chars = "".join(unichr(cp) for cp in xrange(0x10000)
                          if unichr(cp).isdigit())
    leading_digit = Regex("[%s]" % digit_chars, re.UNICODE)

    # The grammar supplies the extra character(s) allowed inside names.
    spacing = re.escape(self._grammar.get_token("identifier_spacing"))
    word = Regex("[\w%s]+" % spacing, re.UNICODE)

    # An individual identifier must not begin with a digit:
    single = Combine(~leading_digit + word)
    single.setName("individual_identifier")

    # A namespace is any number of identifiers, each followed by the
    # namespace separator token:
    separator = Suppress(self._grammar.get_token("namespace_separator"))
    namespace = Group(ZeroOrMore(single + separator))
    namespace.setName("namespace")

    # The full identifier: optional namespace prefix plus the identifier.
    full = Combine(namespace.setResultsName("namespace_parts") +
                   single.setResultsName("identifier"))
    full.setName("full_identifier")
    return full
def define_identifier(self): """ Return the syntax definition for an identifier. """ # --- Defining the individual identifiers: # Getting all the Unicode numbers in a single string: unicode_numbers = "".join([unichr(n) for n in xrange(0x10000) if unichr(n).isdigit()]) unicode_number_expr = Regex("[%s]" % unicode_numbers, re.UNICODE) space_char = re.escape(self._grammar.get_token("identifier_spacing")) identifier0 = Regex("[\w%s]+" % space_char, re.UNICODE) # Identifiers cannot start with a number: identifier0 = Combine(~unicode_number_expr + identifier0) identifier0.setName("individual_identifier") # --- Defining the namespaces: namespace_sep = Suppress(self._grammar.get_token("namespace_separator")) namespace = Group(ZeroOrMore(identifier0 + namespace_sep)) namespace.setName("namespace") # --- The full identifier, which could have a namespace: identifier = Combine(namespace.setResultsName("namespace_parts") + identifier0.setResultsName("identifier")) identifier.setName("full_identifier") return identifier
def nexus_iter(infile): import pyparsing pyparsing.ParserElement.enablePackrat() from pyparsing import Word, Literal, QuotedString, CaselessKeyword, \ OneOrMore, Group, Optional, Suppress, Regex, Dict ## beginblock = Suppress(CaselessKeyword("begin") + ## CaselessKeyword("trees") + ";") ## endblock = Suppress((CaselessKeyword("end") | ## CaselessKeyword("endblock")) + ";") comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]")) ## translate = CaselessKeyword("translate").suppress() name = Word(string.letters+string.digits+"_.") | QuotedString("'") ## ttrec = Group(Word(string.digits).setResultsName("number") + ## name.setResultsName("name") + ## Optional(",").suppress()) ## ttable = Group(translate + OneOrMore(ttrec) + Suppress(";")) newick = Regex(r'[^;]+;') tree = (CaselessKeyword("tree").suppress() + Optional("*").suppress() + name.setResultsName("tree_name") + comment.setResultsName("tree_comment") + Suppress("=") + comment.setResultsName("root_comment") + newick.setResultsName("newick")) ## treesblock = Group(beginblock + ## Optional(ttable.setResultsName("ttable")) + ## Group(OneOrMore(tree)) + ## endblock) def not_begin(s): return s.strip().lower() != "begin trees;" def not_end(s): return s.strip().lower() not in ("end;", "endblock;") def parse_ttable(f): ttable = {} while True: s = f.next().strip() if not s: continue if s.lower() == ";": break if s[-1] == ",": s = s[:-1] k, v = s.split() ttable[k] = v if s[-1] == ";": break return ttable # read lines between "begin trees;" and "end;" f = itertools.takewhile(not_end, itertools.dropwhile(not_begin, infile)) s = f.next().strip().lower() if s != "begin trees;": print sys.stderr, "Expecting 'begin trees;', got %s" % s raise StopIteration ttable = {} while True: try: s = f.next().strip() except StopIteration: break if not s: continue if s.lower() == "translate": ttable = parse_ttable(f) print "ttable: %s" % len(ttable) elif s.split()[0].lower()=='tree': match = tree.parseString(s) yield 
nexus.Newick(match, ttable)
def __get_spark_grammar():
    """Grammar for one Spark log line: date time status service message."""
    number = Word(nums)

    def _triplet(sep):
        # Three integer fields glued together by *sep*, e.g. "12/31/2020".
        return Optional(Combine(number + sep + number + sep + number))

    date_field = _triplet('/')
    time_field = _triplet(':')
    status_field = Optional(Word(string.ascii_uppercase))
    service_field = Optional(Word(alphas + nums + '/-_.[]:$'))
    message_field = Regex('.*')

    return (date_field.setResultsName('date')
            + time_field.setResultsName('time')
            + status_field.setResultsName('status')
            + service_field.setResultsName('service')
            + message_field.setResultsName('message'))
def __get_windows_grammar():
    """Grammar for one Windows-style log line: date time status service message."""
    digits_run = Word(nums)
    # "YYYY-MM-DD"
    date_field = Optional(Combine(digits_run + '-' + digits_run + '-' + digits_run))
    # "HH:MM:SS," -- the trailing comma is part of the timestamp here.
    time_field = Optional(Combine(digits_run + ':' + digits_run + ':' + digits_run + ','))
    status_field = Optional(Word(string.ascii_uppercase + string.ascii_lowercase))
    service_field = Optional(Word(string.ascii_uppercase))
    message_field = Regex('.*')
    return (date_field.setResultsName('date')
            + time_field.setResultsName('time')
            + status_field.setResultsName('status')
            + service_field.setResultsName('service')
            + message_field.setResultsName('message'))
def parse_treesblock(infile):
    """Parse a nexus trees block from *infile*, yielding a Newick record
    per tree statement; a preceding "translate" table, if present, is
    parsed into a dict and attached to every yielded record."""
    from pyparsing import Optional, Word, Regex, CaselessKeyword, Suppress
    from pyparsing import QuotedString
    comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
    name = Word(alphanums + "_") | QuotedString("'")
    newick = Regex(r'[^;]+;')
    tree = (CaselessKeyword("tree").suppress() +
            Optional("*").suppress() +
            name.setResultsName("tree_name") +
            comment.setResultsName("tree_comment") +
            Suppress("=") +
            comment.setResultsName("root_comment") +
            newick.setResultsName("newick"))

    def parse_ttable(f):
        # Read "<key> <value>[,;]" lines until the table terminator.
        ttable = {}
        while True:
            # BUG FIX: next(f) instead of the Python-2-only f.next().
            s = next(f).strip()
            # BUG FIX: skip blank lines -- s[-1] raised IndexError on "".
            if not s:
                continue
            if s.lower() == ";":
                break
            # BUG FIX: the old code stripped the trailing ';' and then
            # tested s[-1] == ';', which could never be true, so a table
            # ending with "key value;" was never detected as finished.
            last = False
            if s[-1] in ",;":
                last = s[-1] == ";"
                s = s[:-1]
            k, v = s.split()
            ttable[k] = v
            if last:
                break
        return ttable

    ttable = {}
    while True:
        try:
            s = next(infile).strip()
        except StopIteration:
            break
        # Skip blank lines instead of handing "" to the parser.
        if not s:
            continue
        if s.lower() == "translate":
            ttable = parse_ttable(infile)
            # print("ttable: %s" % len(ttable))
        else:
            match = tree.parseString(s)
            yield Newick(match, ttable)
def parse_treesblock(infile):
    """Parse a nexus trees block from *infile*, yielding a Newick record
    per tree statement; a preceding "translate" table, if present, is
    parsed into a dict and attached to every yielded record."""
    from pyparsing import Optional, Word, Regex, CaselessKeyword, Suppress
    from pyparsing import QuotedString
    comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
    name = Word(alphanums + "_") | QuotedString("'")
    newick = Regex(r'[^;]+;')
    tree = (CaselessKeyword("tree").suppress() +
            Optional("*").suppress() +
            name.setResultsName("tree_name") +
            comment.setResultsName("tree_comment") +
            Suppress("=") +
            comment.setResultsName("root_comment") +
            newick.setResultsName("newick"))

    def parse_ttable(f):
        # Read "<key> <value>[,;]" lines until the table terminator.
        ttable = {}
        while True:
            # BUG FIX: next(f) instead of the Python-2-only f.next().
            s = next(f).strip()
            # BUG FIX: skip blank lines -- s[-1] raised IndexError on "".
            if not s:
                continue
            if s.lower() == ";":
                break
            # BUG FIX: the old code stripped the trailing ';' and then
            # tested s[-1] == ';', which could never be true, so a table
            # ending with "key value;" was never detected as finished.
            last = False
            if s[-1] in ",;":
                last = s[-1] == ";"
                s = s[:-1]
            k, v = s.split()
            ttable[k] = v
            if last:
                break
        return ttable

    ttable = {}
    while True:
        try:
            s = next(infile).strip()
        except StopIteration:
            break
        # Skip blank lines instead of handing "" to the parser.
        if not s:
            continue
        if s.lower() == "translate":
            ttable = parse_ttable(infile)
            # print("ttable: %s" % len(ttable))
        else:
            match = tree.parseString(s)
            yield Newick(match, ttable)
def define_identifier(self):
    """ Return the syntax definition for an identifier. """
    # --- Defining the individual identifiers:
    # Getting all the Unicode numbers in a single string (unichr/xrange
    # on Python 2, chr/range on Python 3):
    try:
        unicode_numbers = "".join(
            [unichr(n) for n in xrange(0x10000) if unichr(n).isdigit()])
    except NameError:
        unicode_numbers = "".join(
            [chr(n) for n in range(0x10000) if chr(n).isdigit()])
    unicode_number_expr = Regex("[%s]" % unicode_numbers, re.UNICODE)
    space_char = re.escape(self._grammar.get_token("identifier_spacing"))
    identifier0 = Regex("[\w%s]+" % space_char, re.UNICODE)
    # Identifiers cannot start with a number:
    # BUG FIX: the negative lookahead had been dropped here, leaving
    # unicode_number_expr unused and letting identifiers begin with a
    # digit, contrary to the comment above.
    identifier0 = Combine(~unicode_number_expr + identifier0)
    identifier0.setName("individual_identifier")
    # --- Defining the namespaces:
    namespace_sep = Suppress(
        self._grammar.get_token("namespace_separator"))
    namespace = Group(ZeroOrMore(identifier0 + namespace_sep))
    namespace.setName("namespace")
    # --- The full identifier, which could have a namespace:
    identifier = Combine(
        namespace.setResultsName("namespace_parts") +
        identifier0.setResultsName("identifier"))
    identifier.setName("full_identifier")
    # An identifier must not be immediately followed by an operator
    # character:
    expop = Literal('^')
    multop = oneOf('* /')
    factop = Literal('!')
    modop = Literal('%')
    signop = oneOf('+ -')
    opers = expop | signop | multop | factop | modop
    identifier = identifier + NotAny(opers)
    return identifier
# quoted string is either just stuff within quotes, or stuff within quotes, within # which there is nested curliness quotedItem = Group(curlyString) | charsNoQuotecurly quotedString = QUOTE + ZeroOrMore(quotedItem) + QUOTE number = Regex("[0-9]+") # Basis characters (by exclusion) for variable / field names. The following # list of characters is from the btparse documentation anyName = Regex("[^\s\"#%'(),={}]+") # btparse says, and the test bibs show by experiment, that macro and field names # cannot start with a digit. In fact entry type names cannot start with a digit # either (see tests/bibs). Cite keys can start with a digit notDigname = Regex("[^\d\s\"#%'(),={}][^\s\"#%'(),={}]*") comment = AT + CaselessLiteral("comment") + LCURLY + charsNoCurly.setResultsName("comment") + RCURLY comment.setParseAction(Comment.fromParseResult) # The name types with their digiteyness notDigLower = notDigname.copy().setParseAction(lambda t: t[0].lower()) macroDef = notDigLower.copy() macroRef = notDigLower.copy().setParseAction(MacroReference.fromParseResult) fieldName = notDigLower.copy() entryType = notDigLower.setResultsName("entry type") citeKey = anyName.setResultsName("cite key") string = number | macroRef | quotedString | curlyString # There can be hash concatenation fieldValue = string + ZeroOrMore(HASH + string)
# either (see tests/bibs). Cite keys can start with a digit not_digname = Regex('[^\d\s"#%\'(),={}][^\s"#%\'(),={}]*') # Comment comments out to end of line comment = (AT + CaselessLiteral('comment') + Regex("[\s{(].*").leaveWhitespace()) # The name types with their digiteyness not_dig_lower = not_digname.copy().setParseAction( lambda t: t[0].lower()) macro_def = not_dig_lower.copy() macro_ref = not_dig_lower.copy().setParseAction(lambda t : Macro(t[0].lower())) field_name = not_dig_lower.copy() # Spaces in names mean they cannot clash with field names entry_type = not_dig_lower.setResultsName('entry type') cite_key = any_name.setResultsName('cite key') # Number has to be before macro name string = (number | macro_ref | quoted_string | curly_string) # There can be hash concatenation field_value = string + ZeroOrMore(HASH + string) field_def = Group(field_name + EQUALS + field_value) entry_contents = Dict(ZeroOrMore(field_def + COMMA) + Optional(field_def)) # Entry is surrounded either by parentheses or curlies entry = (AT + entry_type + bracketed(cite_key + COMMA + entry_contents)) # Preamble is a macro-like thing with no name preamble = AT + CaselessLiteral('preamble') + bracketed(field_value)
def nexus_iter(infile): import pyparsing pyparsing.ParserElement.enablePackrat() from pyparsing import Word, Literal, QuotedString, CaselessKeyword, \ OneOrMore, Group, Optional, Suppress, Regex, Dict ## beginblock = Suppress(CaselessKeyword("begin") + ## CaselessKeyword("trees") + ";") ## endblock = Suppress((CaselessKeyword("end") | ## CaselessKeyword("endblock")) + ";") comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]")) ## translate = CaselessKeyword("translate").suppress() name = Word(string.letters + string.digits + "_") | QuotedString("'") ## ttrec = Group(Word(string.digits).setResultsName("number") + ## name.setResultsName("name") + ## Optional(",").suppress()) ## ttable = Group(translate + OneOrMore(ttrec) + Suppress(";")) newick = Regex(r'[^;]+;') tree = (CaselessKeyword("tree").suppress() + Optional("*").suppress() + name.setResultsName("tree_name") + comment.setResultsName("tree_comment") + Suppress("=") + comment.setResultsName("root_comment") + newick.setResultsName("newick")) ## treesblock = Group(beginblock + ## Optional(ttable.setResultsName("ttable")) + ## Group(OneOrMore(tree)) + ## endblock) def not_begin(s): return s.strip().lower() != "begin trees;" def not_end(s): return s.strip().lower() not in ("end;", "endblock;") def parse_ttable(f): ttable = {} while True: s = f.next().strip() if s.lower() == ";": break if s[-1] in ",;": s = s[:-1] k, v = s.split() ttable[k] = v if s[-1] == ";": break return ttable # read lines between "begin trees;" and "end;" f = itertools.takewhile(not_end, itertools.dropwhile(not_begin, infile)) s = f.next().strip().lower() if s != "begin trees;": print sys.stderr, "Expecting 'begin trees;', got %s" % s raise StopIteration ttable = {} while True: try: s = f.next().strip() except StopIteration: break if s.lower() == "translate": ttable = parse_ttable(f) print "ttable: %s" % len(ttable) else: match = tree.parseString(s) yield nexus.Newick(match, ttable)
# An SP-type shell may carry several exponent lines.
bs_sp = Group(shell_sp + OneOrMore(exponent_sp))
# "****"-style separator that terminates a basis-set entry.
endbs = Word('*').suppress()
basis_set = Group(
    Str.setResultsName('bdescr') + Number +
    OneOrMore(bs | bs_sp).setResultsName('basis') +
    Optional(endbs))
# ECP body: skip the remainder of each header line, then read blocks of
# number triplets.
ecp_body = OneOrMore(eol + SkipTo(eol).suppress() +
                     Group(Number + OneOrMore(Group(Number + Number + Number))))
ecp = Group(
    Str.setResultsName('edescr') + Number.suppress() +
    SkipTo(Number).suppress() +
    Number.setResultsName('lmax') + Number.setResultsName('core') +
    ecp_body.setResultsName('ecp'))

# define grammar here
grammar = Optional(OneOrMore(comment)) + Optional(endbs) \
    + Group(OneOrMore(basis_set)) + Optional(OneOrMore(comment)) \
    + Optional(Group(OneOrMore(ecp)))

# Angular-momentum label -> index (mapping continues beyond this excerpt).
lstr = {
    'S': 0,
    'SP': 1,
    'P': 2,
    'D': 3,
    'F': 4,
    'G': 5,
def parse_line(line):
    """Classify one line of a TypeScript single-file component.

    Tries a fixed sequence of pyparsing grammars (setter, method, getter,
    constructor, script tags, interface, class, decorator, docstring
    markers/lines, property) and returns
    ``{'content_type': <kind>, 'content': <parsed dict>}`` for the first
    grammar that matches, or ``{}`` when none does.
    """
    # generic terms
    _name_re = '[a-zA-Z_][a-zA-Z0-9_]*'
    name_re = Regex(_name_re)
    perm = Or(['public', 'private']).setResultsName('permission')
    name = name_re.setResultsName('name')
    # script tags
    script_start = Regex('<script.*lang="ts">').setResultsName('script_start')
    script_stop = Regex('</script>').setResultsName('script_stop')
    # interface
    interface = Group(Suppress('interface') + name).setResultsName('interface')
    # class
    ext = Optional(
        Suppress('extends') + name_re.setResultsName('parent_class'))
    exp = Optional(ZeroOrMore(Regex('export|default')))
    class_ = Suppress(exp) + Suppress('class') + name + ext + Suppress('{')
    # decorator
    decorator = Suppress('@') + name + Suppress(Optional(Regex("\(.*\)")))

    def func(s, l, t):
        # NOTE(review): this builds a normalised copy of the parameter
        # dicts but returns the ParseResults `t` unchanged, so `output` is
        # discarded -- confirm whether the normalisation was meant to be
        # returned.
        output = t.asDict()
        params = []
        for item in output['parameters']:
            temp = dict(name=None, type=None, description=None)
            temp.update(item)
            params.append(temp)
        output['parameters'] = params
        return t

    # method
    val = Regex('[a-zA-Z_][a-zA-Z0-9_]*\[?\]?')
    ptype = val.setResultsName('type')
    dfal = Regex('".*"|[.*]|{.*}|' + _name_re).setResultsName('default')
    ret = val.setResultsName('returns')
    rtype = Optional(Suppress(':') + ret)
    opt = Optional(Suppress('=') + dfal)
    param = Group(name + Optional(Suppress(':') + ptype) + Optional(opt))
    params = delimitedList(
        param, delim=',').setResultsName('parameters').setParseAction(func)
    method = perm + name + Suppress('(') + Optional(params) + Suppress(
        ')') + rtype + Suppress('{')
    # constructor
    constructor = perm + Regex("constructor") + Suppress('(') + Optional(
        params) + Suppress(')') + rtype + Suppress('{')
    # getter
    getter = perm + Suppress('get') + name + Suppress('()') + rtype + Suppress(
        '{')
    # setter
    setter = perm + Suppress('set') + name + Suppress('(') + params + Suppress(
        ')') + rtype + Suppress('{')
    # property
    atype = Optional(Suppress(':') + name_re.setResultsName('type'))
    value = Regex('.*').setResultsName('value')
    val = Optional(Suppress('=') + value)
    prop = perm + name + atype + val + Suppress(';')
    # docstring start and stop
    docstart = Regex('/\*\*').setResultsName('docstart')
    docstop = Regex('\*/').setResultsName('docstop')
    # line of docstring
    doc_com = Regex('\*(?!/)')
    name_re = Regex('[a-zA-Z_][a-zA-Z0-9_]*')
    desc = Regex('.*').setResultsName('description')
    info = doc_com + desc
    param = Group(Suppress(doc_com) + Suppress('@param') + name +
                  desc).setResultsName('params')
    returns = Regex('.*').setResultsName('returns')
    returns = doc_com + Regex('@returns?') + returns
    docline = returns | param | info

    # Order matters: the first grammar that matches wins.
    parsers = [('setter', setter), ('method', method), ('getter', getter),
               ('constructor', constructor), ('script_start', script_start),
               ('script_stop', script_stop), ('interface', interface),
               ('class', class_), ('decorator', decorator),
               ('docstart', docstart), ('docstop', docstop),
               ('docline', docline), ('property', prop)]
    for ctype, parser in parsers:
        content = {}
        try:
            content = parser.parseString(line).asDict()
        # BUG FIX: a bare 'except:' also swallowed KeyboardInterrupt and
        # SystemExit; only ordinary exceptions (e.g. ParseException) should
        # move us on to the next candidate grammar.
        except Exception:
            continue
        return {'content_type': ctype, 'content': content}
    return {}
# published on PyPI. signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction( tokenMap(int)) variable = Word(alphas, bodyChars=alphanums) stack_item = Suppress(",") + (signed_integer | Suppress("*") | variable) flag = oneOf(list(VTT_MNEMONIC_FLAGS.keys())) # convert flag to binary string flag.setParseAction(tokenMap(lambda t: VTT_MNEMONIC_FLAGS[t])) flags = Combine(OneOrMore(flag)).setResultsName("flags") delta_point_index = pyparsing_common.integer.setResultsName("point_index") delta_rel_ppem = pyparsing_common.integer.setResultsName("rel_ppem") delta_step_no = signed_integer.setResultsName("step_no") # the step denominator is only used in VTT's DELTA[CP]* instructions, # and must always be 8 (sic!), so we can suppress it. delta_spec = (delta_point_index + Suppress("@") + delta_rel_ppem + delta_step_no + Optional(Literal("/8")).suppress()) delta = nestedExpr("(", ")", delta_spec, ignoreExpr=None) deltas = Group(OneOrMore(delta)).setResultsName("deltas") args = deltas | flags stack_items = OneOrMore(stack_item).setResultsName("stack_items") instruction = Group(mnemonic + Suppress("[") + Optional(args) + Suppress("]") + Optional(stack_items))
# Either a single port or a range, normalised to a (from, to) pair.
normalized_port_range = (port ^ port_range).setParseAction(to_port_range)
ports = delimitedList(normalized_port_range)('ports')

# IP addresses, name of another group, or sg-*
security_group = Regex("sg-[\w\d]+")
group_name = Regex("[\w\d\-]+")
mask = Word("/") + Word(nums).setParseAction(to_int)('mask')
# Dotted quad plus optional "/NN" mask.
ip = (Combine(Word(nums) + ('.' + Word(nums)) * 3)('ip') +
      Optional(mask)('mask')).setParseAction(normalize_ip)

parser = Optional(protocol)('protocol') + \
    Optional(port_) + \
    ports + \
    (ip.setResultsName('ip_and_mask') ^
     security_group.setResultsName('security_group') ^
     group_name('group_name'))


class Rule(object):
    # Value object describing a single security-group rule.
    def __init__(self, protocol, from_port, to_port, address=None,
                 group=None, group_name=None):
        """constructs a new rule

        :param protocol tcp or udp
        :param from_port
        :param to_port
        :param address
        :param group sg-style (should almost never be used)
        :param group_name
        """
        # Default to TCP when no protocol was parsed.
        self.protocol = protocol or "tcp"
        self.from_port = from_port
# ) # + tld_label # ) # Problem with above domain_fqdn is that PyParsing cannot do lookahead in time, so # we use the much-vaunted Regex() for domain_fqdn domain_fqdn_regex = '('\ + '(' \ + subdomain_label_regex \ + '\.' \ + '){0,16}' + \ domain_label_regex + '\.' \ + '){0,1}'\ + tld_label_regex domain_fqdn = Regex(domain_fqdn_regex) domain_fqdn.setName('<strict-fqdn>') domain_fqdn.setResultsName('domain_name') # Generic fully-qualified domain name (less stringent) domain_generic_fqdn = Combine( domain_generic_label + ZeroOrMore( Literal('.') + domain_generic_label ) + Optional(Char('.')) ) domain_generic_fqdn.setName('<generic-fqdn>') domain_generic_fqdn.setResultsName('domain_name') quoted_domain_generic_fqdn = ( Combine(squote - domain_generic_fqdn - squote)
def sql2table_list(tables, show_columns=True):
    """Build a pyparsing grammar for SQL dumps; CREATE TABLE statements
    append a Table record to *tables* as a parse side effect."""

    # Parse actions: each field kind is encoded as a prefixed string token
    # ("FK:", "PK:", "KEY:" or plain column text) that create_table_act
    # later decodes.
    def field_act(s, loc, tok):
        return " ".join(tok).replace('\n', '\\n')

    def field_list_act(s, loc, tok):
        return tok

    def create_table_act(s, loc, tok):
        table = Table(tok["tableName"], None, {}, {})
        for t in tok["fields"]:
            if str(t).startswith("FK:"):
                l = t[3:].split(":")
                if len(l) > 2:
                    table.fkeys[l[0]] = {"ftable": l[1], "fcoloumn": l[2]}
                else:
                    table.fkeys[l[0]] = {"ftable": l[1]}
            elif str(t).startswith("PK:"):
                table.pk = t[3:]
            elif str(t).startswith("KEY:"):
                pass
            else:
                l = t.split(" ")
                table.columns[l[0]] = " ".join(l[1:])
        tables.append(table)

    def add_fkey_act(s, loc, tok):
        return '{tableName}:{keyName}:{fkTable}:{fkCol}'.format(**tok)

    def fkey_act(s, loc, tok):
        return 'FK:{keyName}:{fkTable}:{fkCol}'.format(**tok)

    def fkey_nocols_act(s, loc, tok):
        return 'FK:{keyName}:{fkTable}'.format(**tok)

    # def fkey_list_act(s, loc, tok):
    #     return "\n ".join(tok)

    def other_statement_act(s, loc, tok):
        pass

    def join_string_act(s, loc, tok):
        return "".join(tok).replace('\n', '\\n')

    def quoted_default_value_act(s, loc, tok):
        return tok[0] + " " + "".join(tok[1::])

    def pk_act(s, loc, tok):
        return 'PK:{primary_key}'.format(**tok)

    def k_act(s, loc, tok):
        pass

    def no_act(s, loc, tok):
        pass

    # Basic tokens; lp/rp/c/q are suppressed punctuation.
    string = Regex('[a-zA-Z0-9=_]+')
    ws = OneOrMore(White()).suppress()
    lp = Regex('[(]').suppress()
    rp = Regex('[)]').suppress()
    c = Regex('[,]').suppress()
    q = Regex("[`]").suppress()

    # Balanced parentheses captured verbatim (recursively).
    parenthesis = Forward()
    parenthesis <<= "(" + ZeroOrMore(CharsNotIn("()") | parenthesis) + ")"
    parenthesis.setParseAction(join_string_act)

    quoted_string = "'" + ZeroOrMore(CharsNotIn("'")) + "'"
    quoted_string.setParseAction(join_string_act)

    quoted_default_value = "DEFAULT" + quoted_string + OneOrMore(
        CharsNotIn(", \n\t"))
    quoted_default_value.setParseAction(quoted_default_value_act)

    column_comment = CaselessKeyword("COMMENT") + quoted_string

    # PRIMARY KEY (col)
    primary_key = CaselessKeyword('PRIMARY').suppress() + CaselessKeyword(
        "KEY").suppress() + lp + string.setResultsName('primary_key') + rp
    primary_key.ignore("`")
    primary_key.setParseAction(pk_act)

    # [UNIQUE] KEY name (col, ...)
    key_def = Optional(CaselessKeyword('UNIQUE').suppress()) + CaselessKeyword(
        'KEY').suppress() + Word(alphanums + "_") + lp + delimitedList(
            string.setResultsName('key'), delim=",") + rp
    key_def.ignore("`")
    key_def.setParseAction(k_act)

    # CONSTRAINT name FOREIGN KEY (col) REFERENCES table (col) [options]
    fkey_def = CaselessKeyword("CONSTRAINT") + Word(
        alphanums + "_"
    ) + CaselessKeyword("FOREIGN") + CaselessKeyword("KEY") + lp + Word(
        alphanums + "_"
    ).setResultsName("keyName") + rp + CaselessKeyword("REFERENCES") + Word(
        alphanums + "._").setResultsName("fkTable") + lp + Word(
            alphanums + "_").setResultsName("fkCol") + rp + Optional(
                CaselessKeyword("DEFERRABLE")) + Optional(
                    CaselessKeyword("ON") +
                    (CaselessKeyword("DELETE") | CaselessKeyword("UPDATE")) +
                    (CaselessKeyword("CASCADE") | CaselessKeyword("RESTRICT")
                     | CaselessKeyword("NO ACTION")
                     | CaselessKeyword("SET NULL"))) + Optional(
                         CaselessKeyword("ON") +
                         (CaselessKeyword("DELETE")
                          | CaselessKeyword("UPDATE")) +
                         (CaselessKeyword("CASCADE")
                          | CaselessKeyword("RESTRICT")
                          | CaselessKeyword("NO ACTION")
                          | CaselessKeyword("SET NULL")))
    fkey_def.ignore("`")
    if show_columns:
        fkey_def.setParseAction(fkey_act)
    else:
        fkey_def.setParseAction(fkey_nocols_act)

    #fkey_list_def = ZeroOrMore(Suppress(",") + fkey_def)
    #fkey_list_def.setParseAction(fkey_list_act)

    # Ordinary column definition: name, type, then optional qualifiers.
    field_def = Word(alphanums + "_\"':-/[].") + Word(
        alphanums + "_\"':-/[].") + Optional(
            CaselessKeyword("NOT NULL")
            | CaselessKeyword("DEFAULT") + Word(alphanums + "_\"':-/[].")
        ) + Optional(
            OneOrMore(quoted_default_value | column_comment
                      | Word(alphanums + "_\"'`:-/[].") | parenthesis))
    field_def.ignore("`")
    # if columns:
    field_def.setParseAction(field_act)
    # else:
    #     field_def.setParseAction(no_act)

    field_list_def = delimitedList(\
        (primary_key.suppress() | \
         key_def.suppress() | \
         fkey_def | \
         field_def \
         ), delim=","\
        )
    #if columns else field_def.suppress()
    field_list_def.setParseAction(field_list_act)

    tablename_def = (Word(alphanums + "_.") | QuotedString("\""))
    tablename_def.ignore("`")

    # CREATE TABLE name (fields) [trailing options] ;
    create_table_def = CaselessKeyword("CREATE").suppress() + CaselessKeyword(
        "TABLE").suppress() + tablename_def.setResultsName(
            "tableName") + lp + field_list_def.setResultsName(
                "fields") + rp + ZeroOrMore(
                    Word(alphanums + "_\"'`:-/[].=")) + Word(";").suppress()
    create_table_def.setParseAction(create_table_act)

    # ALTER TABLE ONLY name ADD CONSTRAINT ... FOREIGN KEY ... ;
    add_fkey_def = CaselessKeyword(
        "ALTER") + "TABLE" + "ONLY" + tablename_def.setResultsName(
            "tableName") + "ADD" + "CONSTRAINT" + Word(
                alphanums + "_"
            ) + "FOREIGN" + "KEY" + "(" + Word(alphanums + "_").setResultsName(
                "keyName") + ")" + "REFERENCES" + Word(
                    alphanums + "._").setResultsName("fkTable") + "(" + Word(
                        alphanums + "_"
                    ).setResultsName("fkCol") + ")" + Optional(
                        Literal("DEFERRABLE")) + Optional(
                            Literal("ON") + "DELETE" +
                            (Literal("CASCADE") | Literal("RESTRICT"))) + ";"
    add_fkey_def.setParseAction(add_fkey_act)

    # Anything else up to ';' is ignored.
    other_statement_def = OneOrMore(CharsNotIn(";")) + ";"
    other_statement_def.setParseAction(other_statement_act)

    # "--" comments out the rest of the line.
    comment_def = "--" + ZeroOrMore(CharsNotIn("\n"))
    comment_def.setParseAction(other_statement_act)

    return OneOrMore(comment_def | create_table_def | add_fkey_def
                     | other_statement_def)
# Rank field (e.g. "2d", "5k"); may be empty.
_rank = _quoted(Optional(Word(alphanums)))


def _parse_date(s, l, t):
    # Parse an ISO "YYYY-MM-DD" date token into a datetime.
    try:
        return datetime.strptime(t[0], "%Y-%m-%d")
    except ValueError:
        # If the date is invalid, return the epoch
        # NOTE(review): utcfromtimestamp returns a NAIVE datetime and is
        # deprecated since Python 3.12 -- confirm callers expect naive.
        return datetime.utcfromtimestamp(0)


_date = _quoted(Word(nums + '-')).setParseAction(_parse_date)

# Define format expected for each field
_fields = [
    _int.setResultsName('Game_ID'),
    _quoted(Word(alphanums)).setResultsName('Tournament_Code'),
    _date.setResultsName('Game_Date'),
    _int.setResultsName('Round'),
    _int.setResultsName('Pin_Player_1'),
    _color.setResultsName('Color_1'),
    _rank.setResultsName('Rank_1'),
    _int.setResultsName('Pin_Player_2'),
    _color.setResultsName('Color_2'),
    _rank.setResultsName('Rank_2'),
    _int.setResultsName('Handicap'),
    _int.setResultsName('Komi'),
    _color.setResultsName('Result'),
    # If Sgf_Code is NULL then the key will not be inserted into the results dict
    _quoted(Optional(Word(alphanums + '-').setResultsName('Sgf_Code'))) | Literal('NULL'),
# Version 1 element = Regex("A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|" "E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|" "M[dgnot]|N[abdeiop]?|Os?|P[abdmortu]?|R[abefghnu]|" "S[bcegimnr]?|T[abcehilm]|Uu[bhopqst]|U|V|W|Xe|Yb?|Z[nr]") elementRef = Group( element + Optional( Word( digits ), default="1" ) ) formula = OneOrMore( elementRef ) fn = lambda elemList : sum( [ atomicWeight[elem]*int(qty) for elem,qty in elemList ] ) test( formula, "H2O", fn ) test( formula, "C6H5OH", fn ) test( formula, "NaCl", fn ) print # Version 2 - access parsed items by field name elementRef = Group( element.setResultsName("symbol") + \ Optional( Word( digits ), default="1" ).setResultsName("qty") ) formula = OneOrMore( elementRef ) fn = lambda elemList : sum( [ atomicWeight[elem.symbol]*int(elem.qty) for elem in elemList ] ) test( formula, "H2O", fn ) test( formula, "C6H5OH", fn ) test( formula, "NaCl", fn ) print # Version 3 - convert integers during parsing process integer = Word( digits ).setParseAction(lambda t:int(t[0])) elementRef = Group( element.setResultsName("symbol") + \ Optional( integer, default=1 ).setResultsName("qty") ) formula = OneOrMore( elementRef )
# ":name" section: the skeleton's name.
skeletonName = Keyword(":name") + bonename.setResultsName('name')
# One "unit value" pair, e.g. "mass 1.0" or "angle deg".
unitDefinition = Group(Word(alphas) + (floatValue | intValue | Word(alphas)))
unitSection = Keyword(":units") + \
    Dict(ZeroOrMore(unitDefinition)).setResultsName('units')
# Free text up to the next ":" section marker.
documentationSection = Keyword(':documentation') + \
    SkipTo(":").setResultsName('documentation')
# ":root" section; '&' (Each) lets the sub-clauses appear in any order.
rootSection = Group(
    Keyword(":root")
    & (Keyword("order") + channels.setResultsName('channels'))
    & (Keyword("position") + floatVector.setResultsName('position'))
    & (Keyword("axis") + rotationOrder.setResultsName("axisRotationOrder"))
    & (Keyword("orientation") + floatVector.setResultsName("axis"))).setResultsName('root')
# A single bone record between "begin" and "end".
bone = Group(begin + Keyword("id") + intValue + Keyword("name") +
             bonename.setResultsName("name") + Keyword("direction") +
             floatVector.setResultsName("direction") + Keyword("length") +
             floatValue.setResultsName("length") + Keyword("axis") +
             floatVector.setResultsName("axis") +
             rotationOrder.setResultsName("axisRotationOrder") + Optional(
                 Keyword("dof") + channels.setResultsName("channels") +
                 Keyword("limits") + limits.setResultsName("limits")) + end)
bonedataSection = (Keyword(":bonedata") +
                   Group(ZeroOrMore(bone)).setResultsName("bones"))
# One hierarchy line: parent bone followed by its children.
hierarchyEntry = Group(
    bonename.setResultsName("parent") +
    Group(OneOrMore(bonename)).setResultsName("children") +
    Suppress(LineEnd()))
hierarchySection = (
    Keyword(":hierarchy") + begin + LineEnd() +
    Dict(OneOrMore(hierarchyEntry)).setResultsName("hierarchy") + end)
def parser(text):
    """Build the path-query grammar and parse *text*, returning the
    ParseResults for the top-level expression."""
    var_any = Literal("_")
    p = Regex("[\w:]+").setResultsName("text")
    var_any = Regex("_")  #handled by p anyway
    attribute = Literal("@").suppress()
    eq = Literal("=").suppress()
    closure = (Literal("?") | Literal("*") | Literal("+")).setResultsName("closure")
    # "^name", "name$" or bare "name"; the anchor becomes the "modifier".
    test = Literal("^").setResultsName("modifier") + p | p + Literal("$").setResultsName("modifier") | p  #| var_any
    # Longer connectors are listed first so e.g. "//*" is not read as "//" + "*".
    axis = (Literal("\\\\*") | \
            Literal("\\\\") | \
            Literal("\\") | \
            Literal(".") | \
            Literal("//*") | \
            Literal("//") | \
            Literal("/") | \
            Literal("-->") | \
            Literal("<--") | \
            Literal("->") | \
            Literal("<-") | \
            Literal("==>") | \
            Literal("<==") | \
            Literal("=>") | \
            Literal("<=")).setResultsName("connector")
    g_left_brack = Literal("[").suppress()
    g_right_brack = Literal("]").suppress()

    # working
    """
    abspath = Forward()
    locstep = Forward()
    node = test.setResultsName("node")
    attr_test = Group(attribute.suppress() + node.setResultsName("attr") + eq.suppress() + node.setResultsName("attr_val")).setResultsName("attr_test")
    predicate = (Group(Literal("[").suppress() + attr_test + Literal("]").suppress()).setResultsName("predicate") |\
                 Group(Literal("[").suppress() + abspath + Literal("]").suppress()).setResultsName("predicate"))
    locstep << Group(axis.setResultsName("axis") + node + \
                     Optional(predicate + Optional(closure).setResultsName("closure"))).setResultsName("locstep")
    abs2 = abspath
    abspath << ( Group(locstep.setResultsName("left_step") + abs2).setResultsName("abspath") | \
                 locstep.setResultsName("right_step") )
    # TODO
    locpath = abspath
    fexpr = locpath.setResultsName("exp")
    """

    # clean
    locpath = Forward()
    steps = Forward()
    fexpr = locpath.setResultsName("exp")
    # "@attr=value" test inside a predicate.
    attr_test = Group(attribute + p.setResultsName("attr") + eq + p.setResultsName("attr_val"))
    pred_opt = (fexpr.setResultsName("predicate") | attr_test.setResultsName("attr_test"))
    # connector order handling is the same as EmuQL, but the root lacks a left, as it refers to context node
    nodetest = Group(test + Optional(g_left_brack + pred_opt + g_right_brack + Optional(closure)))
    steps << ( Group(nodetest("left") + axis + steps("right")) | \
               Group(test + Optional(g_left_brack + pred_opt + g_right_brack + Optional(closure))))
    locpath << Group(axis + steps.setResultsName("right"))
    return fexpr.parseString(text)
INTO|VALUES|DELETE|UPDATE|SET|CREATE|INDEX|USING|BTREE|HASH|
ON|INTEGER|FLOAT|DATETIME|DATE|VARCHAR|CHAR|TABLE|DATABASE|
DROP|ORDER|BY|ASC|DESC)
# (The alternation above continues a keyword definition that begins
# outside this excerpt.)

# Define basic symbols
LPAR, RPAR = map(Suppress, '()')
dot = Literal(".").suppress()
comma = Literal(",").suppress()
semi_colon = Literal(";").suppress()

# Basic identifier used to define vars, tables, columns
# (a negative lookahead keeps reserved keywords from matching).
identifier = ~keywords + Word(alphas, alphanums + '_')

# Literal Values
integer_literal = Regex(r"([+-]?[1-9][0-9]*|0)")
integer_literal = integer_literal.setResultsName('integer_literal')
float_literal = Regex(r"([+-]?[1-9][0-9]*|0)\.[0-9]+")
float_literal = float_literal.setResultsName('float_literal')
# Floats first so "1.5" is not read as integer "1".
numeric_literal = float_literal | integer_literal
string_literal = QuotedString("'").setResultsName('string_literal')
literal_value = (numeric_literal|string_literal|NULL)

# SQL-Type-names
INTEGER = INTEGER.setResultsName('type_name')
FLOAT = FLOAT.setResultsName('type_name')
DATETIME = DATETIME.setResultsName('type_name')
DATE = DATE.setResultsName('type_name')
VARCHAR = VARCHAR.setResultsName('type_name')
CHAR = CHAR.setResultsName('type_name')

# SQL-Data-types
elif root.high:
    return f'{root}({write_tree(root.high)},)'
elif root.low:
    return f'{root}(,{write_tree(root.low)})'
else:
    return f'{root}'


# Strategy tree grammar
node = Regex(rf'\w+[{NEVER_FIND_FLAG}]?')
LPAREN, COMMA, RPAREN = map(Suppress, '(,)')
tree = Forward()
subtree = Group(Optional(tree))
# "(high, low)"; '-' makes the match non-backtracking after '('.
subtrees = LPAREN - subtree.setResultsName(
    'high') - COMMA - subtree.setResultsName('low') - RPAREN
tree << node.setResultsName('root') - Optional(subtrees)


def read_tree(tree_str, gusher_map, start=BASKET_LABEL):
    """Read the strategy encoded in tree_str and build the corresponding
    decision tree. V(H, L) represents the tree with root node V, high
    subtree H, and low subtree L. A node name followed by * indicates that
    the gusher is being opened solely for information and the Goldie will
    never be found there."""
    def build_tree(
            tokens):  # recursively convert ParseResults object into GusherNode tree
        # NOTE(review): 'is not' compares object identity, not equality;
        # for a one-character string this only works because of CPython
        # interning -- '!=' is almost certainly what was meant here.
        findable = tokens.root[-1] is not NEVER_FIND_FLAG
        rootname = tokens.root.rstrip(NEVER_FIND_FLAG)
        try:
            root = GusherNode(rootname, gusher_map=gusher_map,
LessThanCondition, LessThanOrEqualCondition, RegexCondition,
    RegexNegatedCondition)
# (The names above are the tail of an import list that begins outside
# this excerpt.)

# Trailing spaces up to the newline, or a bare line end.
end_of_line = Regex(r' *\n') ^ LineEnd()

# "*** Settings ***" table: everything up to the next '*' marker.
settings_table = Literal('*** Settings ***') + Regex(r'[^\*]+(?=\*)')
settings_table.setParseAction(lambda t: '\n'.join(t))

variables_table = Literal('*** Variables ***') + Regex(r'[^\*]+(?=\*)')
variables_table.setParseAction(lambda t: '\n'.join(t))

# "*** Keywords ***" runs to the end of the input.
keywords_table = Literal('*** Keywords ***') + CharsNotIn('') + StringEnd()
keywords_table.setParseAction(lambda t: '\n'.join(t))

state_name = Regex(r'\w+( \w+)*')
state_name.leaveWhitespace()
state_name = state_name.setResultsName('state_name')

robo_step = Regex(r'([\w\$\{\}][ \w\$\{\}]*[\w\}]|\w)')
robo_step.leaveWhitespace()
robo_step = robo_step.setResultsName('robo_step')

variable = Regex(Variable.REGEX)
variable_value = Regex(r'[\w\$\{\}!?\-\=\_\.\/]+( [\w\$\{\}!?\-\=\_\.\/]+)*')

# Two or more consecutive spaces act as the cell separator.
splitter = Literal(' ') + OneOrMore(' ')
splitter.setParseAction(lambda t: ' ')

variable_values = (variable_value +
                   ZeroOrMore(splitter + variable_value)).setResultsName('variable_values')
# Keep only the values (every other token once separators collapse).
variable_values.setParseAction(lambda t: [[t[2 * i] for i in range(int((len(t) + 1) / 2))]])
def iter_trees(infile):
    """Yield one ``Newick`` object per tree found in *infile*.

    Reads the lines between ``begin trees;`` and ``end;``/``endblock;``
    (a NEXUS-style trees block — presumably; confirm against callers),
    parses the optional ``translate`` table into a label->name dict, and
    yields ``Newick(match, ttable)`` for every ``tree ...`` line.

    :param infile: iterable of text lines.
    :raises: nothing on a malformed header — it reports to stderr and
        simply stops yielding.
    """
    import pyparsing
    pyparsing.ParserElement.enablePackrat()
    from pyparsing import (
        Word, Literal, QuotedString, CaselessKeyword, CharsNotIn, OneOrMore,
        Group, Optional, Suppress, Regex, Dict, ZeroOrMore, alphanums, nums)

    # Optional "[&...]" metadata comment attached to a tree statement.
    comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
    name = Word(alphanums + "_.") | QuotedString("'")
    newick = Regex(r'[^;]+;')
    tree = (CaselessKeyword("tree").suppress() +
            Optional("*").suppress() +
            name.setResultsName("tree_name") +
            comment.setResultsName("tree_comment") +
            Suppress("=") +
            comment.setResultsName("root_comment") +
            newick.setResultsName("newick"))

    def not_begin(s):
        # Predicate: line is not the start of the trees block.
        return s.strip().lower() != "begin trees;"

    def not_end(s):
        # Predicate: line is not a block terminator.
        return s.strip().lower() not in ("end;", "endblock;")

    def parse_ttable(f):
        # Consume the "translate" table: "key value," pairs ended by ";".
        ttable = {}
        com = Suppress('[' + ZeroOrMore(CharsNotIn(']') + ']'))
        while True:
            s = next(f).strip()
            if not s:
                continue
            s = com.transformString(s).strip()
            if s.lower() == ";":
                break
            b = False
            if s[-1] in ",;":
                if s[-1] == ';':
                    b = True  # trailing ';' ends the table after this pair
                s = s[:-1]
            k, v = s.split()
            ttable[k] = v
            if b:
                break
        return ttable

    # Restrict iteration to the lines between "begin trees;" and "end;".
    f = itertools.takewhile(not_end, itertools.dropwhile(not_begin, infile))
    s = next(f).strip().lower()
    if s != "begin trees;":
        print("Expecting 'begin trees;', got %s" % s, file=sys.stderr)
        # PEP 479: raising StopIteration inside a generator is converted to
        # RuntimeError on Python 3.7+; a bare return ends iteration cleanly.
        return
    ttable = {}
    while True:
        try:
            s = next(f).strip()
        except StopIteration:
            break
        if not s:
            continue
        if s.lower() == "translate":
            ttable = parse_ttable(f)
        elif s.split()[0].lower() == 'tree':
            match = tree.parseString(s)
            yield Newick(match, ttable)
floatVector.setResultsName('position')) & (Keyword("axis") + rotationOrder.setResultsName("axisRotationOrder")) & (Keyword("orientation") + floatVector.setResultsName("axis")) ).setResultsName('root') bone = Group( begin + Keyword("id") + intValue + Keyword("name") + bonename.setResultsName("name") + Keyword("direction") + floatVector.setResultsName("direction") + Keyword("length") + floatValue.setResultsName("length") + Keyword("axis") + floatVector.setResultsName("axis") + rotationOrder.setResultsName("axisRotationOrder") + Optional( Keyword("dof") + channels.setResultsName("channels") + Keyword("limits") + limits.setResultsName("limits") ) + end ) bonedataSection = ( Keyword(":bonedata") + Group(ZeroOrMore(bone)).setResultsName("bones")
# --- Literals ---------------------------------------------------------------
nonzero_digits = Word('123456789')
# Integer: optional sign, then either a non-zero-led digit run or a lone "0".
integer_literal = Regex(r"([+-]?[1-9][0-9]*|0)")
# NOTE(review): the original also evaluated a bare, unassigned Literal(".")
# here — a dead expression statement with no effect; removed.
num_dot = Literal(".")
real_number_literal = Regex(r"([+-]?[1-9][0-9]*|0)\.[0-9]+")
# Real numbers must be tried first so "1.5" is not matched as integer "1".
numeric_literal = real_number_literal | integer_literal
string_literal = QuotedString("'")
literal_value = (numeric_literal | string_literal | NULL)
literal_value = literal_value.setName('literal_value')

# --- Data-types -------------------------------------------------------------
integer_type = INTEGER
float_type = FLOAT
datetime_type = DATETIME
date_type = DATE
string_size = integer_literal.setResultsName('size')
nvarchar_type = Group(VARCHAR + LPAR + string_size + RPAR)
nchar_type = Group(CHAR + LPAR + string_size + RPAR)
data_type = (integer_type | float_type | datetime_type | date_type |
             nvarchar_type | nchar_type).setResultsName('data_type')

# --- Table ------------------------------------------------------------------
alias = identifier.copy().setResultsName('alias')
simple_table_name = identifier.setResultsName("table_name")
table_name = simple_table_name.copy()

# --- Column -----------------------------------------------------------------
simple_column_name = identifier.setResultsName("column_name")
# Either "table.column" or a bare column name.
fully_qualified_column_name = Group(simple_table_name + dot + simple_column_name)
column_name = fully_qualified_column_name | simple_column_name
# either (see tests/bibs). Cite keys can start with a digit not_digname = Regex('[^\d\s"#%\'(),={}][^\s"#%\'(),={}]*') # Comment comments out to end of line comment = (AT + CaselessLiteral('comment') + Regex("[\s{(].*").leaveWhitespace()) # The name types with their digiteyness not_dig_lower = not_digname.copy().setParseAction( lambda t: t[0].lower()) macro_def = not_dig_lower.copy() macro_ref = not_dig_lower.copy().setParseAction(lambda t : Macro(t[0].lower())) field_name = not_dig_lower.copy() # Spaces in names mean they cannot clash with field names entry_type = not_dig_lower.setResultsName('entry type') cite_key = any_name.setResultsName('cite key') # Number has to be before macro name string = (number | macro_ref | quoted_string | curly_string) # There can be hash concatenation field_value = string + ZeroOrMore(HASH + string) field_def = Group(field_name + EQUALS + field_value) entry_contents = Dict(ZeroOrMore(field_def + COMMA) + Optional(field_def)) # Entry is surrounded either by parentheses or curlies entry = (AT + entry_type + bracketed(cite_key + COMMA + entry_contents)) # Preamble is a macro-like thing with no name preamble = AT + CaselessLiteral('preamble') + bracketed(field_value)
def iter_trees(infile):
    """Yield one ``Newick`` object per tree found in *infile*.

    Scans the lines between ``begin trees;`` and ``end;``/``endblock;``
    (a NEXUS-style trees block — presumably; confirm against callers),
    parses the optional ``translate`` table into a label->name dict, and
    yields ``Newick(match, ttable)`` for every ``tree ...`` statement.

    :param infile: iterable of text lines.
    """
    import pyparsing
    pyparsing.ParserElement.enablePackrat()
    from pyparsing import (Word, Literal, QuotedString, CaselessKeyword,
                           CharsNotIn, OneOrMore, Group, Optional, Suppress,
                           Regex, Dict, ZeroOrMore, alphanums, nums)

    # Optional "[&...]" metadata comment attached to a tree statement.
    comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
    name = Word(alphanums + "_.") | QuotedString("'")
    newick = Regex(r'[^;]+;')
    tree = (CaselessKeyword("tree").suppress() +
            Optional("*").suppress() +
            name.setResultsName("tree_name") +
            comment.setResultsName("tree_comment") +
            Suppress("=") +
            comment.setResultsName("root_comment") +
            newick.setResultsName("newick"))

    def not_begin(s):
        # True while we have not yet reached "begin trees;".
        return s.strip().lower() != "begin trees;"

    def not_end(s):
        # True while we have not reached a block terminator.
        return s.strip().lower() not in ("end;", "endblock;")

    def parse_ttable(f):
        # Consume the "translate" table: "key value," pairs terminated by ";".
        ttable = {}
        com = Suppress('[' + ZeroOrMore(CharsNotIn(']') + ']'))
        while True:
            s = next(f).strip()
            if not s:
                continue
            s = com.transformString(s).strip()
            if s.lower() == ";":
                break
            b = False
            if s[-1] in ",;":
                if s[-1] == ';':
                    b = True  # ';' on the pair means the table ends here
                s = s[:-1]
            k, v = s.split()
            ttable[k] = v
            if b:
                break
        return ttable

    # Restrict iteration to lines between "begin trees;" and "end;".
    f = itertools.takewhile(not_end, itertools.dropwhile(not_begin, infile))
    s = next(f).strip().lower()
    if s != "begin trees;":
        print("Expecting 'begin trees;', got %s" % s, file=sys.stderr)
        # PEP 479: raising StopIteration inside a generator is converted to
        # RuntimeError on Python 3.7+; a bare return ends iteration cleanly.
        return
    ttable = {}
    while True:
        try:
            s = next(f).strip()
        except StopIteration:
            break
        if not s:
            continue
        if s.lower() == "translate":
            ttable = parse_ttable(f)
        elif s.split()[0].lower() == 'tree':
            match = tree.parseString(s)
            yield Newick(match, ttable)
# Version 1 element = Regex("A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|" "E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|" "M[dgnot]|N[abdeiop]?|Os?|P[abdmortu]?|R[abefghnu]|" "S[bcegimnr]?|T[abcehilm]|Uu[bhopqst]|U|V|W|Xe|Yb?|Z[nr]") elementRef = Group( element + Optional( Word( digits ), default="1" ) ) formula = OneOrMore( elementRef ) fn = lambda elemList : sum( [ atomicWeight[elem]*int(qty) for elem,qty in elemList ] ) test( formula, "H2O", fn ) test( formula, "C6H5OH", fn ) test( formula, "NaCl", fn ) print # Version 2 - access parsed items by field name elementRef = Group( element.setResultsName("symbol") + \ Optional( Word( digits ), default="1" ).setResultsName("qty") ) formula = OneOrMore( elementRef ) fn = lambda elemList : sum( [ atomicWeight[elem.symbol]*int(elem.qty) for elem in elemList ] ) test( formula, "H2O", fn ) test( formula, "C6H5OH", fn ) test( formula, "NaCl", fn ) print # Version 3 - convert integers during parsing process integer = Word( digits ).setParseAction(lambda t:int(t[0])) elementRef = Group( element.setResultsName("symbol") + \ Optional( integer, default=1 ).setResultsName("qty") ) formula = OneOrMore( elementRef )
# btparse says, and the test bibs show by experiment, that macro and field names # cannot start with a digit. In fact entry type names cannot start with a digit # either (see tests/bibs). Cite keys can start with a digit not_digname = Regex("[^\d\s\"#%'(),={}][^\s\"#%'(),={}]*") # Comment comments out to end of line comment = AT + CaselessLiteral("comment") + Regex("[\s{(].*").leaveWhitespace() # The name types with their digiteyness not_dig_lower = not_digname.copy().setParseAction(lambda t: t[0].lower()) macro_def = not_dig_lower.copy() macro_ref = not_dig_lower.copy().setParseAction(lambda t: Macro(t[0].lower())) field_name = not_dig_lower.copy() # Spaces in names mean they cannot clash with field names entry_type = not_dig_lower.setResultsName("entry type") cite_key = any_name.setResultsName("cite key") # Number has to be before macro name string = number | macro_ref | quoted_string | curly_string # There can be hash concatenation field_value = string + ZeroOrMore(HASH + string) field_def = Group(field_name + EQUALS + field_value) entry_contents = Dict(ZeroOrMore(field_def + COMMA) + Optional(field_def)) # Entry is surrounded either by parentheses or curlies entry = AT + entry_type + bracketed(cite_key + COMMA + entry_contents) # Preamble is a macro-like thing with no name preamble = AT + CaselessLiteral("preamble") + bracketed(field_value) # Macros (aka strings)