def getLogLineBNF():
    """Return the pyparsing grammar for one web-server access-log line.

    The grammar is built on the first call and cached in the module-level
    ``logLineBNF`` global; every later call returns that same object.

    Result names set on the grammar: ``ipAddr``, ``auth``, ``timestamp``
    (a group of the date/time text and the time-zone offset), ``cmd``
    (post-processed by ``getCmdFields``), ``statusCode``, ``numBytesSent``,
    ``referrer`` and ``clientSfw`` (the last two go through
    ``removeQuotes``).
    """
    global logLineBNF
    if logLineBNF is None:
        integer = Word(nums)
        ipAddress = delimitedList(integer, ".", combine=True)

        timeZoneOffset = Word("+-", nums)
        # ascii_uppercase/ascii_lowercase exist on both Python 2 and 3;
        # string.uppercase/string.lowercase are Python 2 only and
        # locale-dependent.
        month = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3)
        serverDateTime = Group(
            Suppress("[")
            + Combine(integer + "/" + month + "/" + integer
                      + ":" + integer + ":" + integer + ":" + integer)
            + timeZoneOffset
            + Suppress("]"))

        logLineBNF = (
            ipAddress.setResultsName("ipAddr")
            + Suppress("-")
            + ("-" | Word(alphas + nums + "@._")).setResultsName("auth")
            + serverDateTime.setResultsName("timestamp")
            + dblQuotedString.setResultsName("cmd").setParseAction(getCmdFields)
            + (integer | "-").setResultsName("statusCode")
            + (integer | "-").setResultsName("numBytesSent")
            + dblQuotedString.setResultsName("referrer").setParseAction(removeQuotes)
            + dblQuotedString.setResultsName("clientSfw").setParseAction(removeQuotes))
    return logLineBNF
def define_identifier(self):
    """
    Return the syntax definition for an identifier.

    A full identifier is zero or more namespace parts, each followed by the
    grammar's ``namespace_separator`` token, and then one individual
    identifier.  An individual identifier is made of word characters plus
    the character(s) of the grammar's ``identifier_spacing`` token, and it
    may not start with a Unicode digit.

    """
    # --- Defining the individual identifiers:
    # Getting all the Unicode numbers in a single string.  ``unichr`` and
    # ``xrange`` only exist on Python 2, so fall back to the Python 3
    # equivalents when they are missing.
    try:
        to_char, code_points = unichr, xrange(0x10000)
    except NameError:
        to_char, code_points = chr, range(0x10000)
    unicode_numbers = "".join(
        [to_char(n) for n in code_points if to_char(n).isdigit()])
    unicode_number_expr = Regex("[%s]" % unicode_numbers, re.UNICODE)
    space_char = re.escape(self._grammar.get_token("identifier_spacing"))
    # Raw string: "\w" in a plain literal is an invalid escape sequence.
    identifier0 = Regex(r"[\w%s]+" % space_char, re.UNICODE)
    # Identifiers cannot start with a number:
    identifier0 = Combine(~unicode_number_expr + identifier0)
    identifier0.setName("individual_identifier")

    # --- Defining the namespaces:
    namespace_sep = Suppress(
        self._grammar.get_token("namespace_separator"))
    namespace = Group(ZeroOrMore(identifier0 + namespace_sep))
    namespace.setName("namespace")

    # --- The full identifier, which could have a namespace:
    identifier = Combine(
        namespace.setResultsName("namespace_parts")
        + identifier0.setResultsName("identifier"))
    identifier.setName("full_identifier")

    return identifier
def build_select_grammar():
    """Build the pyparsing grammar for a single-table SELECT statement.

    Shape: ``select <*|columns> from <table> [where ...] [order by ...]
    [limit [offset ,] rows]``.  All keywords are matched caselessly.

    Result names: ``columns``, ``table``, ``where`` (with ``operand_left``,
    ``operator``, ``operand_right``), ``order`` (with ``order_by_cols`` and
    ``order_by_type``) and ``limit`` (with ``offset`` and ``rows_limit``).

    Returns the ``Forward`` expression holding the grammar.
    """
    def caseless(word):
        # Every SQL keyword in this grammar is matched case-insensitively.
        return Keyword(word, caseless=True)

    column_list = Group(delimitedList(identifier_token, ","))
    comparison = oneOf("= != < > >= <=")

    where_clause = (
        caseless("where")
        + identifier_token.setResultsName("operand_left")
        + comparison.setResultsName("operator")
        + Word(alphanums).setResultsName("operand_right"))

    direction = (caseless("asc").setResultsName("order_by_type")
                 | caseless("desc").setResultsName("order_by_type"))
    order_clause = (
        caseless("order by")
        + column_list.setResultsName("order_by_cols")
        + Optional(direction))

    limit_clause = (
        caseless("limit")
        + Optional(Word(nums).setResultsName("offset") + Literal(","))
        + Word(nums).setResultsName("rows_limit"))

    statement = Forward()
    statement << (
        caseless("select")
        + ('*' | column_list).setResultsName("columns")
        + caseless("from")
        + table_name_token.setResultsName("table")
        + Optional(where_clause).setResultsName("where")
        + Optional(order_clause).setResultsName("order")
        + Optional(limit_clause).setResultsName("limit"))
    return statement
def getLogLineBNF():
    """Return the cached pyparsing grammar for a web-server log line.

    On the first call the grammar is assembled and stored in the
    module-level ``logLineBNF`` global; afterwards the stored expression is
    returned unchanged.

    Result names: ``ipAddr``, ``auth``, ``timestamp`` (grouped date/time
    string plus time-zone offset), ``cmd`` (handled by ``getCmdFields``),
    ``statusCode``, ``numBytesSent``, ``referrer`` and ``clientSfw`` (both
    passed through ``removeQuotes``).
    """
    global logLineBNF
    if logLineBNF is not None:
        return logLineBNF

    integer = Word(nums)
    ipAddress = delimitedList(integer, ".", combine=True)

    timeZoneOffset = Word("+-", nums)
    # string.uppercase / string.lowercase are gone in Python 3 and depend
    # on the locale in Python 2; the ascii_* constants work on both.
    month = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3)
    serverDateTime = Group(
        Suppress("[")
        + Combine(integer + "/" + month + "/" + integer
                  + ":" + integer + ":" + integer + ":" + integer)
        + timeZoneOffset
        + Suppress("]"))

    logLineBNF = (
        ipAddress.setResultsName("ipAddr")
        + Suppress("-")
        + ("-" | Word(alphas + nums + "@._")).setResultsName("auth")
        + serverDateTime.setResultsName("timestamp")
        + dblQuotedString.setResultsName("cmd").setParseAction(getCmdFields)
        + (integer | "-").setResultsName("statusCode")
        + (integer | "-").setResultsName("numBytesSent")
        + dblQuotedString.setResultsName("referrer").setParseAction(removeQuotes)
        + dblQuotedString.setResultsName("clientSfw").setParseAction(removeQuotes))
    return logLineBNF
def parseEqun(equation):
    """Parse a chemical equation into per-compound element counts.

    ``equation`` has the form ``"A + B -> C + D"`` where every compound
    starts with an upper-case letter and continues with letters or digits.

    Returns a ``(lhs, rhs)`` pair of dicts.  Each dict maps a compound
    string to a ``Counter`` of element symbol -> number of atoms; an element
    without an explicit number counts as 1.

    Raises whatever ``parseString`` raises when the equation or one of its
    compounds does not match the grammar.
    """
    compound = Word(ascii_uppercase,
                    ascii_uppercase + ascii_lowercase + digits)
    side = Group(ZeroOrMore(compound + Suppress('+')) + compound)
    whole_equation = (side.setResultsName('lhs')
                      + "->"
                      + side.setResultsName('rhs'))
    parsed = whole_equation.parseString(equation)

    symbol = Word(ascii_uppercase, ascii_lowercase)
    amount = Word(digits).setParseAction(lambda toks: int(toks[0]))
    formula_expr = OneOrMore(Group(symbol + Optional(amount, default=1)))

    def tally(formula):
        # Sum the atom counts of one compound, merging repeated elements.
        totals = Counter()
        for name, qty in formula_expr.parseString(formula):
            totals[name] += qty
        return totals

    def tally_side(key):
        return {formula: tally(formula) for formula in parsed[key].asList()}

    return tally_side('lhs'), tally_side('rhs')
def define_identifier(self):
    """
    Return the syntax definition for an identifier.

    A full identifier is zero or more namespace parts, each followed by the
    grammar's ``namespace_separator`` token, and then one individual
    identifier.  An individual identifier is made of word characters plus
    the character(s) of the grammar's ``identifier_spacing`` token, and it
    may not start with a Unicode digit.

    """
    # --- Defining the individual identifiers:
    # Getting all the Unicode numbers in a single string.  ``unichr`` and
    # ``xrange`` only exist on Python 2, so fall back to the Python 3
    # equivalents when they are missing.
    try:
        to_char, code_points = unichr, xrange(0x10000)
    except NameError:
        to_char, code_points = chr, range(0x10000)
    unicode_numbers = "".join(
        [to_char(n) for n in code_points if to_char(n).isdigit()])
    unicode_number_expr = Regex("[%s]" % unicode_numbers, re.UNICODE)
    space_char = re.escape(self._grammar.get_token("identifier_spacing"))
    # Raw string: "\w" in a plain literal is an invalid escape sequence.
    identifier0 = Regex(r"[\w%s]+" % space_char, re.UNICODE)
    # Identifiers cannot start with a number:
    identifier0 = Combine(~unicode_number_expr + identifier0)
    identifier0.setName("individual_identifier")

    # --- Defining the namespaces:
    namespace_sep = Suppress(
        self._grammar.get_token("namespace_separator"))
    namespace = Group(ZeroOrMore(identifier0 + namespace_sep))
    namespace.setName("namespace")

    # --- The full identifier, which could have a namespace:
    identifier = Combine(
        namespace.setResultsName("namespace_parts")
        + identifier0.setResultsName("identifier"))
    identifier.setName("full_identifier")

    return identifier
def get_grammar():
    """Build the pyparsing grammar for the SQL-like query language.

    Every clause is optional and they appear in this order: ``SELECT``
    columns, ``FROM`` a single-quoted directory, ``WHERE`` conditions joined
    by ``AND``, ``ORDER BY`` one value with optional ``ASC``/``DESC``, and
    ``LIMIT`` a number.  Keywords are caseless.

    A value is a function call, a column name (letters and underscores) or a
    literal (a number with an optional suffix from ``LITERAL_SUFFIXES``, or a
    single-quoted string).  Binary operators come from ``OPERATORS``.

    Result names: ``columns``, ``directory``, ``condition``, ``order_by``
    and ``limit``.
    """
    keyword = CaselessKeyword
    name_chars = alphas + '_'

    column = Word(name_chars)
    suffix = oneOf(' '.join(LITERAL_SUFFIXES), caseless=True)
    literal = Combine(Word(nums) + Optional(suffix)) | sglQuotedString

    # A function call may take values, including nested calls, as arguments.
    funcall = Forward()
    value = funcall | column | literal
    funcall << Group(
        Word(name_chars)
        + Suppress('(')
        + Group(delimitedList(value))
        + Suppress(')'))

    bin_op = oneOf(' '.join(OPERATORS), caseless=True)

    columns = (Group(delimitedList(value)) | '*').setResultsName('columns')
    from_clause = (
        keyword('FROM') + QuotedString("'").setResultsName('directory'))

    comparison = Group(Optional(keyword('NOT')) + value + bin_op + value)
    bare_value = Group(Optional(keyword('NOT')) + value)
    condition = comparison | bare_value
    conditions = Group(delimitedList(condition, delim=keyword('AND')))
    where_clause = (
        keyword('WHERE') + conditions.setResultsName('condition'))

    ordering = Group(value + Optional(keyword('ASC') | keyword('DESC')))
    order_by_clause = (
        keyword('ORDER BY') + ordering.setResultsName('order_by'))

    limit_clause = keyword('LIMIT') + Word(nums).setResultsName('limit')
    select_clause = keyword('SELECT') + columns

    return (Optional(select_clause)
            + Optional(from_clause)
            + Optional(where_clause)
            + Optional(order_by_clause)
            + Optional(limit_clause))
def parse(str):
    """Parse a simple SQL ``SELECT`` statement.

    Supported shape: ``select <*|columns> from <tables> [where <expr>]``,
    where the where-expression combines comparisons, ``in (...)`` lists or
    sub-selects and parenthesised expressions with ``and``/``or``.
    ``--`` comments running to the end of the line are ignored.

    Result names on the parsed tokens: ``columns``, ``tables``, ``where``.

    On a ``ParseException`` a caret marking the failing column, the error
    message and the exception itself are printed.

    Returns the parse results, or ``''`` when the statement did not parse.
    (Previously the results were computed and then dropped.)
    """
    tokens = ''

    # define SQL tokens
    selectStmt = Forward()
    selectToken = Keyword("select", caseless=True)
    fromToken = Keyword("from", caseless=True)

    ident = Word(alphas, alphanums + "_$").setName("identifier")
    columnName = Upcase(delimitedList(ident, ".", combine=True))
    columnNameList = Group(delimitedList(columnName))
    tableName = Upcase(delimitedList(ident, ".", combine=True))
    tableNameList = Group(delimitedList(tableName))

    whereExpression = Forward()
    and_ = Keyword("and", caseless=True)
    or_ = Keyword("or", caseless=True)
    in_ = Keyword("in", caseless=True)

    E = CaselessLiteral("E")
    binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
    arithSign = Word("+-", exact=1)
    realNum = Combine(
        Optional(arithSign)
        + (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums)))
        + Optional(E + Optional(arithSign) + Word(nums)))
    intNum = Combine(
        Optional(arithSign)
        + Word(nums)
        + Optional(E + Optional("+") + Word(nums)))

    # need to add support for alg expressions
    columnRval = realNum | intNum | quotedString | columnName
    whereCondition = Group(
        (columnName + binop + columnRval)
        | (columnName + in_ + "(" + delimitedList(columnRval) + ")")
        | (columnName + in_ + "(" + selectStmt + ")")
        | ("(" + whereExpression + ")"))
    whereExpression << whereCondition + ZeroOrMore(
        (and_ | or_) + whereExpression)

    # define the grammar
    selectStmt << (
        selectToken
        + ('*' | columnNameList).setResultsName("columns")
        + fromToken
        + tableNameList.setResultsName("tables")
        + Optional(Group(CaselessLiteral("where") + whereExpression),
                   "").setResultsName("where"))

    simpleSQL = selectStmt

    # define Oracle comment format, and ignore them
    oracleSqlComment = "--" + restOfLine
    simpleSQL.ignore(oracleSqlComment)

    try:
        tokens = simpleSQL.parseString(str)
    except ParseException as err:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(" " * err.loc + "^\n" + err.msg)
        print(err)
    return tokens
def _create_filter_parser():
    """Build the pyparsing grammar for triple-pattern filter expressions.

    A triple is written ``<subj pred obj>``.  Subject and predicate are each
    a ``*`` wildcard, a URI term (printable characters other than ``>`` and
    ``*``, not starting with a double quote) or a variable (``?`` followed by
    a name).  The object is one of those or a double-quoted literal with
    backslash escapes.

    Triples and parenthesised sub-expressions are combined with ``AND`` or
    ``OR``.

    Result names: ``subj``, ``pred``, ``obj`` / ``objlit``, ``triple``,
    ``and`` and ``or``.
    """
    def hidden(symbol):
        # Match the symbol but keep it out of the results.
        return Literal(symbol).suppress()

    conj = Keyword('AND')
    disj = Keyword('OR')

    variable = Literal('?') + Word(alphanums + '_').leaveWhitespace()
    uri_term = NotAny(Literal('"')) + Word(printables, excludeChars='>*')
    uri_part = Keyword('*') ^ uri_term ^ variable
    literal_term = QuotedString(quoteChar='"', escChar='\\')

    object_part = (Group(uri_part).setResultsName('obj')
                   ^ Group(literal_term).setResultsName('objlit'))
    triple = Group(
        hidden('<')
        + uri_part.setResultsName('subj')
        + uri_part.setResultsName('pred')
        + object_part
        + hidden('>'))

    expr = Forward()
    atom = (triple.setResultsName('triple')
            | hidden('(') + expr + hidden(')'))
    and_group = Group(atom + ZeroOrMore(conj.suppress() + atom))
    or_group = Group(atom + ZeroOrMore(disj.suppress() + atom))
    expr << (and_group.setResultsName('and') ^ or_group.setResultsName('or'))
    return expr
def _create_parser(self):
    """Build and return the pyparsing grammar for an IDSL file.

    An IDSL file is zero or more ``import "path";`` lines followed by one
    ``module name { ... };`` whose contents are struct, enum, exception,
    dictionary, sequence and interface definitions.  C++-style comments are
    ignored.

    Top-level result names are ``imports`` and ``module``; the module
    carries ``name`` and ``contents``.

    The language keywords are matched with ``Keyword``.  The previous
    ``Word("struct")``-style expressions matched any run of the letters in
    the word (for example ``"trust"``), not the keyword itself.
    """
    # Imported locally so the method does not depend on how the module
    # imports pyparsing.
    from pyparsing import Keyword

    # Punctuation.  Word(...) is kept here on purpose: it also accepts a
    # repeated symbol such as ";;".
    semicolon = Suppress(Word(";"))
    quote = Suppress(Word("\""))
    op = Suppress(Word("{"))
    cl = Suppress(Word("}"))
    opp = Suppress(Word("("))
    clp = Suppress(Word(")"))
    lt = Suppress(Word("<"))
    gt = Suppress(Word(">"))
    eq = Suppress(Word("="))
    identifier = Word(alphas + "_", alphanums + "_")
    typeIdentifier = Word(alphas + "_", alphanums + "_:")

    structIdentifer = Group(
        typeIdentifier.setResultsName('type')
        + identifier.setResultsName('identifier')
        + Optional(eq)
        + Optional(CharsNotIn(";").setResultsName('defaultValue'))
        + semicolon)
    structIdentifers = Group(OneOrMore(structIdentifer))

    ## Imports
    idslImport = (Suppress(Keyword("import")) + quote
                  + CharsNotIn("\";").setResultsName('path')
                  + quote + semicolon)
    idslImports = ZeroOrMore(idslImport)

    structDef = (Keyword("struct").setResultsName('type')
                 + identifier.setResultsName('name')
                 + op
                 + structIdentifers.setResultsName("structIdentifiers")
                 + cl + semicolon)
    dictionaryDef = (Keyword("dictionary").setResultsName('type')
                     + lt + CharsNotIn("<>").setResultsName('content') + gt
                     + identifier.setResultsName('name')
                     + semicolon)
    sequenceDef = (Keyword("sequence").setResultsName('type')
                   + lt + typeIdentifier.setResultsName('typeSequence') + gt
                   + identifier.setResultsName('name')
                   + semicolon)
    enumDef = (Keyword("enum").setResultsName('type')
               + identifier.setResultsName('name')
               + op + CharsNotIn("{}").setResultsName('content') + cl
               + semicolon)
    exceptionDef = (Keyword("exception").setResultsName('type')
                    + identifier.setResultsName('name')
                    + op + CharsNotIn("{}").setResultsName('content') + cl
                    + semicolon)

    raiseDef = (Suppress(Keyword("throws")) + typeIdentifier
                + ZeroOrMore(Literal(',') + typeIdentifier))
    decoratorDef = Literal('idempotent') | Literal('out')

    retValDef = typeIdentifier.setResultsName('ret')

    firstParam = Group(
        Optional(decoratorDef.setResultsName('decorator'))
        + typeIdentifier.setResultsName('type')
        + identifier.setResultsName('name'))
    nextParam = Suppress(Word(',')) + firstParam
    params = firstParam + ZeroOrMore(nextParam)

    remoteMethodDef = Group(
        Optional(decoratorDef.setResultsName('decorator'))
        + retValDef.setResultsName('ret')
        + typeIdentifier.setResultsName('name')
        + opp + Optional(params).setResultsName('params') + clp
        + Optional(raiseDef.setResultsName('raise'))
        + semicolon)
    interfaceDef = (Keyword('interface').setResultsName('type')
                    + typeIdentifier.setResultsName('name')
                    + op
                    + Group(ZeroOrMore(remoteMethodDef)).setResultsName('methods')
                    + cl + semicolon)

    moduleContent = Group(structDef | enumDef | exceptionDef
                          | dictionaryDef | sequenceDef | interfaceDef)
    module = (Suppress(Keyword("module"))
              + identifier.setResultsName("name")
              + op
              + ZeroOrMore(moduleContent).setResultsName("contents")
              + cl + semicolon)

    IDSL = idslImports.setResultsName("imports") + module.setResultsName("module")
    IDSL.ignore(cppStyleComment)
    return IDSL
def _create_filter_parser():
    """Return the grammar used to parse triple filter expressions.

    The basic unit is a triple, ``<subj pred obj>``.  ``subj`` and ``pred``
    may be the ``*`` wildcard, a URI term (printables except ``>`` and
    ``*``, and not beginning with ``"``) or a ``?name`` variable.  The
    object is either such a URI part (result name ``obj``) or a
    double-quoted literal using ``\\`` as escape (result name ``objlit``).

    An expression is a group of atoms joined by ``AND`` (result name
    ``and``) or by ``OR`` (result name ``or``); an atom is a triple or a
    parenthesised expression.
    """
    def drop(symbol):
        # Consume the symbol without adding it to the parse results.
        return Literal(symbol).suppress()

    variable = Literal('?') + Word(alphanums + '_').leaveWhitespace()
    uri_term = NotAny(Literal('"')) + Word(printables, excludeChars='>*')
    uri_part = Keyword('*') ^ uri_term ^ variable
    literal_term = QuotedString(quoteChar='"', escChar='\\')

    triple_object = (Group(uri_part).setResultsName('obj')
                     ^ Group(literal_term).setResultsName('objlit'))
    triple = Group(
        drop('<')
        + uri_part.setResultsName('subj')
        + uri_part.setResultsName('pred')
        + triple_object
        + drop('>'))

    expr = Forward()
    atom = (triple.setResultsName('triple')
            | drop('(') + expr + drop(')'))

    and_separator = Keyword('AND').suppress()
    or_separator = Keyword('OR').suppress()
    and_group = Group(atom + ZeroOrMore(and_separator + atom))
    or_group = Group(atom + ZeroOrMore(or_separator + atom))

    expr << (and_group.setResultsName('and') ^ or_group.setResultsName('or'))
    return expr
def parse(str):
    """Parse a simple SQL ``SELECT`` statement.

    Supported shape: ``select <*|columns> from <tables> [where <expr>]``,
    where the where-expression combines comparisons, ``in (...)`` lists or
    sub-selects and parenthesised expressions with ``and``/``or``.
    ``--`` comments running to the end of the line are ignored.

    Result names on the parsed tokens: ``columns``, ``tables``, ``where``.

    On a ``ParseException`` a caret marking the failing column, the error
    message and the exception itself are printed.

    Returns the parse results, or ``''`` when the statement did not parse.
    (Previously the results were computed and then dropped.)
    """
    tokens = ''

    # define SQL tokens
    selectStmt = Forward()
    selectToken = Keyword("select", caseless=True)
    fromToken = Keyword("from", caseless=True)

    ident = Word(alphas, alphanums + "_$").setName("identifier")
    columnName = Upcase(delimitedList(ident, ".", combine=True))
    columnNameList = Group(delimitedList(columnName))
    tableName = Upcase(delimitedList(ident, ".", combine=True))
    tableNameList = Group(delimitedList(tableName))

    whereExpression = Forward()
    and_ = Keyword("and", caseless=True)
    or_ = Keyword("or", caseless=True)
    in_ = Keyword("in", caseless=True)

    E = CaselessLiteral("E")
    binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
    arithSign = Word("+-", exact=1)
    realNum = Combine(
        Optional(arithSign)
        + (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums)))
        + Optional(E + Optional(arithSign) + Word(nums)))
    intNum = Combine(
        Optional(arithSign)
        + Word(nums)
        + Optional(E + Optional("+") + Word(nums)))

    # need to add support for alg expressions
    columnRval = realNum | intNum | quotedString | columnName
    whereCondition = Group(
        (columnName + binop + columnRval)
        | (columnName + in_ + "(" + delimitedList(columnRval) + ")")
        | (columnName + in_ + "(" + selectStmt + ")")
        | ("(" + whereExpression + ")"))
    whereExpression << whereCondition + ZeroOrMore(
        (and_ | or_) + whereExpression)

    # define the grammar
    selectStmt << (
        selectToken
        + ('*' | columnNameList).setResultsName("columns")
        + fromToken
        + tableNameList.setResultsName("tables")
        + Optional(Group(CaselessLiteral("where") + whereExpression),
                   "").setResultsName("where"))

    simpleSQL = selectStmt

    # define Oracle comment format, and ignore them
    oracleSqlComment = "--" + restOfLine
    simpleSQL.ignore(oracleSqlComment)

    try:
        tokens = simpleSQL.parseString(str)
    except ParseException as err:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(" " * err.loc + "^\n" + err.msg)
        print(err)
    return tokens
def get_parser():
    """Return a lyrics file parser.

    The parser is created on first use and stored in ``Lyrics._parser``;
    later calls return the stored parser.  It matches zero or more comment
    lines (result name ``comments``) followed by one or more sections
    separated by ``EOL`` (result name ``sections``).

    @see grammar.md for the whole grammar.
    """
    if Lyrics._parser is not None:
        return Lyrics._parser

    # Parser not yet defined. Defining it.
    comments = Group(ZeroOrMore(COMMENT_SIGN + WORDS + EOL))
    one_section = Section.get_parser()
    all_sections = one_section + ZeroOrMore(EOL + one_section)
    Lyrics._parser = (comments.setResultsName("comments")
                      + all_sections.setResultsName("sections"))
    return Lyrics._parser
def get_parser():
    """Return a lyrics file parser, building it on the first call.

    The built parser is cached in ``Lyrics._parser``.  It is a group of
    zero or more comment lines named ``comments`` followed by ``EOL``
    separated sections named ``sections``.

    @see grammar.md for the whole grammar.
    """
    parser = Lyrics._parser
    if parser is None:
        # Parser not yet defined. Defining it.
        comment_block = Group(ZeroOrMore(COMMENT_SIGN + WORDS + EOL))
        section_expr = Section.get_parser()
        section_list = section_expr + ZeroOrMore(EOL + section_expr)
        parser = (comment_block.setResultsName("comments")
                  + section_list.setResultsName("sections"))
        Lyrics._parser = parser
    return parser
def parser(text):
    r"""
    Parse a query expression and return the pyparsing results.

    Grammar sketch::

        str := \w+
        str := '\w+'
        exp := Var=str
        exp := exp & exp
        exp := exp ^ exp

    A basic expression is ``[#]name = value`` or ``[#]name != value`` where
    the value may list alternatives separated by ``|``.  Basic expressions
    chain with ``&``; bracketed pairs ``[exp ^ exp]`` and ``[exp -> exp]``
    combine whole expressions.

    Result names: ``exp``, ``left``, ``right``, ``connector``, ``hash``,
    ``text`` and ``or_group``.
    """
    # grammar
    g_quote = Literal("'").suppress()
    # Raw string: the escapes in this pattern are not valid string escapes.
    g_text = Regex(r"[\w\s\:\#\.]+").setResultsName("text")
    g_string = Optional(g_quote) + g_text + Optional(g_quote)
    g_equ = (Literal("!=").setResultsName("connector")
             | Literal("=").setResultsName("connector"))
    g_amp = Literal("&").setResultsName("connector")
    g_hat = Literal("^").setResultsName("connector")
    g_or = Literal("|").suppress()
    g_seq = Literal("->").setResultsName("connector")
    g_hash = Literal("#").setResultsName("hash")
    g_left_brack = Literal("[").suppress()
    g_right_brack = Literal("]").suppress()

    # One value, optionally followed by "|"-separated alternatives.
    g_vals = Forward()
    g_vals << g_string + ZeroOrMore(
        Group(g_or + g_vals).setResultsName("or_group"))

    # recursion
    simpleq = Forward()
    complexq = Forward()

    exp = (simpleq | complexq).setResultsName("exp")
    exp_basic = Group(
        Group(Optional(g_hash) + g_string).setResultsName("left")
        + g_equ
        + Group(g_vals).setResultsName("right"))
    simpleq << (
        Group(exp_basic.setResultsName("left")
              + g_amp
              + simpleq.setResultsName("right"))
        | exp_basic)
    complexq << (
        Group(g_left_brack + exp.setResultsName("left")
              + g_hat
              + exp.setResultsName("right") + g_right_brack)
        | Group(g_left_brack + exp.setResultsName("left")
                + g_seq
                + exp.setResultsName("right") + g_right_brack))

    return exp.parseString(text)
def build_insert_grammar():
    """Build the pyparsing grammar for an INSERT statement.

    Shape: ``insert into <table> ( [columns] ) values ( <values> )``.  The
    keywords are caseless, the column list is optional inside its
    parentheses, and each value is a run of alphanumerics and spaces.

    Result names: ``table_name``, ``columns`` and ``values_list``.

    Returns the ``Forward`` expression holding the grammar.
    """
    column_list = Optional(Group(delimitedList(identifier_token, ",")))
    value_list = Group(delimitedList(Word(alphanums + " "), ","))

    statement = Forward()
    statement << (
        Keyword("insert into", caseless=True)
        + table_name_token.setResultsName("table_name")
        + Literal("(")
        + column_list.setResultsName("columns")
        + Literal(")")
        + Keyword("values", caseless=True)
        + Literal("(")
        + value_list.setResultsName("values_list")
        + Literal(")"))
    return statement
def _create_block_bnf():
    """Build the block grammar once and publish it through module globals.

    Does nothing when ``block_bnf`` is already set.  Otherwise it assigns
    the globals ``block_id``, ``block_bnf`` and ``statement``.

    A block is a block header followed by ``{ ... }`` containing zero or
    more statements, where a statement is ``generic_statement`` or a nested
    block.  Headers: ``every <interval>``, ``when connected to <slot>``,
    ``when <stream trigger>``, ``config <slot>`` and
    ``on <trigger> [and|or <trigger>]``.  The ``-`` operator is used after
    the leading keyword(s) of each header.
    """
    global block_bnf, time_interval, slot_id, statement, block_id, ident, stream

    if block_bnf is not None:
        return

    def hidden(word):
        # Match the literal but leave it out of the results.
        return Literal(word).suppress()

    trigger = Group(
        stream_trigger
        | Group(stream).setResultsName('stream_always')
        | Group(ident).setResultsName('identifier'))

    every_header = Group(
        hidden(u'every') - (time_interval | tick_interval)
    ).setResultsName('every_block')
    when_header = Group(
        hidden(u'when') + hidden("connected") - hidden("to") - slot_id
    ).setResultsName('when_block')
    latch_header = Group(
        hidden(u'when') - stream_trigger
    ).setResultsName('latch_block')
    config_header = Group(
        hidden(u'config') - slot_id
    ).setResultsName('config_block')

    second_trigger = Optional(
        (Literal("and") | Literal("or"))
        - trigger.setResultsName('triggerB'))
    on_header = Group(
        hidden(u'on') - trigger.setResultsName('triggerA') - second_trigger
    ).setResultsName('on_block')

    # Keep track of the location where the match started for error handling
    start_location = Empty().setParseAction(
        lambda _text, loc, _toks: loc)('location')

    block_id = Group(
        start_location
        + (every_header | when_header | latch_header
           | config_header | on_header))

    block_bnf = Forward()
    statement = generic_statement | block_bnf

    body = Group(hidden(u'{') + ZeroOrMore(statement) + hidden(u'}'))
    block_bnf << Group(block_id + body).setResultsName('block')
def simple_query():  # IGNORE:too-many-locals
    '''
    Build the grammar for simple queries.

    <simple-query> ::= 'SELECT' ['DISTINCT'] ['TOP('<integer>')']
        <select-term> (',' <select-term>)*
        'FROM' <table-term> (',' <table-term>)*
        [<where-condition>]
        [['ACCORDING' 'TO'] 'PREFERENCES' <theory-grammar>]
        ['GROUP BY' <identifier> (',' <identifier>)* ]

    Result names: 'select_clause' (with 'distinct', 'top', 'selected'),
    'from_clause', 'where_clause', 'where_or' / 'where_and',
    'group_clause' and 'preference_clause'.  Matches are passed to
    ParsedSimpleQuery as a parse action.
    '''
    from grammar.keywords import (
        AND_KEYWORD, ACCORDING_KEYWORD, TO_KEYWORD, DISTINCT_KEYWORD,
        PREFERENCES_KEYWORD, SELECT_KEYWORD, FROM_KEYWORD, WHERE_KEYWORD,
        GROUP_KEYWORD, BY_KEYWORD, OR_KEYWORD)
    from grammar.basic import attribute_term
    from grammar.symbols import COMMA
    from grammar.theory import TheoryGrammar
    from grammar.parsed import ParsedSimpleQuery

    select_clause = (
        Suppress(SELECT_KEYWORD)
        + Optional(DISTINCT_KEYWORD).setResultsName('distinct')
        + Optional(top_term()).setResultsName('top')
        + delimitedList(select_term(), COMMA).setResultsName('selected'))

    group_by_clause = (
        Suppress(GROUP_KEYWORD)
        + Suppress(BY_KEYWORD)
        + delimitedList(attribute_term(), COMMA))

    preference_clause = (
        Optional(Suppress(ACCORDING_KEYWORD + TO_KEYWORD))
        + Suppress(PREFERENCES_KEYWORD)
        + TheoryGrammar.grammar())

    # The first where term stands alone; further terms are all joined by OR
    # or all joined by AND.
    first_condition = Group(where_term())
    or_conditions = (
        Suppress(OR_KEYWORD)
        + delimitedList(Group(where_term()), OR_KEYWORD))
    and_conditions = (
        Suppress(AND_KEYWORD)
        + delimitedList(Group(where_term()), AND_KEYWORD))
    more_conditions = (or_conditions.setResultsName('where_or')
                       | and_conditions.setResultsName('where_and'))

    from_clause = delimitedList(table_term(), COMMA)
    where_part = Optional(
        Suppress(WHERE_KEYWORD)
        + first_condition.setResultsName('where_clause')
        + Optional(more_conditions))
    trailing_part = Optional(
        group_by_clause.setResultsName('group_clause')
        | preference_clause.setResultsName('preference_clause'))

    query = (
        select_clause.setResultsName('select_clause')
        + Suppress(FROM_KEYWORD)
        + from_clause.setResultsName('from_clause')
        + where_part
        + trailing_part)
    query.setParseAction(ParsedSimpleQuery)
    return query
def define_identifier(self):
    """
    Return the syntax definition for an identifier.

    A full identifier is zero or more namespace parts, each followed by the
    grammar's ``namespace_separator`` token, and then one individual
    identifier made of word characters plus the character(s) of the
    grammar's ``identifier_spacing`` token.  The identifier must not be
    directly followed by one of the operators ``^ + - * / ! %``.

    """
    # --- Defining the individual identifiers:
    space_char = re.escape(self._grammar.get_token("identifier_spacing"))
    # NOTE(review): this variant does not reject identifiers that start with
    # a digit.  The earlier code built a Unicode-digit expression for that
    # check but never applied it, so the unused scan was removed.  Confirm
    # that accepting a leading digit is intended.
    identifier0 = Combine(Regex(r"[\w%s]+" % space_char, re.UNICODE))
    identifier0.setName("individual_identifier")

    # --- Defining the namespaces:
    namespace_sep = Suppress(
        self._grammar.get_token("namespace_separator"))
    namespace = Group(ZeroOrMore(identifier0 + namespace_sep))
    namespace.setName("namespace")

    # --- The full identifier, which could have a namespace:
    identifier = Combine(
        namespace.setResultsName("namespace_parts")
        + identifier0.setResultsName("identifier"))
    identifier.setName("full_identifier")

    # An identifier may not be immediately followed by an operator.
    expop = Literal('^')
    multop = oneOf('* /')
    factop = Literal('!')
    modop = Literal('%')
    signop = oneOf('+ -')
    opers = expop | signop | multop | factop | modop

    identifier = identifier + NotAny(opers)

    return identifier
def buildBNF():
    """Build the pyparsing grammar for the start of an access-log line.

    Fields in order: dotted IP address, a suppressed ``-``, the auth user
    (or ``-``), a bracketed timestamp with time-zone offset, the
    double-quoted request, the status code and the number of bytes sent
    (either may be ``-``).

    Result names: ``ipAddr``, ``auth``, ``timestamp``, ``cmd`` (handled by
    ``getFields``), ``statusCode`` and ``numBytesSent``.
    """
    number = Word(nums)

    def number_or_dash():
        # A numeric field that the log may leave as "-".
        return number | "-"

    address = delimitedList(number, ".", combine=True)
    user = "-" | Word(alphas + nums + "@._")

    zone = Word("+-", nums)
    month_name = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3)
    stamp = Combine(number + "/" + month_name + "/" + number
                    + ":" + number + ":" + number + ":" + number)
    timestamp = Group(Suppress("[") + stamp + zone + Suppress("]"))

    request = dblQuotedString.setResultsName("cmd").setParseAction(getFields)

    return (address.setResultsName("ipAddr")
            + Suppress("-")
            + user.setResultsName("auth")
            + timestamp.setResultsName("timestamp")
            + request
            + number_or_dash().setResultsName("statusCode")
            + number_or_dash().setResultsName("numBytesSent"))
def fromString(inputText, verbose=False):
    """Parse CDSL component text and return ``CDSLParsing.component(tree)``.

    ``/* ... */`` comments are stripped from ``inputText`` first, and
    C++-style comments are ignored while parsing.  The text is zero or more
    ``import "path";`` lines followed by one
    ``component name { ... };`` whose body holds, in any order, the
    ``communications`` block, the ``language`` line and the optional ``gui``
    and ``options`` lines.

    Args:
        inputText: the CDSL source text.
        verbose: when true, the value of the flag is printed.

    Raises whatever ``parseString`` raises when the text does not match.
    """
    if verbose:
        # Same output as the Python 2 statement, valid on Python 2 and 3.
        print('Verbose: %s' % (verbose,))
    text = nestedExpr("/*", "*/").suppress().transformString(inputText)

    semicolon = Suppress(Word(";"))
    quote = Suppress(Word("\""))
    op = Suppress(Word("{"))
    cl = Suppress(Word("}"))
    opp = Suppress(Word("("))
    clp = Suppress(Word(")"))

    identifier = Word(alphas + "_", alphanums + "_")
    commIdentifier = Group(
        identifier.setResultsName('identifier')
        + Optional(
            opp
            + (CaselessLiteral("ice") | CaselessLiteral("ros")).setResultsName("type")
            + clp))

    def commaList(item):
        # One item followed by any number of ", item".
        return item + ZeroOrMore(Suppress(Word(',')) + item)

    # Imports
    idslImport = (Suppress(CaselessLiteral("import")) + quote
                  + CharsNotIn("\";").setResultsName('path')
                  + quote + semicolon)
    idslImports = ZeroOrMore(idslImport)

    # Communications
    implementsList = Group(
        CaselessLiteral('implements') + commaList(identifier) + semicolon)
    requiresList = Group(
        CaselessLiteral('requires') + commaList(identifier) + semicolon)
    subscribesList = Group(
        CaselessLiteral('subscribesTo') + commaList(commIdentifier) + semicolon)
    publishesList = Group(
        CaselessLiteral('publishes') + commaList(identifier) + semicolon)
    communicationList = (implementsList | requiresList
                         | subscribesList | publishesList)
    communications = Group(
        Suppress(CaselessLiteral("communications"))
        + op + ZeroOrMore(communicationList) + cl + semicolon)

    # Language
    language = (Suppress(CaselessLiteral("language"))
                + (CaselessLiteral("cpp") | CaselessLiteral("python"))
                + semicolon)
    # GUI
    gui = Group(Optional(
        Suppress(CaselessLiteral("gui")) + CaselessLiteral("Qt")
        + opp + identifier + clp + semicolon))
    # additional options
    options = Group(Optional(
        Suppress(CaselessLiteral("options"))
        + commaList(identifier) + semicolon))

    # "&" accepts the four parts in any order.
    componentContents = (communications.setResultsName('communications')
                         & language.setResultsName('language')
                         & gui.setResultsName('gui')
                         & options.setResultsName('options'))
    component = (Suppress(CaselessLiteral("component"))
                 + identifier.setResultsName("name")
                 + op
                 + componentContents.setResultsName("properties")
                 + cl + semicolon)

    CDSL = idslImports.setResultsName("imports") + component.setResultsName("component")
    CDSL.ignore(cppStyleComment)
    tree = CDSL.parseString(text)
    return CDSLParsing.component(tree)
def __get_weblog_grammar():
    """Build the pyparsing grammar for one web-server log line.

    Nothing is suppressed: the dash after the address is kept under the
    name ``dash``, and the timestamp group holds two combined strings, one
    starting with ``[`` (date and time) and one ending with ``]`` (time-zone
    offset).  Quoted fields keep their quotes.

    Result names: ``ip_address``, ``dash``, ``auth``, ``timestamp``,
    ``command``, ``status_code``, ``num_bytes``, ``referrer`` and
    ``client_agent``.
    """
    number = Word(nums)

    def number_or_dash():
        # A numeric field that the log may leave as "-".
        return number | "-"

    address = delimitedList(number, ".", combine=True)
    user = "-" | Word(alphas + nums + "@._")

    zone = Word("+-", nums)
    month_name = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3)
    opening = Combine("[" + number + "/" + month_name + "/" + number
                      + ":" + number + ":" + number + ":" + number)
    closing = Combine(zone + "]")
    timestamp = Group(opening + closing)

    return (address.setResultsName("ip_address")
            + Word("-").setResultsName("dash")
            + user.setResultsName("auth")
            + timestamp.setResultsName("timestamp")
            + dblQuotedString.setResultsName("command")
            + number_or_dash().setResultsName("status_code")
            + number_or_dash().setResultsName("num_bytes")
            + dblQuotedString.setResultsName("referrer")
            + dblQuotedString.setResultsName("client_agent"))
"{" + ZeroOrMore(resource)("resources") + "}" ) constant = Group( Optional(comment)("doc") + Keyword("const") + basic_type("type") + identifier("name") + "=" + signed_literal("value") + ";" ) flatdata_entry = ( enum.setResultsName("enumerations", listAllMatches=True) | struct.setResultsName("structures", listAllMatches=True) | archive.setResultsName("archives", listAllMatches=True) | constant.setResultsName("constants", listAllMatches=True) | comment.setResultsName("comment", listAllMatches=True) ) free_comments = Optional(OneOrMore(comment)("comment")) namespace = Group( Keyword("namespace") + qualified_identifier("name") + "{" + ZeroOrMore(flatdata_entry) + "}" + Optional(comment) )
def create_where():
    """Create the grammar for a 'where' clause.

    The clause is the ``where_`` expression followed by one or more
    ``constraint`` expressions separated by ``and_``; the separators are
    suppressed and the constraints are grouped under the name ``where``.
    """
    further_constraints = ZeroOrMore(Suppress(and_) + constraint)
    constraints = Group(constraint + further_constraints)
    return where_ + constraints.setResultsName('where')
channels = Group(OneOrMore(channel)) rotationOrder = Word("XYZ", exact=3) begin = Suppress(Keyword("begin")) end = Suppress(Keyword("end")) bonename = Combine(~end + Word(alphanums+"_-")).setWhitespaceChars(' ') version = Keyword(":version") + Literal("1.10") skeletonName = Keyword(":name") + bonename.setResultsName('name') unitDefinition = Group(Word(alphas) + (floatValue | intValue | Word(alphas))) unitSection = Keyword(":units") + \ Dict(ZeroOrMore(unitDefinition)).setResultsName('units') documentationSection = Keyword(':documentation') + \ SkipTo(":").setResultsName('documentation') rootSection = Group(Keyword(":root") & (Keyword("order") + channels.setResultsName('channels')) & (Keyword("position") + floatVector.setResultsName('position')) & (Keyword("axis") + rotationOrder.setResultsName("axisRotationOrder")) & (Keyword("orientation") + floatVector.setResultsName("axis")) ).setResultsName('root') bone = Group( begin + Keyword("id") + intValue + Keyword("name") + bonename.setResultsName("name") + Keyword("direction") + floatVector.setResultsName("direction") +
havingExpression = Forward() whereColumn = (( caseStart + SkipTo(caseEnd, include=True)) | ( Optional("(") + colIdent + Optional("(") + ZeroOrMore(arithop + Optional("(") + colIdent + Optional(")")) + Optional(")") ) ) columnRval = realNum | intNum | quotedString | whereColumn # need to add support for alg expressions condition = Group( ( whereColumn + binop + columnRval ) | ( whereColumn + Optional(not_) + in_ + "(" + delimitedList( columnRval ) + ")" ) | ( whereColumn + is_ + Optional(not_) + null ) | ( whereColumn + between_ + columnRval + and_ + columnRval) | ( "(" + whereExpression + ")" ) ) whereExpression << condition.setResultsName("where", listAllMatches=True) + ZeroOrMore( ( and_ | or_ ) + whereExpression ) havingExpression << condition.setResultsName("having", listAllMatches=True) + ZeroOrMore( ( and_ | or_ ) + havingExpression ) groupByList = delimitedList( columnName.setResultsName("groupby", listAllMatches=True) ) # define the grammar selectStmt << ( selectToken + ( Optional(distinct_) + columnNameList ) + fromToken + ( tableName.setResultsName("tables", listAllMatches=True ) | ("(" + selectStmt + Optional(")")) ) + ZeroOrMore( SkipTo(joinToken, include=True, failOn=whereToken) + (("(" + selectStmt + Optional(")")) | tableName.setResultsName("tables", listAllMatches=True)) ) + Optional(SkipTo(whereToken, include=True, failOn=")") + whereExpression) + Optional(SkipTo(groupToken, include=True, failOn=")") + groupByList) + Optional(SkipTo(havingToken, include=True, failOn=")") + havingExpression))
+ Combine(ident).setResultsName('name') + as_kw + select_stmt ) # # Define asql index grammar. # index_source = delimitedList(source_ident, '.', combine=True) index_kw = Keyword('index', caseless=True) _index_stmt = Forward() _index_stmt << ( Optional(create_kw) + index_kw + index_source.setResultsName('source') + '(' + column_name_list.setResultsName('columns') + ')') # Examples: # index = index_stmt.parseString('INDEX partition1 (col1, col2, col3);') # print(index.source) # 'partition1' # print(index.columns) # ['col1', 'col2', 'col3'] # define Oracle comment format, and ignore them oracle_sql_comment = '--' + restOfLine _view_stmt.ignore(oracle_sql_comment) _index_stmt.ignore(oracle_sql_comment) def substitute_vids(library, statement):
( columnName + CaselessLiteral('is') + CaselessLiteral('null')) ) whereExpression << whereCondition + ZeroOrMore( ( and_ | or_ ) + whereExpression ) orderByToken = CaselessLiteral("order by") limitToken = CaselessLiteral("limit") offsetToken = CaselessLiteral("offset") asc_ = CaselessLiteral('ASC') desc_ = CaselessLiteral('DESC') # define the grammar selectStmt << (selectToken + (columnNameList.setResultsName( "columns" ) + fromToken + (collection | rootEntity)) + Optional( Group( CaselessLiteral("where") + whereExpression ).setResultsName("where") , "" ) + Optional(orderByToken.suppress() + delimitedList(Group(columnName + Optional(asc_ | desc_))) .setResultsName('direction')).setResultsName("orderBy") + Optional(limitToken.suppress() + Word(nums)).setResultsName("limit") + Optional(offsetToken.suppress() + Word(nums)).setResultsName("offset") ).setParseAction(convertWildCards) deleteStmt = (deleteToken +
def parse_morphology(filename, filename_toparse):
    """Parse a morphology description and emit a Python ``shape_3D`` stub.

    The text read from ``filename_toparse`` is parsed with the pyparsing
    grammar built below.  The output file ``filename`` first receives a fixed
    header; the parse actions created by the ``print_*`` and
    ``update_current_section`` factories are each handed the open output file
    and the indentation level (presumably they write the translated
    statements -- confirm against those helpers).

    Both files are closed when the function returns, including when parsing
    raises.

    :param filename: path of the Python file to create (overwritten).
    :param filename_toparse: path of the morphology file to read.
    """
    global current_section_name
    current_section_name = ''

    with open(filename, 'w') as converted_file:
        converted_file.write('from neuron import h\ndef shape_3D(self):\n')
        ntabs = 1  # from here on, add a tab to all lines

        # define the character set allowed in section names: a..z, '_', A..Z
        uppercase = lowercase.upper()
        lowercaseplus = lowercase + '_' + uppercase

        COMMA = Literal(',')
        EQUALS = Literal('=')
        MINUS = Literal('-')
        PERIOD = Literal('.')
        RCURL = Literal('}')
        LBRACK = Literal('(')
        RBRACK = Literal(')')
        LSQUARE = Literal('[')
        RSQUARE = Literal(']')
        PTSCLEAR = Literal('{pt3dclear()').suppress()
        PTSCLEARNL = Literal('{\npt3dclear()\n').suppress()

        integer = Word(nums)
        single_section = Word(lowercaseplus, min=2)
        # NOTE(review): setResultsName() returns a copy, so this call (and the
        # one on array_section below) has no effect on the grammar.  Kept
        # as-is to leave the parse results unchanged.
        single_section.setResultsName('SINGLE')

        integer_var = Word(lowercase, exact=1)

        double = Group(Optional(MINUS) + integer + Optional(PERIOD + integer))

        # An index expression is a single operand or "operand op operand".
        operand = integer ^ integer_var
        operator = Word('+-*/', exact=1)

        unaryoperation = operand
        binaryoperation = operand + operator + operand
        operation = unaryoperation ^ binaryoperation

        array_section = Group(single_section + LSQUARE.suppress()
                              + operation + RSQUARE.suppress())
        array_section.setResultsName('ARRAY')

        section = single_section ^ array_section

        section_location = Group(section + LBRACK.suppress() + double
                                 + RBRACK.suppress())

        create = (Keyword('create').suppress() + section
                  + ZeroOrMore(COMMA.suppress() + section))
        create.setParseAction(print_create(converted_file, ntabs))

        connect = (Keyword('connect').suppress() + section_location
                   + COMMA.suppress() + section_location)
        connect.setParseAction(print_connect(converted_file, ntabs))

        for_loop = (Keyword('for').suppress() + integer_var
                    + EQUALS.suppress() + integer + COMMA.suppress() + integer)
        # NOTE TO FUTURE SELF: for loops can only have one line of code in
        # this implementation
        for_loop.setParseAction(print_for_loop(converted_file, ntabs))

        point_add = (Literal('pt3dadd(').suppress() + double
                     + COMMA.suppress() + double + COMMA.suppress() + double
                     + COMMA.suppress() + double + RBRACK.suppress())
        point_add.setParseAction(print_point_add(converted_file, ntabs))

        point_style = (Literal('pt3dstyle(').suppress() + double
                       + COMMA.suppress() + double + COMMA.suppress() + double
                       + COMMA.suppress() + double + RBRACK.suppress())
        point_style.setParseAction(print_point_style(converted_file, ntabs))

        geom_define_pre = section + (PTSCLEAR ^ PTSCLEARNL)
        geom_define_body = OneOrMore(point_add ^ point_style) + RCURL.suppress()
        geom_define_pre.setParseAction(
            update_current_section(converted_file, ntabs))

        geom_define = geom_define_pre + geom_define_body

        expression = (connect ^ for_loop ^ geom_define ^ create)
        codeblock = OneOrMore(expression)

        # Read the whole input up front so the input handle is released before
        # parsing starts; the parse actions fire during parseString() while
        # the output file is still open.
        with open(filename_toparse) as file_to_parse:
            source_text = file_to_parse.read()
        codeblock.parseString(source_text)
# An item is whichever of a plain word or a pair gives the longest match.
item = word ^ pair1
plus = Literal('+').suppress()
plusitem = OneOrMore(plus) + item
endPunctuation = Literal("'").suppress()

# Payload of a segment that is parsed: '+'-separated items up to the closing
# apostrophe.
trailing = OneOrMore(plusitem) + endPunctuation
# Payload of a segment that is only skipped: any run of printable characters
# other than the apostrophe, then the apostrophe itself.
ignoredTrailing = Word(printables, excludeChars="'") + endPunctuation
_skipped = Suppress(ignoredTrailing)

# Envelope segments: the tag is kept, the payload is dropped.
UNBLine = "UNB" + _skipped
UNHLine = "UNH" + _skipped
MSGLine = "MSG" + _skipped

# Data segments: the tag is dropped, the payload is kept.
ODILine = Suppress("ODI") + trailing
TVLLine = Suppress("TVL") + trailing
PDILine = Group(Suppress("PDI") + trailing)

# One TVL entity is a TVL segment followed by a PDI segment; the starred
# results names collect every occurrence.
TVLEntity = Group(
    TVLLine.setResultsName('TVL') + PDILine.setResultsName('PDI')
)
TVLEntity1 = TVLEntity("TVLEntity*")
TVLEntity2 = Group(OneOrMore(TVLEntity1))

# One ODI entity is an ODI segment followed by one or more TVL entities.
ODIEntity = Group(ODILine.setResultsName('ODI') + TVLEntity2("TVLEntity2"))
ODIEntity1 = ODIEntity("ODIEntity*")
ODIEntity2 = Group(OneOrMore(ODIEntity1))

UNTLine = "UNT" + _skipped
UNZLine = "UNZ" + _skipped

edifact = (
    UNBLine
    + Optional(UNHLine)
    + Optional(MSGLine)
    + ODIEntity2("ODIEntity2")
    + Optional(UNTLine)
    + Optional(UNZLine)
)

##teststring = "UNB+IATA:1+AA:IEDI+X1:IEDI+161012:2143+00000000'"

# Read the file named on the command line and drop every newline so that the
# message is one continuous string.
with open(sys.argv[1], 'r') as myfile:
    data = ''.join(myfile.read().split('\n'))

#for tokens,startloc,endloc in edifact.scanString(data):
#    print tokens.ODI
#    print "\n"
# Grammar fragments for a structured log message.  The element names (MSG,
# SD_ELEMENT, STRUCTURED_DATA, nilvalue, ...) look like the RFC 5424 syslog
# ABNF -- NOTE(review): confirm against the rest of the module.

# --- character-level building blocks
printusascii = printables
sp = White(" ", exact=1)
octet = Regex("[\x00-\xFF]")
utf_8_string = Regex("[\x00-\xFF]*")

# --- message body: either BOM-prefixed or arbitrary octets
BOM = "\xef\xbb\xbf"
bom = Regex(BOM)
msg_utf8 = bom + utf_8_string
msg_any = utf_8_string
msg = Combine(Or([msg_utf8, msg_any])).setResultsName("MSG")

# --- structured data: "[id name="value" ...]" elements, or the nil value
# A name is 1 to 32 characters, none of which is '=', ' ', ']' or '"'.
sd_name = CharsNotIn('= ]"', 1, 32)
param_name = sd_name.setResultsName("SD_PARAM_NAME")
param_value = QuotedString(quoteChar='"', escChar="\\", multiline=True)
param_value = param_value.setResultsName("SD_PARAM_VALUE")
sd_id = sd_name.setResultsName("SD_ID")
sd_param = Group(param_name + Regex("=") + param_value)
sd_params = Group(ZeroOrMore(Group(sp + sd_param.setResultsName("SD_PARAM"))))
sd_element = Group("[" + sd_id + sd_params.setResultsName("SD_PARAMS") + "]")
sd_element = sd_element.setResultsName("SD_ELEMENT")
sd_elements = Group(OneOrMore(sd_element))
structured_data = Or([nilvalue, sd_elements.setResultsName("SD_ELEMENTS")])
structured_data = structured_data.setResultsName("STRUCTURED_DATA")

# --- timestamp pieces
# Patterns containing a backslash are raw strings: "\." and "\+" are invalid
# escape sequences in an ordinary string literal (they compile to the same
# pattern, but newer Python versions warn about them).
time_hour = Regex("0[0-9]|1[0-9]|2[0-3]")
time_minute = Regex("[0-5][0-9]")
time_second = time_minute
time_secfrac = Regex(r"\.[0-9]{1,6}")
time_numoffset = Or([Regex(r"\+"), Regex("-")]) + time_hour + ":" + time_minute
time_offset = Or([Regex("Z"), time_numoffset])
partial_time = (time_hour + ":" + time_minute + ":" + time_second
                + Optional(time_secfrac))
full_time = partial_time + time_offset
date_mday = Regex("[0-9]{2}")
date_month = Regex("0[1-9]|1[0-2]")
# Grammar fragments for a structured log message.  The element names (MSG,
# SD_ELEMENT, STRUCTURED_DATA, nilvalue, ...) look like the RFC 5424 syslog
# ABNF -- NOTE(review): confirm against the rest of the module.

# --- character-level building blocks
printusascii = printables
sp = White(" ", exact=1)
octet = Regex('[\x00-\xFF]')
utf_8_string = Regex('[\x00-\xFF]*')

# --- message body: either BOM-prefixed or arbitrary octets
BOM = '\xef\xbb\xbf'
bom = Regex(BOM)
msg_utf8 = bom + utf_8_string
msg_any = utf_8_string
msg = Combine(Or([msg_utf8, msg_any])).setResultsName('MSG')

# --- structured data: '[id name="value" ...]' elements, or the nil value
# A name is 1 to 32 characters, none of which is '=', ' ', ']' or '"'.
sd_name = CharsNotIn('= ]"', 1, 32)
param_name = sd_name.setResultsName('SD_PARAM_NAME')
param_value = QuotedString(quoteChar='"', escChar='\\', multiline=True)
param_value = param_value.setResultsName('SD_PARAM_VALUE')
sd_id = sd_name.setResultsName('SD_ID')
sd_param = Group(param_name + Regex('=') + param_value)
sd_params = Group(ZeroOrMore(Group(sp + sd_param.setResultsName('SD_PARAM'))))
sd_element = Group('[' + sd_id + sd_params.setResultsName('SD_PARAMS') + ']')
sd_element = sd_element.setResultsName('SD_ELEMENT')
sd_elements = Group(OneOrMore(sd_element))
structured_data = Or([nilvalue, sd_elements.setResultsName('SD_ELEMENTS')])
structured_data = structured_data.setResultsName('STRUCTURED_DATA')

# --- timestamp pieces
# Patterns containing a backslash are raw strings: '\.' and '\+' are invalid
# escape sequences in an ordinary string literal (they compile to the same
# pattern, but newer Python versions warn about them).
time_hour = Regex('0[0-9]|1[0-9]|2[0-3]')
time_minute = Regex('[0-5][0-9]')
time_second = time_minute
time_secfrac = Regex(r'\.[0-9]{1,6}')
time_numoffset = Or([Regex(r'\+'), Regex('-')]) + time_hour + ':' + time_minute
time_offset = Or([Regex('Z'), time_numoffset])
partial_time = (time_hour + ':' + time_minute + ':' + time_second
                + Optional(time_secfrac))
full_time = partial_time + time_offset
date_mday = Regex('[0-9]{2}')
# Comparison operators, tried in this order.
relation_op = (
    diff_op
    | greater_then_op
    | less_than_op
    | greater_op
    | less_op
    | equal_op
)
# A relation is one grouped arithmetic expression, optionally compared with
# further grouped arithmetic expressions.
_relation_operand = Group(arith_expr)
relation = _relation_operand + ZeroOrMore(relation_op + _relation_operand)

# Define boolean-expression
bool_factor << relation
# A term is a chain of optionally negated factors joined by AND ...
_maybe_negated = Optional(NOT) + bool_factor
bool_term << _maybe_negated + ZeroOrMore(AND + _maybe_negated)
# ... and an expression is a chain of grouped terms joined by OR.
_grouped_term = Group(bool_term)
bool_expr << _grouped_term + ZeroOrMore(OR + _grouped_term)

# Where Clause
where_clause = Group(WHERE + bool_expr).setResultsName("where_clause")

# From-clause
# bnf-from_table = table ( (AS){0,1} alias)*
from_table = Group(
    table_name + Optional(Optional(AS) + alias)
).setResultsName("from_table")
# tbl1 as t, tbl2 as b,...
from_tables = delimitedList(from_table).setResultsName("from_tables")
from_clause = Group(FROM + from_tables).setResultsName("from_clause")

# Select Statement
star = Literal("*")
# A projected attribute is "*" or an arithmetic expression with an optional
# "AS alias".
attribute = star | Group(arith_expr + Optional(AS + alias))
projected_attrs = delimitedList(attribute).setResultsName(
    "projected_attributes"
)
select_stmt = (
    SELECT
    + Optional(DISTINCT | ALL)
    + projected_attrs
    + from_clause
    + Optional(where_clause)
)

# Insert statement
# Contrary to standard SQL, our implementation of the Insert-Statement does not
# consider the list-of-columns. To reduce the complexity of the execution of an
arithSign = Word("+-", exact=1)
# A real number needs a decimal point ("1.", "1.5" or ".5"), with an optional
# leading sign and an optional signed exponent.
realNum = Combine(
    Optional(arithSign)
    + (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums)))
    + Optional(E + Optional(arithSign) + Word(nums))
)
# An integer may carry an exponent, but only an unsigned or "+" one.
intNum = Combine(
    Optional(arithSign) + Word(nums)
    + Optional(E + Optional("+") + Word(nums))
)

# need to add support for alg expressions
columnRval = realNum | intNum | quotedString | columnName

# Zero or more "SELECT cols FROM tables [WHERE ...]" statements, as accepted
# between the parentheses of "column IN ( ... )".
_nestedSelect = ZeroOrMore(
    selectToken
    + ('*' | columnNameList).setResultsName("nestedcolumns")
    + fromToken
    + tableNameList.setResultsName("nestedtables")
    + Optional(
        ZeroOrMore(CaselessLiteral("where") + whereExpression), ""
    ).setResultsName("nestedwhere")
)

# The alternatives are tried in order and the first match wins.  The original
# chain repeated the IN-subselect and the parenthesised alternatives further
# down; those repeats could never be reached and are removed, which leaves
# the set of accepted inputs and the parse results unchanged.
# NOTE(review): one removed alternative was
#   "(" + whereExpression + ")" + Optional(oper + "(" + <nested select> + ")")
# It was shadowed by the plain parenthesised alternative before it.  If
# "(...) <oper> (<subselect>)" is meant to parse, it has to be placed ahead of
# that alternative -- confirm the intent before re-adding it.
whereCondition = ZeroOrMore(
    (columnName + binop + columnRval)
    | (columnName + in_ + "(" + delimitedList(columnRval) + ")")
    | (columnName + in_ + "(" + _nestedSelect + ")")
    | ("(" + whereExpression + ")")
)
whereExpression << whereCondition + ZeroOrMore((and_ | or_) + whereExpression)
def __init__(self): # define SQL tokens selectStmt = Forward() selectToken = Keyword("select", caseless=True) fromToken = Keyword("from", caseless=True) asToken = Keyword("as", caseless=True) whereToken = Keyword("where", caseless=True) semicolon = Literal(";") ident = Word( alphas, alphanums + "_$" ).setName("identifier") columnName = delimitedList( ident, ".", combine=True ) #columnName.setParseAction(upcaseTokens) columnNameList = Group( columnName + ZeroOrMore("," + columnName)) # selectableList = Forward() columnRvalList = Forward() functionExpr = ident + Optional("."+ident) + Literal('(') + columnRvalList + Literal(')') alias = Forward() identExpr = functionExpr | ident self.identExpr = identExpr # Debug self.functionExpr = functionExpr # Debug alias = ident.copy() selectableName = identExpr | columnName selectableList = Group( selectableName + ZeroOrMore(","+selectableName)) columnRef = columnName functionSpec = functionExpr valueExprPrimary = functionSpec | columnRef numPrimary = valueExprPrimary ## | numericValFunc factor = Optional(Literal("+") | Literal("-")) + numPrimary muldiv = oneOf("* /") term = Forward() term << factor + Optional(muldiv + factor) numericExpr = Forward() addsub = oneOf("+ -") numericExpr << term + Optional(addsub + numericExpr) arithop = oneOf("+ - * /") columnNumericExpr = Forward() cTerm = valueExprPrimary testme = valueExprPrimary + arithop + valueExprPrimary columnNumericExpr << cTerm + Optional(arithop + columnNumericExpr) colNumExpList = Group( columnNumericExpr + ZeroOrMore(","+columnNumericExpr)) valueExpr = numericExpr ## | stringExpr | dateExpr | intervalExpr derivedColumn = valueExpr + Optional(asToken + alias) selectSubList = derivedColumn + ZeroOrMore("," + derivedColumn) tableName = delimitedList( ident, ".", combine=True ) # don't upcase table names anymore # tableName.setParseAction(upcaseTokens) self.tableAction = [] tableName.addParseAction(self.actionWrapper(self.tableAction)) tableName.setResultsName("table") 
tableAlias = tableName + asToken + ident.setResultsName("aliasName") tableAlias.setResultsName("alias") genericTableName = tableAlias | tableName genericTableName = genericTableName.setResultsName("tablename") tableNameList = Group( genericTableName + ZeroOrMore("," + genericTableName)) whereExpression = Forward() and_ = Keyword("and", caseless=True) or_ = Keyword("or", caseless=True) in_ = Keyword("in", caseless=True) between_ = Keyword("between", caseless=True) E = CaselessLiteral("E") binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True) arithSign = Word("+-",exact=1) realNum = Combine( Optional(arithSign) + ( Word( nums ) + "." + Optional( Word(nums) ) | ( "." + Word(nums) ) ) + Optional( E + Optional(arithSign) + Word(nums) ) ) intNum = Combine( Optional(arithSign) + Word( nums ) + Optional( E + Optional("+") + Word(nums) ) ) # need to add support for alg expressions columnRval = realNum | intNum | quotedString | columnNumericExpr# | numericExpr columnRvalList << Group( columnRval + ZeroOrMore("," + columnRval)) self.whereExpAction = [] namedRv = columnRval.setResultsName("column") whereConditionFlat = Group( ( functionSpec + binop + columnRval) | ( namedRv + binop + columnRval ) | ( namedRv + in_ + "(" + columnRval + ZeroOrMore(","+namedRv) + ")" ) | ( namedRv + in_ + "(" + selectStmt + ")" ) | ( namedRv + between_ + namedRv + and_ + namedRv ) ) whereConditionFlat.addParseAction(self.actionWrapper(self.whereExpAction)) whereCondition = Group(whereConditionFlat | ( "(" + whereExpression + ")" )) # Test code to try to make an expression parse. 
# print whereConditionFlat.parseString("ABS(o1.ra - o2.ra) < 0.00083 / COS(RADIANS(o2.decl))") # goodFunction = ident + Literal('(') + columnNumericExpr + Literal(')') # print "ADFDSFDSF",testme.parseString("o1.ra - o2.ra", parseAll=True) # print "ADSFDSFAD", goodFunction.parseString("ABS(o1.ra - o2.ra)") #whereExpression << whereCondition.setResultsName("wherecond") #+ ZeroOrMore( ( and_ | or_ ) + whereExpression ) def scAnd(tok): print "scAnd", tok if "TRUE" == tok[0][0]: tok = tok[2] elif "TRUE" == tok[2][0]: tok = tok[0] return tok def scOr(tok): print "scOr", tok if ("TRUE" == tok[0][0]) or ("TRUE" == tok[2][0]): tok = [["TRUE"]] return tok def scWhere(tok): newtok = [] i = 0 while i < len(tok): if str(tok[i]) in ["TRUE",str(["TRUE"])] and (i+1) < len(tok): if str(tok[i+1]).upper() == "AND": i += 2 continue elif str(tok[i+i]).upper() == "OR": break newtok.append(tok[i]) i += 1 return newtok def collapseWhere(tok): #collapse.append(tok[0][1]) if ["TRUE"] == tok.asList()[0][1]: tok = [] return tok andExpr = and_ + whereExpression orExpr = or_ + whereExpression whereExpression << whereCondition + ZeroOrMore( andExpr | orExpr) whereExpression.addParseAction(scWhere) self.selectPart = selectToken + ( '*' | selectSubList ).setResultsName( "columns" ) whereClause = Group(whereToken + whereExpression).setResultsName("where") whereClause.addParseAction(collapseWhere) self.fromPart = fromToken + tableNameList.setResultsName("tables") # define the grammar selectStmt << ( self.selectPart + fromToken + tableNameList.setResultsName( "tables" ) + whereClause) self.simpleSQL = selectStmt + semicolon # define Oracle comment format, and ignore them oracleSqlComment = "--" + restOfLine self.simpleSQL.ignore( oracleSqlComment )
or_ = Keyword("or", caseless=True) binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True) arithSign = Word("+-",exact=1) intNum = Combine( Optional(arithSign) + Word( nums ) ) intNumList = Group( delimitedList(intNum) ) columnRval = intNum | columnName whereCondition = Group(columnRval + binop + columnRval) whereExpression = Group(whereCondition + Optional( (and_ | or_) + whereCondition)) # define the grammar selectStmt = ( selectToken.setResultsName( "statementtype" ) + ( '*' | columnNameList ).setResultsName( "columns" ) + fromToken + tableNameList.setResultsName( "tables" ) + Optional( Group( CaselessLiteral("where") + whereExpression ), "" ).setResultsName("where") ) '''createStmt = ( createTableToken.setResultsName( "statementtype" ) + ident.setResultsName("tablename") + '(' + fieldList.setResultsName("fieldlist") + ')') insertStmt = ( insertIntoToken.setResultsName( "statementtype" ) + ident.setResultsName("tablename") + valuesToken + '(' + intNumList.setResultsName("tablevalues") + ')' )
ZeroOrMore(Group(TUPLEDEF)).setResultsName("tuples") + SkipTo(RBRACE).suppress() + RBRACE) AGENT = ( Keyword("agent").suppress() + IDENTIFIER.setResultsName("name") + LBRACE + ( Optional(named_list("interface", Group(VARREF + COLON + INITIALIZER))) # noqa: E501 & Optional(named_list("stigmergies", IDENTIFIER))) + OneOrMore(Group(PROCDEF)).setResultsName("processes") + RBRACE) ASSUME = (Keyword("assume").suppress() + LBRACE + SkipTo(RBRACE) + RBRACE) CHECK = (Keyword("check").suppress() + LBRACE + SkipTo(RBRACE) + RBRACE) FILE = (SYSTEM.setResultsName("system") + ZeroOrMore(Group(STIGMERGY)).setResultsName("stigmergies") + OneOrMore(Group(AGENT)).setResultsName("agents") + Optional(ASSUME.setResultsName("assume")) + CHECK.setResultsName("check")).ignore(pythonStyleComment) def walk_flat(lst): if type(lst) in (list, tuple, ParseResults): for x in lst: yield from walk_flat(x) else: yield lst def walk_and_print(thing):
_sglQuote = Literal("'")
_dblQuote = Literal('"')
_escapables = printables
# A backslash followed by exactly one printable character.
_escapedChar = Word(_bslash, _escapables, exact=2)

# Quoted strings: the body is any run of characters other than backslash, the
# quote itself and line breaks, or a backslash escape, or a doubled quote.
_dblBody = ZeroOrMore(CharsNotIn('\\"\n\r') | _escapedChar | '""')
dblQuotedString = Combine(_dblQuote + _dblBody + _dblQuote)
dblQuotedString = dblQuotedString.streamline()
dblQuotedString = dblQuotedString.setName("string enclosed in double quotes")

_sglBody = ZeroOrMore(CharsNotIn("\\'\n\r") | _escapedChar | "''")
sglQuotedString = Combine(_sglQuote + _sglBody + _sglQuote)
sglQuotedString = sglQuotedString.streamline()
sglQuotedString = sglQuotedString.setName("string enclosed in single quotes")

# A quoted argument of either flavour, passed through removeQuotes.
quotedArg = (
    (dblQuotedString | sglQuotedString)
    .setParseAction(removeQuotes)
    .setName("quotedArg")
)

# A plain argument is a run of printable characters excluding '#' and both
# quote characters.
plainArgChars = ''.join(ch for ch in printables if ch not in '#"\'')
plainArg = Word(plainArgChars).setName("plainArg")

arguments = Group(ZeroOrMore(quotedArg | plainArg)).setResultsName('arguments')
arguments.setName("arguments")

# comment line.
comment = Suppress(Literal('#') + restOfLine)
comment.setName('comment')

# A full line is either only a comment, or a command with its arguments and
# an optional trailing comment.
full_command = comment | (command + arguments + Optional(comment))
full_command.setName('full_command')

###
_boolConnective = and_ | or_
whereExpression << whereCondition + ZeroOrMore(_boolConnective + whereExpression)

# GROUP BY
groupByExpression = Group(delimitedList(columnDef))

# ORDER BY
_sortDirection = CaselessLiteral("DESC") | CaselessLiteral("ASC")
orderByExpression = Group(delimitedList(columnDef + Optional(_sortDirection)))

# LIMIT / OFFSET -- both are a bare integer
limitExpression = offsetExpression = intNum

# define the grammar
selectColumnList = Group(delimitedList(Group(columnDef + aliasDef)))

# Every clause after FROM is optional and defaults to "" when absent.
_optWhere = Optional(whereToken + whereExpression.setResultsName("where"), "")
_optGroupBy = Optional(
    groupByToken + groupByExpression.setResultsName("groupby"), "")
_optOrderBy = Optional(
    orderByToken + orderByExpression.setResultsName("orderby"), "")
_optLimit = Optional(limitToken + limitExpression.setResultsName("limit"), "")
_optOffset = Optional(
    offsetToken + offsetExpression.setResultsName("offset"), "")

selectStmt << (
    selectToken
    + ('*' | selectColumnList).setResultsName("columns")
    + fromToken
    + tableNameList.setResultsName("tables")
    + _optWhere
    + _optGroupBy
    + _optOrderBy
    + _optLimit
    + _optOffset
)

sql_parser = selectStmt  # + stringEnd

# ignore comments
sqlComment = "--" + restOfLine
sql_parser.ignore(sqlComment)
| min | 7 | 43 | 7 | 15 | 82 | 98 | 1 | 37 | | max | 11 | 52 | 10 | 17 | 85 | 112 | 4 | 39 | | ave | 9 | 47 | 8 | 16 | 84 | 106 | 3 | 38 | | sdev | 1 | 3 | 1 | 1 | 1 | 3 | 1 | 1 | +-------+------+------+------+------+------+------+------+------+ """ # define grammar for datatable underline = Word("-=") number = Word(nums).setParseAction( lambda t : int(t[0]) ) vert = Literal("|").suppress() rowDelim = ("+" + ZeroOrMore( underline + "+" ) ).suppress() columnHeader = Group(vert + vert + delimitedList(Word(alphas + nums), "|") + vert) heading = rowDelim + columnHeader.setResultsName("columns") + rowDelim rowData = Group( vert + Word(alphas) + vert + delimitedList(number,"|") + vert ) trailing = rowDelim datatable = heading + Dict( ZeroOrMore(rowData) ) + trailing # now parse data and print results data = datatable.parseString(testData) print data print data.asXML("DATA") pprint.pprint(data.asList()) print "data keys=", data.keys() print "data['min']=", data['min'] print "sum(data['min']) =", sum(data['min']) print "data.max =", data.max print "sum(data.max) =", sum(data.max)
E = CaselessLiteral("E")
binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
arithSign = Word("+-", exact=1)

# Numeric literals.  A real number needs a decimal point and may carry a
# signed exponent; an integer may carry an exponent that is unsigned or "+".
_mantissa = (Word(nums) + "." + Optional(Word(nums))) | ("." + Word(nums))
_realExponent = Optional(E + Optional(arithSign) + Word(nums))
_intExponent = Optional(E + Optional("+") + Word(nums))
realNum = Combine(Optional(arithSign) + _mantissa + _realExponent)
intNum = Combine(Optional(arithSign) + Word(nums) + _intExponent)

# need to add support for alg expressions
columnRval = realNum | intNum | quotedString | columnName

# The four condition shapes, tried in this order.
_compare = columnName + binop + columnRval
_inList = columnName + in_ + "(" + delimitedList(columnRval) + ")"
_inSelect = columnName + in_ + "(" + selectStmt + ")"
_nested = "(" + whereExpression + ")"
whereCondition = Group(_compare | _inList | _inSelect | _nested)
whereExpression << whereCondition + ZeroOrMore((and_ | or_) + whereExpression)

# define the grammar
# The WHERE clause is optional and yields "" under the "where" name when
# absent.
_whereClause = Optional(
    Group(CaselessLiteral("where") + whereExpression), ""
).setResultsName("where")
selectStmt << (
    selectToken
    + ('*' | columnNameList).setResultsName("columns")
    + fromToken
    + tableNameList.setResultsName("tables")
    + _whereClause
)

simpleSQL = selectStmt

# define Oracle comment format, and ignore them
oracleSqlComment = "--" + restOfLine
simpleSQL.ignore(oracleSqlComment)
# A species is an optional "@compartment:" prefix followed by molecule
# instances joined with '.'.
_compartment_tag = Group('@' + Word(alphanums + '_')).setResultsName(
    'speciesCompartment')
_molecule_chain = delimitedList(molecule_instance, delim='.').setResultsName(
    'speciesPattern')
species_definition = Group(
    Optional(_compartment_tag + Suppress(':')) + _molecule_chain
)

# A reaction: '+'-separated reactants, an arrow, '+'-separated products and a
# bracketed rate made of one or two terms.  A rate term is a number or an
# identifier with an optional, discarded "()".
_species_sum = Group(delimitedList(species_definition, delim='+'))
_rate_term = numarg | (identifier + Suppress(Optional('()')))
_rate_spec = Group(
    lbracket + _rate_term + Optional(comma + _rate_term) + rbracket
)
reaction_definition = Group(
    _species_sum.setResultsName('reactants')
    + (uni_arrow | bi_arrow)
    + _species_sum.setResultsName('products')
    + _rate_spec.setResultsName('rate')
)

# generic hash section grammar
hashed_section = hashsymbol + Group(OneOrMore(name) + section_enclosure2_)

# hash system_constants
# system_constants = Group()
hashed_system_constants = Group(
    hashsymbol + Suppress(system_constants_)
    + lbrace + OneOrMore(statement) + rbrace
)

# hash molecule_entry
_diffusion_kind = (
    diffusion_constant_2d_.setResultsName('2D')
    | diffusion_constant_3d_.setResultsName('3D')
)
_diffusion_value = (
    function_entry_.setResultsName('function')
    | (identifier | numarg).setResultsName('variable')
)
diffusion_entry_ = Group(_diffusion_kind + Suppress(equal) + _diffusion_value)

# The braced body of a molecule entry: an optional diffusion entry followed
# by any number of statements.
_molecule_body = Group(
    lbrace
    + Optional(diffusion_entry_.setResultsName('diffusionFunction'))
    + ZeroOrMore(statement).setResultsName('moleculeParameters')
    + rbrace
)
molecule_entry = Group(molecule_definition + Optional(_molecule_body))
hashed_molecule_section = Group(
    hashsymbol + Suppress(define_molecules_)
    + lbrace + OneOrMore(molecule_entry) + rbrace
)

# hash function entry
function_name = Group(identifier + '()')
math_function_entry = Group(
    function_name.setResultsName('functionName')
    + Suppress(equal)
    + Group(restOfLine).setResultsName('functionBody')
)
hashed_function_section = Group(
    hashsymbol + Suppress(define_functions_)
    + lbrace + ZeroOrMore(math_function_entry) + rbrace
)

# hash reaction entry
hashed_reaction_section = Group(
    hashsymbol + Suppress(define_reactions_)
    + lbrace + OneOrMore(reaction_definition) + rbrace
)

# hash observable entry
count_definition = Group(
    count_
    + lbracket
    + species_definition.setResultsName('speciesPattern')
    + Suppress(',')
    + identifier
    + rbracket
)
_count_sum = Group(delimitedList(count_definition, delim='+'))
observable_entry = Group(
    lbrace
    + _count_sum.setResultsName('patterns')
    + rbrace
    + Suppress('=>')
    + quotedString.setResultsName('outputfile')
)
identifier = Word(alphas, alphanums + "_") selectToken = Keyword("select", caseless=True) fromToken = Keyword("from", caseless=True) orderByToken = Keyword("order", caseless=True) + Keyword("by", caseless=True) limitToken = Keyword("limit", caseless=True) columnNameList = Group(delimitedList(identifier | '*')) createBtableStatement = Keyword("create", caseless=True) + Keyword("btable", caseless=True) + \ identifier.setResultsName("tablename") + fromToken + identifier.setResultsName("filename") #orderByClause = orderByToken + selectStatement << (selectToken + columnNameList.setResultsName("columns") + fromToken + identifier.setResultsName("tablename") + Optional(whereClause) + Optional(orderByClause) + Optional(limitClause)) BQLStatement = (selectStatement | createBtableStatement) + Optional(';') BQL = ZeroOrMore(BQLStatement) ## allows comments dashComment = "--" + restOfLine BQL.ignore(dashComment) def test(str): print str, "->"
| min | 7 | 43 | 7 | 15 | 82 | 98 | 1 | 37 | | max | 11 | 52 | 10 | 17 | 85 | 112 | 4 | 39 | | ave | 9 | 47 | 8 | 16 | 84 | 106 | 3 | 38 | | sdev | 1 | 3 | 1 | 1 | 1 | 3 | 1 | 1 | +-------+------+------+------+------+------+------+------+------+ """ # define grammar for datatable underline = Word("-=") number = Word(nums).setParseAction(lambda s, l, t: (l, [int(t[0])])) vert = Literal("|").suppress() rowDelim = ("+" + ZeroOrMore(underline + "+")).suppress() columnHeader = Group(vert + vert + delimitedList(Word(alphas + nums), "|") + vert) heading = rowDelim + columnHeader.setResultsName("columns") + rowDelim rowData = Group(vert + Word(alphas) + vert + delimitedList(number, "|") + vert) trailing = rowDelim datatable = heading + Dict(ZeroOrMore(rowData)) + trailing # now parse data and print results data = datatable.parseString(testData) print data print data.asXML() pprint.pprint(data.asList()) print "data keys=", data.keys() print "data['min']=", data["min"] print "sum(data['min']) =", sum(data["min"]) print "data.max =", data.max print "sum(data.max) =", sum(data.max)
#PROPER_NOUN = Group(OneOrMore(NNP|NNPS)) VERB = OneOrMore(VDB|VBG|VBN|VBP|VBZ|VB) ADJECTIVE = OneOrMore(JJ) ADVERB = OneOrMore(RB) ARTICLE_NOUN = Group(DT + NOUN) ADJECTIVE_NOUN = Group(ADJECTIVE + NOUN) VERB_NOUN = Group(VERB + NOUN) POS_NOUN = Group((ARTICLE_NOUN | NOUN) + POS + (ADJECTIVE_NOUN | VERB_NOUN | NOUN)) ARTICLE_ADJECTIVE_NOUN = Group(DT + ADJECTIVE_NOUN) NOUN_AND_NOUN = Group((NOUN | ARTICLE_NOUN) + AND + (POS_NOUN | ARTICLE_NOUN | NOUN)) NOUN_OF_NOUN = Group((NOUN_AND_NOUN | ARTICLE_NOUN | NOUN) + OF + (NOUN_AND_NOUN | ARTICLE_NOUN | NOUN)) #TODO DT.setResultsName('article') NOUN.setResultsName('noun') #TODO expr = Forward() expr << (NOUN_OF_NOUN | NOUN_AND_NOUN | ARTICLE_ADJECTIVE_NOUN | ADJECTIVE_NOUN | POS_NOUN | ARTICLE_NOUN | NOUN) def read_in_file(): """Returns read in csv file(s).""" title_list = [] with open('C:/Users/din_m/Desktop/MA/booklist+.csv', 'rb') as f:
def create_filter():
    """ Build the grammar for the filter clause of a table scan.

    The clause is the ``filter`` keyword followed by one or more filter
    constraints joined by ``and``.  The constraints may be wrapped in
    parentheses; each parenthesis is optional on its own and is dropped from
    the results.  The grouped constraints are reported under the results
    name ``filter``.
    """
    opening = Optional(Suppress('('))
    closing = Optional(Suppress(')'))
    further_constraints = ZeroOrMore(Suppress(and_) + filter_constraint)
    filter_exp = Group(
        opening + filter_constraint + further_constraints + closing
    )
    named_filter = filter_exp.setResultsName('filter')
    return upkey('filter') + named_filter
comment = cppStyleComment field = Group( Optional(comment).setResultsName("doc") + identifier.setResultsName("name") + ':' + basic_type.setResultsName("type") + Optional(':' + bit_width.setResultsName("width")) + ';' ) struct = originalTextFor( Group( Optional(comment).setResultsName("doc") + Keyword("struct") + identifier.setResultsName("name") + "{" + OneOrMore(field.setResultsName("fields", listAllMatches=True)) + "}" ), asString=False ) vector = Group( Keyword("vector") + "<" + qualified_identifier.setResultsName("type") + ">" ) multivector = Group( Keyword("multivector") + "<" + bit_width.setResultsName("width") + "," + delimitedList(qualified_identifier.setResultsName("type", listAllMatches=True), ",") + ">" ) single_object = Group( qualified_identifier.setResultsName("type")
RvalList = Group(delimitedList(Rval))
# need to add support for alg expressions
columnRval = Rval | columnName

# Condition shapes, tried in this order.  The parentheses around a nested
# expression are suppressed from the results.
_binaryTest = columnName + binop + columnRval
_inValues = columnName + in_ + "(" + delimitedList(columnRval) + ")"
_inSubquery = columnName + in_ + "(" + selectStmt + ")"
_grouped = Suppress("(") + whereExpression + Suppress(")")
whereCondition = Group(_binaryTest | _inValues | _inSubquery | _grouped)
whereExpression << whereCondition + ZeroOrMore((and_ | or_) + whereExpression)

# define the grammar
# The WHERE clause is optional and yields "" under the "where" name when
# absent.
_optionalWhere = Optional(
    Group(CaselessLiteral("where") + whereExpression), ""
).setResultsName("where")
selectStmt << (
    selectToken
    + ('*' | columnNameList).setResultsName("columns")
    + fromToken
    + tableNameList.setResultsName("tables")
    + _optionalWhere
)

# INSERT statements
insertToken = Keyword("insert", caseless=True)
intoToken = Keyword("into", caseless=True)
valuesToken = Keyword("values", caseless=True)
# INSERT INTO table (columns) VALUES (values)
_targetColumns = "(" + columnNameList.setResultsName("columns") + ")"
_insertedValues = "(" + RvalList.setResultsName("vals") + ")"
insertStmt = (
    insertToken + intoToken
    + tableName.setResultsName("tables")
    + _targetColumns
    + valuesToken
    + _insertedValues
)

# DELETE statements
deleteToken = Keyword("delete", caseless=True)
Optional( E + Optional("+") + Word(nums) ) ) columnRval = realNum | intNum | quotedString | columnName # need to add support for alg expressions whereCondition = Group( ( columnName + binop + columnRval ) | ( columnName + in_ + "(" + delimitedList( columnRval ) + ")" ) | ( columnName + in_ + "(" + selectStmt + ")" ) | ( "(" + whereExpression + ")" ) ) whereExpression << whereCondition + ZeroOrMore( ( and_ | or_ ) + whereExpression ) # define the grammar selectStmt << ( selectToken + ( '*' | columnNameList ).setResultsName( "columns" ) + fromToken + tableNameList.setResultsName( "tables" ) + Optional( Group( CaselessLiteral("where") + whereExpression ), "" ).setResultsName("where") ) simpleSQL = selectStmt # define Oracle comment format, and ignore them oracleSqlComment = "--" + restOfLine simpleSQL.ignore( oracleSqlComment ) test( "SELECT * from XYZZY, ABC" ) test( "select * from SYS.XYZZY" ) test( "Select A from Sys.dual" ) test( "Select A,B,C from Sys.dual" ) test( "Select A, B, C from Sys.dual" ) test( "Select A, B, C from Sys.dual, Table2 " )
whereExpression = Forward()

intNum = Word(nums)

selectOperation = inToken | containsToken
# "key":"value"
propPair = quotedString + ":" + quotedString
# ("a", "b", ...)
quotedStringList = "(" + delimitedList(quotedString) + ")"

# A column predicate: column, IN/CONTAINS, a value list, then an optional
# EXCEPT value list and an optional WITH property list.
_valueList = quotedStringList.setResultsName("value_list")
_exceptPart = Optional(
    exceptToken + quotedStringList.setResultsName("except_values")
)
_withPart = Optional(
    withToken + "(" + delimitedList(propPair).setResultsName("prop_list") + ")"
)
_columnPredicate = (
    columnName + selectOperation + _valueList + _exceptPart + _withPart
)
# The alternative predicate: the query token, IS, and a quoted string.
_queryPredicate = queryToken + isToken + quotedString
whereCondition = Group(_columnPredicate | _queryPredicate)

# Conditions are chained with AND only; every one is collected under
# "condition".
_namedCondition = whereCondition.setResultsName(
    "condition", listAllMatches=True)
whereExpression << (_namedCondition + ZeroOrMore(andToken + whereExpression))

orderseq = ascToken | descToken

# ORDER BY takes a comma-separated list of "column [asc|desc]" specs; every
# spec is collected under "orderby_spec".
orderByExpression = Forward()
orderBySpec = Group(columnName + Optional(orderseq))
_namedOrderSpec = orderBySpec.setResultsName(
    "orderby_spec", listAllMatches=True)
orderByExpression << (_namedOrderSpec + ZeroOrMore("," + orderByExpression))
orderByClause = (orderbyToken + orderByExpression).setResultsName("orderby")

# LIMIT [n ,] m
_limitArgs = Group(Optional(intNum + ",") + intNum)
limitClause = (limitToken + _limitArgs).setResultsName("limit")

trueOrFalse = trueToken | falseToken

facetOrderBy = hitsToken | valueToken
def gen_parser():
    """Build and return the pyparsing grammar for one SELECT statement.

    The statement has the shape (keyword spellings come from ``QueryTokens``)::

        SELECT columns FROM sources [INTO location] [WHERE expression]
            [GROUPBY columns] [WINDOW <n> seconds|minutes|hours|days]

    The parsed pieces are exposed under the results names ``select``,
    ``sources``, ``into``, ``where``, ``groupby`` and ``window``; the optional
    clauses default to ``""`` when absent.  Text from ``--`` to the end of the
    line is ignored as a comment.

    Returns:
        The ``Forward`` expression that matches the whole statement.
    """

    def _upcased_keyword(word):
        # Case-insensitive keyword whose matched token is upper-cased.
        return Keyword(word, caseless=True).setParseAction(upcaseTokens)

    def _labelled(expr, tag):
        # Wrap ``expr`` in its own Forward so the label parse action is
        # attached to the wrapper rather than to ``expr`` itself.
        wrapper = Forward()
        wrapper << expr
        wrapper.setParseAction(label(tag))
        return wrapper

    # --- SQL keywords -----------------------------------------------------
    select_stmt = Forward()
    select_kw = Keyword(QueryTokens.SELECT, caseless=True)
    from_kw = Keyword(QueryTokens.FROM, caseless=True)
    into_kw = Keyword(QueryTokens.INTO, caseless=True)
    group_by_kw = Keyword(QueryTokens.GROUPBY, caseless=True)
    window_kw = Keyword(QueryTokens.WINDOW, caseless=True)
    as_kw = _upcased_keyword(QueryTokens.AS)
    # NULL is the only case-sensitive keyword; its token is replaced.
    null_kw = Keyword(QueryTokens.NULL, caseless=False).setParseAction(
        replace(QueryTokens.NULL_TOKEN))

    # --- Operators and numeric literals -------------------------------------
    exponent_mark = CaselessLiteral("E")
    comparison_op = oneOf(
        "= != < > >= <= == eq ne lt le gt ge %s" % (QueryTokens.CONTAINS),
        caseless=True).setParseAction(upcaseTokens)
    sign = Word("+-", exact=1)

    # Real numbers: "1.", "1.5" or ".5", optionally signed, optional exponent.
    mantissa = (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums)))
    real_exponent = Optional(exponent_mark + Optional(sign) + Word(nums))
    real_num = Combine(Optional(sign) + mantissa + real_exponent)

    # Integers: only a "+" sign is accepted on the exponent.
    int_exponent = Optional(exponent_mark + Optional("+") + Word(nums))
    int_num = Combine(Optional(sign) + Word(nums) + int_exponent)

    # --- Identifiers and labelled literals ----------------------------------
    ident = Word(alphas, alphanums + "_$").setName("identifier")
    column_name = delimitedList(ident, ".", combine=True).setParseAction(
        label(QueryTokens.COLUMN_NAME))
    alias_name = delimitedList(ident, ".", combine=True)

    string_literal = _labelled(quotedString, QueryTokens.STRING_LITERAL)
    int_literal = _labelled(int_num, QueryTokens.INTEGER_LITERAL)
    float_literal = _labelled(real_num, QueryTokens.FLOAT_LITERAL)

    # --- Column expressions (recursive through function arguments) ----------
    column_expr = Forward()
    function_argument = (Group(float_literal)
                         | Group(string_literal)
                         | Group(int_literal)
                         | column_expr)
    column_function = (Word(alphas, alphanums)
                       + "("
                       + Optional(delimitedList(function_argument))
                       + ")")
    column_function.setParseAction(label(QueryTokens.FUNCTION_OR_AGGREGATE))
    column_expr << Group(
        (column_function | column_name) + Optional(as_kw + alias_name))
    column_expr_list = Group(delimitedList(column_expr))

    # --- Sources, window and INTO target ------------------------------------
    table_name = delimitedList(ident, ".", combine=True).setParseAction(
        upcaseTokens)
    table_name_list = Group(delimitedList(table_name))
    time_expr = Word(nums) + oneOf(
        "seconds minutes hours days",
        caseless=True).setParseAction(downcaseTokens)

    stdout_kw = _upcased_keyword(QueryTokens.STDOUT)
    table_kw = _upcased_keyword(QueryTokens.TABLE)
    stream_kw = _upcased_keyword(QueryTokens.STREAM)
    into_location = stdout_kw | (table_kw + ident) | (stream_kw + ident)

    # --- WHERE expression ----------------------------------------------------
    where_expr = Forward()
    and_ = _upcased_keyword(QueryTokens.AND)
    or_ = _upcased_keyword(QueryTokens.OR)
    in_ = _upcased_keyword(QueryTokens.IN)

    # NOTE(review): setParseAction works in place; if quotedString is the
    # shared module-level pyparsing expression, this also changes what
    # string_literal above produces -- confirm that is intended.
    column_rval = (real_num
                   | int_num
                   | null_kw
                   | column_expr
                   | quotedString.setParseAction(removeQuotes))

    comparison_cond = (
        column_expr + comparison_op + column_rval
    ).setParseAction(label(QueryTokens.WHERE_CONDITION))
    in_list_cond = (
        column_expr + in_ + "(" + delimitedList(column_rval) + ")"
    ).setParseAction(label(QueryTokens.WHERE_CONDITION))
    in_subquery_cond = (
        column_expr + in_ + "(" + select_stmt + ")"
    ).setParseAction(label(QueryTokens.WHERE_CONDITION))
    parenthesised = ("(" + where_expr + ")")

    where_condition = Group(
        comparison_cond | in_list_cond | in_subquery_cond | parenthesised)
    where_expr << (where_condition + ZeroOrMore((and_ | or_) + where_expr))

    # --- The full statement ----------------------------------------------------
    select_stmt << (
        Group(select_kw + column_expr_list).setResultsName("select")
        + from_kw
        + table_name_list.setResultsName("sources")
        + Optional(into_kw + into_location, "").setResultsName("into")
        + Optional(
            Group(CaselessLiteral(QueryTokens.WHERE) + where_expr),
            "").setResultsName("where")
        + Optional(group_by_kw + column_expr_list, "").setResultsName(
            "groupby")
        + Optional(window_kw + time_expr, "").setResultsName("window")
    )

    grammar = select_stmt

    # Oracle-style comments ("--" to end of line) are skipped while parsing.
    sql_comment = "--" + restOfLine
    grammar.ignore(sql_comment)
    return grammar
intLit = Word(nums) C = Forward() T = ((stringLit | intLit | w) + oneOf(_ops) + (stringLit | intLit | w)) | (Suppress("(") + C + Suppress(")")) C << ((T + combinators + T) | T) bvalues = Group(Suppress("(") + values + Suppress(")")) svalues = (bvalues | '*').setResultsName("columns") where_clause = CaselessLiteral("WHERE") + C where = Optional(where_clause, "").setResultsName("where") op = operators.setResultsName("op") sgram = op + svalues + where + StringEnd() insert = CaselessLiteral("INSERT").setResultsName( "op") + bvalues.setResultsName('ivalues') + StringEnd() delete = CaselessLiteral("DELETE").setResultsName("op") + where + StringEnd() stmt = sgram | insert | delete def parse_sql(s): d = {'stmt': s} try: r = stmt.parseString(s) except pyPE, e: raise ParseException(e) o = r.op.lower() if o == "insert": d['values'] = list(r.ivalues) return Insert(d) elif o == "select":
TODO: this will only work if the neighbors are grouped together - is that sufficient for configs? or can we assume that they will be ordered if output from the router """ bgp_neighbor = Group( ("neighbor" + ipV4Address("neighbor") + "remote-as" + integer("asn"))("remote-as") & Optional("neighbor" + ipV4Address("neighbor") + "update-source" + ipV4Address("update-source"))("update-source") & Optional("neighbor" + ipV4Address("neighbor") + true_if_set("send-community"))("send-community") & Optional("neighbor" + ipV4Address("neighbor") + true_if_set("next-hop-self"))("next-hop-self") ) neighbors = OneOrMore( bgp_neighbor.setResultsName("neighbor", listAllMatches=True) | Suppress(comment) #)("neighbors") ).setResultsName("neighbors") bgp_indent = OneOrMore( ("bgp router-id" + router_id("router-id"))("router-id") | bgp_synchronization | bgp_networks | comment | neighbors ) bgp_stanza = (router_bgp + bgp_indent).setParseAction(fn_bgp)("bgp")
# Remaining data types.  The sized string types carry their length under the
# results name 'size'.
datetime_type = Group(DATETIME)
date_type = Group(DATE)
string_size = integer_literal.setResultsName('size')
nvarchar_type = Group(VARCHAR + LPAR + string_size + RPAR)
nchar_type = Group(CHAR + LPAR + string_size + RPAR)
data_type = (
    integer_type
    | float_type
    | datetime_type
    | date_type
    | nvarchar_type
    | nchar_type
).setResultsName('data_type')

# Table identifier
table_name = identifier.setResultsName("table_name")

# Column identifier: either "table.column" or a bare "column".
column_name = identifier.setResultsName("column_name")
fully_qualified_column_name = table_name + dot + column_name
column = Group(
    fully_qualified_column_name | column_name
).setResultsName('column')

# Boolean and Arithmetic expression:
# =================================
# Bool and Arith expressions are used in the where clause ( WHERE a > 3*c) and
# in the projections (SELECT days/7 AS weeks). We based our expressions on the
# following grammar:
#
#   <bool-expr>     ::= <bool-term> [OR <bool-term>]*
#   <bool-term>     ::= <bool-factor> [AND <bool-factor>]*
#   <bool-factor>   ::= [NOT] <predicate>
#   <predicate>     ::= <arith-expr> [<pred-op> <arith-expr>]
#   <arith-expr>    ::= <term> [<add-op> <term>]*
#   <term>          ::= <signed-factor> [<mult-op> <factor>]*
#   <signed-factor> ::= [<sign>] <factor>
#   <factor>        ::= <literal> | <column> | <function> | (<bool-expr>)
img = Group( Suppress(Word("img")) + identifier + ZeroOrMore(Suppress(",") + identifier)) name = Suppress(Word("name")) + identifier typeblock = Suppress(Word("blocktype")) + identifier var = identifier.setResultsName("type") + identifier.setResultsName( "varName") + Word(nums).setResultsName("defaultValue") variables = Suppress(Word("variables")) + op + Group(var) + ZeroOrMore( Group(var)) + cl block = Group( CaselessLiteral("block") + op + Group( type.setResultsName("type") + name.setResultsName("name") + file.setResultsName("file") + Optional(variables.setResultsName("variables")) + img.setResultsName("img") + cl)) parser = block + ZeroOrMore(block) config = """ block{ type operador name + file None img blocks/block4, blocks/block3 blocktype simple } block{ type operador
# Numeric literals.
# realNum: optionally signed "1.", "1.5" or ".5" with an optional exponent.
realNum = Combine(
    Optional(arithSign)
    + (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums)))
    + Optional(E + Optional(arithSign) + Word(nums)))
# intNum: optionally signed digits; only "+" is accepted on the exponent.
intNum = Combine(
    Optional(arithSign) + Word(nums)
    + Optional(E + Optional("+") + Word(nums)))

# need to add support for alg expressions
columnRval = realNum | intNum | quotedString | columnName

# Nested SELECT used inside parentheses.  It was previously spelled out three
# times; it is defined once here so every use stays in sync.  ZeroOrMore keeps
# the original behaviour of also accepting empty parentheses.
_nestedSelect = ZeroOrMore(
    selectToken
    + ('*' | columnNameList).setResultsName("nestedcolumns")
    + fromToken
    + tableNameList.setResultsName("nestedtables")
    + Optional(
        ZeroOrMore(CaselessLiteral("where") + whereExpression),
        "").setResultsName("nestedwhere"))

# Alternatives are tried in order and the first match wins, so:
#  * exact duplicates of an earlier alternative can never match and have been
#    removed;
#  * the optional `oper ( nested select )` suffix is attached directly to the
#    parenthesised expression.  As a separate, later alternative it was
#    unreachable, because the bare `( whereExpression )` alternative always
#    matched first.
whereCondition = ZeroOrMore(
    (columnName + binop + columnRval)
    | (columnName + in_ + "(" + delimitedList(columnRval) + ")")
    | (columnName + in_ + "(" + _nestedSelect + ")")
    | ("(" + whereExpression + ")"
       + Optional(oper + "(" + _nestedSelect + ")"))
)

whereExpression << whereCondition + ZeroOrMore((and_ | or_) + whereExpression)
# Message header: priority and version followed by five SP-separated fields.
header = Group(
    pri.setResultsName('pri')
    + version.setResultsName('version')
    + SP + timestamp.setResultsName('timestamp')
    + SP + hostname.setResultsName('hostname')
    + SP + appname.setResultsName('appname')
    + SP + procname.setResultsName('procname')
    + SP + msgid.setResultsName('msgid')
)

# Structured-data names: 1 to 32 characters drawn from NameAscii.
sd_name = Word(NameAscii, min=1, max=32)
sd_id = sd_name
param_name = sd_name

# name="value".  Inside the value a backslash escapes the next character
# (\" \\ \]).  escQuote is deliberately NOT set: passing escQuote='\\' made a
# lone backslash count as an "escaped quote" sequence, so a value containing
# \" was cut off at that quote and backslashes were rewritten to '"'.
sd_param = Group(
    param_name.setResultsName('param_name')
    + Suppress(Literal("="))
    + QuotedString(quoteChar='"', escChar='\\').setResultsName('param_value')
)

# [sd_id name="value" ...]
sd_element = Group(
    Suppress("[")
    + sd_id.setResultsName('sd_id')
    + ZeroOrMore(SP + sd_param).setResultsName('sd_params')
    + Suppress("]")
)

# Either NilValue or one or more bracketed elements.
structured_data = (
    NilValue | Group(OneOrMore(sd_element))
).setResultsName('sd_element')

# The free-form message is everything up to and including the line end.
msg = Combine(restOfLine + lineEnd)

syslog_message = (
    header.setResultsName('header')
    + SP + structured_data.setResultsName('sd')
    + Optional(SP + msg.setResultsName('msg'))
)

__all__ = ['syslog_message', 'structured_data', 'header']
# Quoted strings: the body may contain ordinary characters, a backslash
# escape, or a doubled quote character.
dblQuotedString = Combine(
    _dblQuote
    + ZeroOrMore(CharsNotIn('\\"\n\r') | _escapedChar | '""')
    + _dblQuote
).streamline().setName("string enclosed in double quotes")

sglQuotedString = Combine(
    _sglQuote
    + ZeroOrMore(CharsNotIn("\\'\n\r") | _escapedChar | "''")
    + _sglQuote
).streamline().setName("string enclosed in single quotes")

# A quoted argument has its surrounding quotes stripped.
quotedArg = (dblQuotedString | sglQuotedString)
quotedArg.setParseAction(removeQuotes).setName("quotedArg")

# A plain argument is any run of printable characters other than '#' and the
# two quote characters.
plainArgChars = "".join(ch for ch in printables if ch not in "#\"'")
plainArg = Word(plainArgChars).setName("plainArg")

# Zero or more arguments, grouped under the results name 'arguments'.
arguments = Group(
    ZeroOrMore(quotedArg | plainArg)
).setResultsName('arguments').setName("arguments")

# comment line.
comment = (Literal('#') + restOfLine).suppress().setName('comment')

# A line is either only a comment, or a command with its arguments and an
# optional trailing comment.
full_command = (comment | (command + arguments + Optional(comment)))
full_command.setName('full_command')

###

command_list = []  # filled in by namespaces.init_global_dict().

# command/argument handling.
def compute(self, text, verbose=True): # Literals dollar = Literal('$') amper = Literal('&') at = Literal('@') qm = Literal('?') em = Literal('!') dot = Literal('.') colon = Literal(":") vbar = Literal("|") lbrack = Literal("[") rbrack = Literal("]") lcurly = Literal("{") rcurly = Literal("}") lparen = Literal("(") rparen = Literal(")") lt = Literal("<") gt = Literal(">") eq = Literal("=") deq = Literal("==") # Reusables spellId = Word(nums, min=2, max=6).addParseAction( tokenMap(int)).setResultsName("spellId") idx = Word(nums, max=1).addParseAction(tokenMap(int)).setResultsName("id") var = Word(alphas).setResultsName("var") # Spell References effectId = Optional( Word(nums, max=2).addParseAction( tokenMap(int)).setResultsName("effectId")) references = (dollar.suppress() + ((at.suppress() + var + Optional(spellId)) | (spellId + var + effectId) | (var + effectId))).addParseAction(self.setReferences) # Conditions brackets = Suppress(lbrack) + SkipTo(rbrack).setResultsName( "statement") + Suppress(rbrack) value = Word(nums, max=5).addParseAction( tokenMap(int)).setResultsName("value") conditionVar = Group( Optional(em).setResultsName("not") + Optional(var) + (spellId | idx) | Optional("-") + value | Word(alphanums, exact=8).setResultsName("hashVariable")) conditions = ((dollar + qm).suppress() + OneOrMore( Group( Optional(Suppress(qm)) + Optional(Suppress(lparen)) + OneOrMore( conditionVar.setResultsName("variables*") + Optional(Combine(em + eq) | amper | vbar | deq | lt | gt).setResultsName("operators*")) + Optional(Suppress(rparen)) + brackets).setResultsName("conditions*")) + brackets).addParseAction(lambda t: self.setConditions( t, verbose=verbose)) + Optional(dot.suppress()) # Call Variable callVariables = (Suppress((lt + dollar) | (dollar + lt)) + SkipTo(gt).setResultsName("name") + Suppress(gt)).addParseAction(self.callVariables) # Expressions expressions = ( Suppress(dollar + lcurly) + SkipTo(rcurly).setResultsName("content") + rcurly + Optional( 
dot.suppress() + Word(nums, exact=1).addParseAction( tokenMap(int)).setResultsName("mod"), ) ).addParseAction(lambda t: self.setExpressions(t, verbose=verbose)) # Language Choices languageChoices = ( (Literal('$L') | Literal('$l')).suppress() + OneOrMore(Word(alphas) + Optional(Literal(":").suppress()) ).setResultsName("options*") + Literal(';').suppress()).addParseAction(self.setLanguageChoices) # Icons icons = (Literal("|T").suppress() + SkipTo(colon).setResultsName("path") + colon.suppress() + Word(nums, exact=2).addParseAction( tokenMap(int)).setResultsName("size") + Literal("|t").suppress()).addParseAction(self.setIcons) # Parsing layer by layer parsingOrder = [ icons, languageChoices, callVariables, references, expressions, conditions ] steps = [text] for parser in parsingOrder: steps.append(parser.transformString(steps[-1])) result = steps[-1] # Replace each Sha1 Hash placeholder by refering value if verbose: for k, v in self.variables.items(): result = result.replace(k, str(v)) # Display fixes displayFixes = [["*% of", "% of"], ["power)%", "power)"]] for bef, aft in displayFixes: result = result.replace(bef, aft) return super(SpellDescriptionParser, self).compute(result, verbose)