def parse(str):
    """Parse a simple SQL ``SELECT`` statement with a pyparsing grammar.

    The grammar accepts ``select <columns | *> from <tables>`` followed by an
    optional ``where`` clause built from comparisons, ``in (...)`` lists,
    ``in (<nested select>)`` and parenthesised sub-expressions joined by
    ``and`` / ``or``.  ``--`` comments running to the end of a line are
    ignored.

    Args:
        str: The SQL text to parse.  The name shadows the builtin; it is kept
            unchanged so existing keyword callers keep working.

    Returns:
        The pyparsing results of ``parseString`` (with the named results
        ``columns``, ``tables`` and ``where``) on success, or the empty
        string ``''`` when the input does not match the grammar.  On failure
        a caret positioned at the error column, the error message and the
        exception itself are printed.
    """
    # Fallback result handed back when parsing fails.
    tokens = ''

    # define SQL tokens
    selectStmt = Forward()
    selectToken = Keyword("select", caseless=True)
    fromToken = Keyword("from", caseless=True)

    ident = Word(alphas, alphanums + "_$").setName("identifier")
    columnName = Upcase(delimitedList(ident, ".", combine=True))
    columnNameList = Group(delimitedList(columnName))
    tableName = Upcase(delimitedList(ident, ".", combine=True))
    tableNameList = Group(delimitedList(tableName))

    whereExpression = Forward()
    and_ = Keyword("and", caseless=True)
    or_ = Keyword("or", caseless=True)
    in_ = Keyword("in", caseless=True)

    E = CaselessLiteral("E")
    binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
    arithSign = Word("+-", exact=1)
    # Real number: digits with a decimal point (either side may carry the
    # digits), optionally signed and optionally followed by an exponent.
    realNum = Combine(
        Optional(arithSign) +
        (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) +
        Optional(E + Optional(arithSign) + Word(nums)))
    intNum = Combine(
        Optional(arithSign) + Word(nums) +
        Optional(E + Optional("+") + Word(nums)))

    columnRval = realNum | intNum | quotedString | columnName  # need to add support for alg expressions
    whereCondition = Group(
        (columnName + binop + columnRval) |
        (columnName + in_ + "(" + delimitedList(columnRval) + ")") |
        (columnName + in_ + "(" + selectStmt + ")") |
        ("(" + whereExpression + ")"))
    whereExpression << whereCondition + ZeroOrMore(
        (and_ | or_) + whereExpression)

    # define the grammar
    selectStmt << (selectToken +
                   ('*' | columnNameList).setResultsName("columns") +
                   fromToken +
                   tableNameList.setResultsName("tables") +
                   Optional(Group(CaselessLiteral("where") + whereExpression),
                            "").setResultsName("where"))

    simpleSQL = selectStmt

    # define Oracle comment format, and ignore them
    oracleSqlComment = "--" + restOfLine
    simpleSQL.ignore(oracleSqlComment)

    try:
        tokens = simpleSQL.parseString(str)
    except ParseException as err:
        # Single-argument print(...) produces the same output whether print
        # is a statement (Python 2) or a function (Python 3).
        print(" " * err.loc + "^\n" + err.msg)
        print(err)

    # Hand the result back; previously it was computed and then discarded.
    return tokens
def grammar():
    """Build the pyparsing expression for a rule written ``left => right``.

    Each side is a polynomial: a group of one or more monomials.  A monomial
    is either a parenthesised polynomial (the parentheses themselves are
    suppressed from the output) or a single upper-cased letter that may be
    preceded by ``!`` and followed by one of the operators ``+``, ``|`` or
    ``^``.

    Returns:
        The combined expression, with the two polynomials exposed under the
        result names ``left`` and ``right``.
    """
    def operator(symbol):
        # Literal whose parse action yields the operator symbol itself.
        return Literal(symbol).setParseAction(lambda: symbol)

    conjunction = operator('+')
    negation = operator('!')
    disjunction = operator('|')
    exclusive_or = operator('^')
    connective = conjunction | disjunction | exclusive_or

    open_paren = Literal("(").suppress()
    close_paren = Literal(")").suppress()

    # Declared up front because a monomial may nest a whole polynomial.
    polynomial = Forward()

    variable = Upcase(Word(alphas, max=1))
    plain_term = Optional(negation) + variable + Optional(connective)
    nested_term = open_paren + polynomial + close_paren
    monomial = plain_term | nested_term

    polynomial << Group(monomial + ZeroOrMore(monomial))

    left_side = polynomial.setResultsName('left')
    right_side = polynomial.setResultsName('right')
    return left_side + '=>' + right_side
print # define SQL tokens selectStmt = Forward() condition = Forward() treegram = Forward() comps = Forward() selectToken = Keyword("select", caseless=True) fromToken = Keyword("from", caseless=True) astoken = Keyword("AS", caseless=True) groupToken = Keyword("GROUPBY", caseless=True) havingToken = Keyword("HAVING", caseless=True) ident = Word(alphas, alphanums + "_$").setName("identifier") columnName = Upcase(delimitedList(ident, ".", combine=True)) columnNameList = Group(delimitedList(columnName)) columnNameList1 = Group(delimitedList(columnName)) groupNameList = Group(delimitedList(columnName)) tableName = Upcase(delimitedList(ident, ".", combine=True)) tableName2 = Upcase(delimitedList(ident, ".", combine=True)) tableAlias = tableName + astoken + tableName2 tableNameList = Group(delimitedList(tableAlias | tableName)) whereExpression = Forward() and_ = Keyword("and", caseless=True) or_ = Keyword("or", caseless=True) in_ = Keyword("in", caseless=True) E = CaselessLiteral("E") binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
class BaseParser(object):
    """Parses an alignment and returns species sequence tuples

    The grammar comes from ``init_parser()``, which is not defined in the
    visible part of this class and is presumably supplied elsewhere (for
    example by a subclass) -- verify.  ``parse`` runs that grammar and then
    validates the ``(name, sequence)`` pairs held in ``self.sequences``.
    """

    # I think this covers it...
    BASES = Upcase(Word(alphas + "?.-"))

    def __init__(self):
        # Header values; they stay None unless something sets them before
        # the checks in parse() run.
        self.sequence_length = None
        self.species_count = None
        # (species name, sequence) pairs validated by parse().
        self.sequences = []
        self.current_sequence = 0

        # The whole input must be consumed, hence the trailing stringEnd.
        self.root_parser = self.init_parser() + stringEnd

    def parse(self, s):
        """Parse alignment text ``s`` and validate the collected sequences.

        ``self.sequences`` is not filled in by this method directly; it is
        presumably populated by parse actions attached to the grammar --
        confirm against ``init_parser``.

        After a successful call ``self.sequence_length`` and
        ``self.species_count`` are set: either they were already present and
        have been checked against the data, or they are derived from it.

        Raises:
            AlignmentError: if the text does not match the grammar, no
                sequences were collected, a species name is repeated, the
                sequences differ in length, or a header value disagrees
                with the parsed data.
        """
        try:
            self.root_parser.parseString(s)
        except ParseException as p:
            log.error("Error in Alignment Parsing:" + str(p))
            log.error("A common cause of this error is having whitespace"
                      ", i.e. spaces or tabs, in the species names. Please check this and remove"
                      " all whitespace from species names, or replace them with e.g. underscores")
            raise AlignmentError

        # Without this guard an alignment with no sequences and no header
        # failed with a bare IndexError further down instead of the
        # AlignmentError used for every other validation failure.
        if not self.sequences:
            log.error("Bad alignment file: no sequences were found")
            raise AlignmentError

        # Check that all the sequences are equal length
        slen = None
        names = set()
        for nm, seq in self.sequences:
            if nm in names:
                log.error("Repeated species name '%s' is repeated "
                          "in alignment", nm)
                raise AlignmentError
            names.add(nm)

            if slen is None:
                # Use the first as the test case
                slen = len(seq)
            elif len(seq) != slen:
                log.error(
                    "Bad alignment file: Not all species have the same sequences length")
                raise AlignmentError

        # Not all formats have a heading, but if we have one do some checking
        if self.sequence_length is None:
            # slen is the length of the first sequence, which all the others
            # have just been checked against.
            self.sequence_length = slen
        elif self.sequence_length != slen:
            log.error("Bad Alignment file: sequence length count in header does not match"
                      " sequence length in file, please check")
            raise AlignmentError

        if self.species_count is None:
            self.species_count = len(self.sequences)
        elif len(self.sequences) != self.species_count:
            log.error("Bad Alignment file: species count in header does not match"
                      " number of sequences in file, please check")
            raise AlignmentError