Ejemplo n.º 1
0
def parse(str):
    """Parse a simple SQL ``SELECT`` statement and return its tokens.

    The grammar built here accepts::

        SELECT <* | column list> FROM <table list> [WHERE <expression>]

    Column and table names are dotted identifiers wrapped in ``Upcase``;
    ``--`` comments running to the end of the line are ignored.

    Args:
        str: The SQL text to parse.  The name shadows the builtin but is
            kept so that existing keyword callers keep working.

    Returns:
        The pyparsing parse results, with the result names ``columns``,
        ``tables`` and ``where``.  If the text does not match the grammar,
        the error is printed and the empty string ``''`` is returned.
    """
    # Value handed back when parsing fails.
    tokens = ''

    # define SQL tokens
    selectStmt = Forward()
    selectToken = Keyword("select", caseless=True)
    fromToken = Keyword("from", caseless=True)

    # Identifiers start with a letter and continue with letters, digits,
    # "_" or "$"; dotted names are combined into a single token.
    ident = Word(alphas, alphanums + "_$").setName("identifier")
    columnName = Upcase(delimitedList(ident, ".", combine=True))
    columnNameList = Group(delimitedList(columnName))
    tableName = Upcase(delimitedList(ident, ".", combine=True))
    tableNameList = Group(delimitedList(tableName))

    # WHERE expressions are recursive, hence the forward declaration.
    whereExpression = Forward()
    and_ = Keyword("and", caseless=True)
    or_ = Keyword("or", caseless=True)
    in_ = Keyword("in", caseless=True)

    E = CaselessLiteral("E")
    binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
    arithSign = Word("+-", exact=1)
    # Real number: optional sign, then "digits." with optional fraction or
    # ".digits", then an optional exponent.
    realNum = Combine(
        Optional(arithSign) +
        (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) +
        Optional(E + Optional(arithSign) + Word(nums)))
    # Integer: optional sign, digits, optional exponent (only "+" is
    # accepted as the exponent sign).
    intNum = Combine(
        Optional(arithSign) + Word(nums) +
        Optional(E + Optional("+") + Word(nums)))

    # realNum is tried before intNum so that "1.5" is not cut short at "1".
    columnRval = realNum | intNum | quotedString | columnName  # need to add support for alg expressions
    # A condition is one of: a binary comparison, "IN (value list)",
    # "IN (sub-select)", or a parenthesised WHERE expression.
    whereCondition = Group((columnName + binop + columnRval)
                           | (columnName + in_ + "(" +
                              delimitedList(columnRval) + ")")
                           | (columnName + in_ + "(" + selectStmt + ")")
                           | ("(" + whereExpression + ")"))
    whereExpression << whereCondition + ZeroOrMore(
        (and_ | or_) + whereExpression)

    # define the grammar
    selectStmt << (selectToken +
                   ('*' | columnNameList).setResultsName("columns") +
                   fromToken + tableNameList.setResultsName("tables") +
                   Optional(Group(CaselessLiteral("where") + whereExpression),
                            "").setResultsName("where"))

    simpleSQL = selectStmt

    # define Oracle comment format, and ignore them
    oracleSqlComment = "--" + restOfLine
    simpleSQL.ignore(oracleSqlComment)

    try:
        tokens = simpleSQL.parseString(str)
    except ParseException as err:
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print(" " * err.loc + "^\n" + err.msg)
        print(err)

    # Hand the result back instead of discarding it.
    return tokens
Ejemplo n.º 2
0
def grammar():
    """Build the pyparsing grammar for a rule ``<polynomial> => <polynomial>``.

    A polynomial is a group of one or more monomials.  A monomial is either
    an optional ``!`` followed by a single letter (wrapped in ``Upcase``)
    and an optional operator (``+``, ``|`` or ``^``), or a parenthesised
    polynomial; the parentheses themselves are suppressed from the results.

    Returns:
        The parser element for the whole rule, with the two polynomials
        available under the results names ``left`` and ``right``.
    """

    def symbol(char):
        # Literal whose parse action yields the operator character itself.
        return Literal(char).setParseAction(lambda: char)

    conjunction = symbol('+')
    negation = symbol('!')
    disjunction = symbol('|')
    exclusive_or = symbol('^')
    open_paren = Literal("(").suppress()
    close_paren = Literal(")").suppress()

    # Polynomials nest through parentheses, so declare the rule up front.
    expression = Forward()
    connective = conjunction | disjunction | exclusive_or
    variable = Upcase(Word(alphas, max=1))
    term = (Optional(negation) + variable + Optional(connective)) | (
        open_paren + expression + close_paren)
    expression << Group(term + ZeroOrMore(term))

    return expression('left') + '=>' + expression('right')
Ejemplo n.º 3
0
    print


# Define the SQL tokens and building blocks of the grammar.
# Placeholders for recursive rules; their definitions are not visible in
# this section (presumably attached further down with "<<").
selectStmt = Forward()
condition = Forward()
treegram = Forward()
comps = Forward()
# SQL keywords, matched case-insensitively.
selectToken = Keyword("select", caseless=True)
fromToken = Keyword("from", caseless=True)
astoken = Keyword("AS", caseless=True)
# NOTE(review): this matches the single word "GROUPBY", not "GROUP BY" --
# confirm that is the intended dialect.
groupToken = Keyword("GROUPBY", caseless=True)
havingToken = Keyword("HAVING", caseless=True)

# An identifier starts with a letter and continues with letters, digits,
# "_" or "$".
ident = Word(alphas, alphanums + "_$").setName("identifier")
# Column and table names: dot-separated identifiers combined into a single
# token and wrapped in Upcase.
columnName = Upcase(delimitedList(ident, ".", combine=True))
# Comma-separated column lists, each grouped into its own sub-result.
# The three lists share the same definition but are separate objects.
columnNameList = Group(delimitedList(columnName))
columnNameList1 = Group(delimitedList(columnName))
groupNameList = Group(delimitedList(columnName))
tableName = Upcase(delimitedList(ident, ".", combine=True))
tableName2 = Upcase(delimitedList(ident, ".", combine=True))
# "<table> AS <alias>"; the alias uses the same name syntax as the table.
tableAlias = tableName + astoken + tableName2
# The aliased form is tried first so that "t AS x" is not cut short at "t".
tableNameList = Group(delimitedList(tableAlias | tableName))

# WHERE-clause pieces: the recursive expression and its connectives.
whereExpression = Forward()
and_ = Keyword("and", caseless=True)
or_ = Keyword("or", caseless=True)
in_ = Keyword("in", caseless=True)

# "E" is presumably the exponent marker for numeric literals (its use is not
# visible here); binop lists the comparison operators, symbolic and spelled
# out, matched case-insensitively.
E = CaselessLiteral("E")
binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
Ejemplo n.º 4
0
class BaseParser(object):
    """Parses an alignment and returns species sequence tuples"""

    # I think this covers it...
    BASES = Upcase(Word(alphas + "?.-"))

    def __init__(self):
        """Initialise empty parse state and build the root grammar.

        The root grammar is whatever ``self.init_parser()`` returns followed
        by ``stringEnd``, so the whole input has to be consumed.
        ``init_parser`` is not defined in the visible part of this class --
        presumably it is supplied further down or by subclasses.
        """
        # Header values; they stay None unless something sets them, which
        # parse() treats as "no header given".
        self.sequence_length = self.species_count = None
        self.current_sequence = 0
        # Collected (name, sequence) pairs.
        self.sequences = []

        grammar = self.init_parser()
        self.root_parser = grammar + stringEnd

    def parse(self, s):
        """Parse alignment text and validate the collected sequences.

        Runs ``self.root_parser`` over *s* and then checks the
        ``(name, sequence)`` pairs found in ``self.sequences`` (the grammar
        is expected to fill that list in; how it does so is not visible
        here).  When ``self.sequence_length`` or ``self.species_count`` are
        still ``None`` after parsing they are derived from the sequences;
        otherwise they are checked against them.

        Args:
            s: The alignment text to parse.

        Raises:
            AlignmentError: If the text cannot be parsed, no sequences were
                collected, a species name occurs twice, the sequences differ
                in length, or the header counts do not match the data.
        """
        try:
            self.root_parser.parseString(s)

        except ParseException as p:
            log.error("Error in Alignment Parsing:" + str(p))
            log.error("A common cause of this error is having whitespace"
                      ", i.e. spaces or tabs, in the species names. Please check this and remove"
                      " all whitespace from species names, or replace them with e.g. underscores")

            raise AlignmentError

        # An empty alignment used to surface as a bare IndexError further
        # down (self.sequences[0]) when there was no header; report it as
        # an alignment problem like every other validation failure.
        if not self.sequences:
            log.error("Bad alignment file: no sequences were found")
            raise AlignmentError

        # Check that all the sequences are equal length
        slen = None
        names = set()
        for nm, seq in self.sequences:
            if nm in names:
                log.error("Repeated species name '%s' is repeated "
                          "in alignment", nm)
                raise AlignmentError

            names.add(nm)
            if slen is None:
                # Use the first as the test case
                slen = len(seq)
            elif len(seq) != slen:
                log.error(
                    "Bad alignment file: Not all species have the same sequences length")
                raise AlignmentError

        # Not all formats have a heading, but if we have one do some checking
        if self.sequence_length is None:
            # slen is the length of the first sequence, which all others
            # have just been checked against.
            self.sequence_length = slen
        elif self.sequence_length != slen:
            log.error("Bad Alignment file: sequence length count in header does not match"
                      " sequence length in file, please check")
            raise AlignmentError

        if self.species_count is None:
            self.species_count = len(self.sequences)
        elif len(self.sequences) != self.species_count:
            log.error("Bad Alignment file: species count in header does not match"
                      " number of sequences in file, please check")
            raise AlignmentError