return OneOrMore(token + maybeComma)


# A run of one or more decimal digits.
digit_sequence = Word(nums)

# A single "+" or "-" character.
sign = oneOf("+ -")


def convertToFloat(s, loc, toks):
    """Parse action that turns the first matched token into a ``float``.

    Parameters follow the pyparsing parse-action convention:

    s    -- the original string being parsed
    loc  -- the location in ``s`` where the match started
    toks -- the matched tokens; only ``toks[0]`` is used

    Returns the value of ``float(toks[0])``.

    Raises ParseException when the token is not a valid float literal, so
    that pyparsing treats the match as failed.
    """
    try:
        return float(toks[0])
    except (TypeError, ValueError):
        # Only conversion failures are translated; anything else (e.g.
        # KeyboardInterrupt) must propagate untouched.
        # ParseException expects (pstr, loc, msg), so the input string has
        # to be passed first for the error to report the right position.
        raise ParseException(s, loc, "invalid float format %s" % toks[0])


# Exponent part of a float: "e" in either case, an optional sign, then digits.
exponent = CaselessLiteral("e") + Optional(sign) + Word(nums)

# Every component below is optional, so this expression is deliberately loose
# and can match almost anything. Invalid matches are weeded out by Python's
# built-in float() inside convertToFloat; matching loosely like this speeds
# up parsing quite a lot.
floatingPointConstant = Combine(
    Optional(sign)
    + Optional(Word(nums))
    + Optional(Literal(".") + Optional(Word(nums)))
    + Optional(exponent)
).setParseAction(convertToFloat)

number = floatingPointConstant

#same as FP constant but don't allow a - sign
nonnegativeNumber = Combine(
Ejemplo n.º 2
0
    cppStyleComment, OneOrMore, quotedString, restOfLine, delimitedList, \
    dictOf, Forward, Dict


# Punctuation that is matched but left out of the parse results.
lbrace, rbrace, lbracket, rbracket, lparen, rparen, equal, comma = map(
    Suppress, "{}[]()=,")
hashsymbol = Suppress("#")

# Punctuation and markers that are kept in the parse results.
point = Literal('.')
plusorminus = Literal('+') | Literal('-')
e = CaselessLiteral('E')
uni_arrow = Literal("->")
bi_arrow = Literal("<->")

# NOTE: these three are plain strings, not pyparsing elements.
tilde = '~'
bang = '!'
dbquotes = '"'

# system keywords
system_constants_ = Keyword("SYSTEM_CONSTANTS")

# molecule keywords
define_molecules_ = Keyword("DEFINE_MOLECULES")
define_functions_ = Keyword("DEFINE_FUNCTIONS")
diffusion_constant_3d_ = Keyword("DIFFUSION_CONSTANT_3D")
diffusion_constant_2d_ = Keyword("DIFFUSION_CONSTANT_2D")
Ejemplo n.º 3
0
def _build_asn1_grammar():
    """Build and return the pyparsing grammar for ASN.1 module definitions.

    The returned expression is ``OneOrMore(module_definition)``. Comments
    (``--`` style and C style) are ignored by ``module_definition``, and most
    rules carry a parse action that wraps their tokens in an
    ``AnnotatedToken`` tagged with the rule's name (see ``annotate`` below).

    ``Unique``, ``StringOf`` and ``AnnotatedToken`` are defined elsewhere in
    the project; their exact behaviour is not visible from here.
    """
    def build_identifier(prefix_pattern):
        """Return a Combine expression for an identifier.

        The identifier is exactly one character from the srange pattern
        ``prefix_pattern`` followed by an optional run of letters, digits
        and hyphens.
        """
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        # todo: more rigorous? trailing hyphens and -- forbidden
        return Combine(
            Word(srange(prefix_pattern), exact=1) + identifier_suffix)

    def braced_list(element_rule):
        """Return a rule for ``{ elem, elem, ... }``.

        The braces are suppressed, the list may be empty, and the matched
        elements are collected into a single Group.
        """
        elements_rule = Optional(delimitedList(element_rule))
        return Suppress('{') + Group(elements_rule) + Suppress('}')

    def annotate(name):
        """Return a parse action that tags matched tokens with ``name``.

        The action replaces the tokens with
        ``AnnotatedToken(name, tokens_as_list)``.
        """
        def annotation(t):
            return AnnotatedToken(name, t.asList())

        return annotation

    # Reserved words
    ANY = Keyword('ANY')
    DEFINED_BY = Keyword('DEFINED BY')
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')
    IMPORTS = Keyword('IMPORTS')
    EXPORTS = Keyword('EXPORTS')
    FROM = Keyword('FROM')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Useful types
    GeneralizedTime = Keyword('GeneralizedTime')
    UTCTime = Keyword('UTCTime')
    ObjectDescriptor = Keyword('ObjectDescriptor')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') +
                            number)  # todo: consider defined values from 18.1
    # Binary ('0101'B) and hexadecimal ('0AF'H) string literals; the quotes
    # and the trailing B/H marker are suppressed.
    bstring = Suppress('\'') + StringOf('01') + Suppress('\'B')
    hstring = Suppress('\'') + StringOf('0123456789ABCDEF') + Suppress('\'H')

    # Comments
    # A hyphen comment starts with "--" and runs to the next "--" or to the
    # end of a line, whichever comes first (non-greedy match).
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')
    reference = valuereference | typereference  # TODO: consider object references from 12.1

    # values
    # todo: consider more literals from 16.9
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL
    cstring_value = dblQuotedString

    exponent = CaselessLiteral('e') + signed_number
    real_value = Combine(signed_number +
                         Optional(Literal('.') + Optional(number)) +
                         Optional(exponent))

    # In value range constraints, decimal points must be followed by number, or
    # the grammar becomes ambiguous: ([1.].100) vs ([1]..[100])
    constraint_real_value = Combine(signed_number +
                                    Optional(Literal('.') + number) +
                                    Optional(exponent))

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
    external_value_reference = module_reference + Suppress(
        '.') + valuereference
    defined_value = external_value_reference | valuereference  # todo: more options from 13.1
    referenced_value = Unique(defined_value)  # todo: more options from 16.11

    # object identifier value
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(
        ')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
                              (objid_components_list | (defined_value + objid_components_list)) + \
                              Suppress('}')

    value = builtin_value | referenced_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress(
        '(') + definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    definitive_identifier = Optional(
        Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS

    # extensions
    extension_default = Unique(EXTENSIBILITY_IMPLIED)

    # forward declarations

    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    lower_bound = (constraint_real_value | signed_number | referenced_value
                   | MIN)
    upper_bound = (constraint_real_value | signed_number | referenced_value
                   | MAX)
    single_value_constraint = Suppress('(') + value + Suppress(')')
    value_range_constraint = Suppress('(') + lower_bound + Suppress(
        '..') + upper_bound + Suppress(')')
    # TODO: Include contained subtype constraint here if we ever implement it.
    # The surrounding parentheses are optional here, so both "SIZE (n)" and
    # "(SIZE (n))" are accepted.
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + (
        single_value_constraint | value_range_constraint) + Optional(
            Suppress(')'))

    # types
    # todo: consider other defined types from 13.1
    # Absent optional parts yield None so the token positions stay fixed.
    defined_type = Optional(module_reference + Suppress('.'),
                            default=None) + typereference + Optional(
                                size_constraint, default=None)

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    # The plain named_type alternative comes last so the OPTIONAL / DEFAULT
    # forms are tried first.
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT, default=None) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    named_nonumber = Unique(identifier)
    enumeration = named_number | named_nonumber

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(
        size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(
        size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    selection_type = identifier + Suppress('<') + type_
    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
    bitstring_type = BIT_STRING + Optional(
        braced_list(named_number), default=[]) + Optional(
            single_value_constraint | size_constraint, default=None)
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number) + Optional(
        single_value_constraint, default=None)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING + Optional(size_constraint)
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
                                      GraphicString | IA5String | \
                                      ISO646String | NumericString | \
                                      PrintableString | TeletexString | \
                                      T61String | UniversalString | \
                                      UTF8String | VideotexString | \
                                      VisibleString
    characterstring_type = (
        restricted_characterstring_type
        | unrestricted_characterstring_type) + Optional(size_constraint)
    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor

    # ANY type
    any_type = ANY + Optional(Suppress(DEFINED_BY + identifier))

    # todo: consider other builtins from 16.2
    simple_type = (any_type | boolean_type | null_type | octetstring_type
                   | characterstring_type | real_type | plain_integer_type
                   | object_identifier_type
                   | useful_type) + Optional(value_range_constraint
                                             | single_value_constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    # value_list_type is tried before simple_type, so "INTEGER { ... }" is
    # matched as a restricted integer rather than as a plain INTEGER.
    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type

    referenced_type = defined_type | selection_type  # todo: consider other ref:d types from 16.3

    # Fill in the forward declarations now that every type rule exists.
    type_ << (builtin_type | referenced_type)
    named_type << (identifier + type_)

    type_assignment = typereference + '::=' + type_
    value_assignment = valuereference + type_ + '::=' + value

    assignment = type_assignment | value_assignment
    assignment_list = ZeroOrMore(assignment)

    # TODO: Maybe handle full assigned-identifier syntax with defined values
    # described in 12.1, but I haven't been able to find examples of it, and I
    # can't say for sure what acceptable syntax is.
    global_module_reference = module_reference + Optional(
        object_identifier_value, default=None)

    symbol = Unique(reference)  # TODO: parameterized reference?
    symbol_list = delimitedList(symbol, delim=',')
    symbols_from_module = Group(
        Group(symbol_list) + Suppress(FROM) + global_module_reference)
    symbols_from_module_list = OneOrMore(symbols_from_module)
    symbols_imported = Unique(symbols_from_module_list)
    exports = Suppress(EXPORTS) + Optional(symbol_list) + Suppress(';')
    imports = Suppress(IMPORTS) + Optional(symbols_imported) + Suppress(';')

    module_body = Optional(exports, default=None) + Optional(
        imports, default=None) + assignment_list
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + Suppress(DEFINITIONS) + Optional(tag_default, default=None) + \
                        Optional(extension_default, default=None) + Suppress('::=') + \
                        Suppress(BEGIN) + module_body + Suppress(END)

    # Skip both comment styles anywhere inside a module definition.
    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    named_nonumber.setParseAction(annotate('NamedValue'))
    single_value_constraint.setParseAction(annotate('SingleValueConstraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    value_range_constraint.setParseAction(annotate('ValueRangeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(
        annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    module_reference.setParseAction(annotate('ModuleReference'))
    global_module_reference.setParseAction(annotate('GlobalModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(
        annotate('DefinitiveNameAndNumberForm'))
    exports.setParseAction(annotate('Exports'))
    imports.setParseAction(annotate('Imports'))
    assignment_list.setParseAction(annotate('AssignmentList'))
    bstring.setParseAction(annotate('BinaryStringValue'))
    hstring.setParseAction(annotate('HexStringValue'))
    defined_type.setParseAction(annotate('DefinedType'))
    selection_type.setParseAction(annotate('SelectionType'))
    referenced_value.setParseAction(annotate('ReferencedValue'))

    # A source text may contain several module definitions back to back.
    start = OneOrMore(module_definition)
    return start
Ejemplo n.º 4
0
# Column reference written as col("name"); the col(" and ") wrappers are
# suppressed and the name is stored under "column".
column_name = Combine(
    Suppress(Literal('col("'))
    + Word(alphas, alphanums + "_.")("column")
    + Suppress(Literal('")'))
)

# Comparison operators; "^" picks the longest match, so ">=" wins over ">".
gt, lt, ge, le, eq = map(Literal, (">", "<", ">=", "<=", "=="))
ops = (gt ^ lt ^ ge ^ le ^ eq)("op")

# Numeric right-hand side: optional sign, digits, optional fraction/exponent.
fnumber = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")("fnumber")

# Quoted right-hand side, in either double or single quotes (quotes dropped).
condition_value = (
    Suppress('"') + Word(alphanums + "._")("condition_value") + Suppress('"')
) ^ (
    Suppress("'") + Word(alphanums + "._")("condition_value") + Suppress("'")
)

not_null = CaselessLiteral(".notnull()")("notnull")

# A condition is either `col("x").notnull()` or `col("x") <op> <value>`.
condition = (column_name + not_null).setParseAction(_set_notnull) ^ (
    column_name + ops + (fnumber ^ condition_value)
)


class ConditionParserError(ge_exceptions.GreatExpectationsError):
    """Condition-parser error type.

    A plain subclass of ``GreatExpectationsError`` that adds no behaviour.
    """


class RowConditionParserType(enum.Enum):
    """Type of condition or parser to be used to interpret a RowCondition

    Note that many of these are forward looking and are not yet implemented.
    In the future `GE` can replace the `great_expectations__experimental__`
    name for the condition_parser and this enum can be used internally
    instead of strings for the condition_parser user input.
Ejemplo n.º 5
0
def graph_definition():
    """Return the module-wide DOT grammar, constructing it on first use.

    The parser is cached in the global ``graphparser``: when that global is
    already truthy it is returned as is and nothing is rebuilt. Otherwise the
    grammar is assembled, comment skipping and the ``push_*`` parse actions
    are attached, and the result is stored in the global before returning.
    """
    global graphparser

    if graphparser:
        return graphparser

    # punctuation
    (colon, lbrace, rbrace, lbrack, rbrack, lparen, rparen, equals,
     comma, dot, slash, bslash, star, semi, at, minus) = map(
        Literal, ":{}[]()=,./\\*;@-")

    # keywords (matched case-insensitively)
    strict_, graph_, digraph_, subgraph_, node_, edge_ = map(
        CaselessLiteral,
        ("strict", "graph", "digraph", "subgraph", "node", "edge"))

    # token definitions
    identifier = Word(alphanums + "_.").setName("identifier")

    # Like dblQuotedString: the surrounding quotes stay in the result.
    double_quoted_string = QuotedString(
        '"', escChar="\\", multiline=True, unquoteResults=False)

    _noncomma = printables.replace(",", "")
    alphastring_ = OneOrMore(CharsNotIn(_noncomma + ' '))

    def parse_html(s, loc, toks):
        # Re-wrap the joined nested content in angle brackets.
        return '<%s>' % ''.join(toks[0])

    opener = '<'
    closer = '>'
    html_text = nestedExpr(opener, closer, CharsNotIn(opener + closer))
    html_text = html_text.setParseAction(parse_html).leaveWhitespace()

    ID = (
        identifier
        | html_text
        | double_quoted_string  # .setParseAction(strip_quotes)
        | alphastring_
    ).setName("ID")

    float_number = Combine(
        Optional(minus) + OneOrMore(Word(nums + "."))
    ).setName("float_number")

    righthand_id = (float_number | ID).setName("righthand_id")

    port_angle = (at + ID).setName("port_angle")

    port_location = (
        OneOrMore(Group(colon + ID))
        | Group(colon + lparen + ID + comma + ID + rparen)
    ).setName("port_location")

    port = (
        Group(port_location + Optional(port_angle))
        | Group(port_angle + Optional(port_location))
    ).setName("port")

    node_id = ID + Optional(port)
    a_list = OneOrMore(
        ID + Optional(equals + righthand_id) + Optional(comma.suppress())
    ).setName("a_list")

    attr_list = OneOrMore(
        lbrack.suppress() + Optional(a_list) + rbrack.suppress()
    ).setName("attr_list")

    attr_stmt = (
        Group(graph_ | node_ | edge_) + attr_list
    ).setName("attr_stmt")

    edgeop = (Literal("--") | Literal("->")).setName("edgeop")

    # stmt_list and edge_point are recursive, so declare them up front and
    # fill them in once the rules they depend on exist.
    stmt_list = Forward()
    graph_stmt = Group(
        lbrace.suppress() + Optional(stmt_list) + rbrace.suppress()
        + Optional(semi.suppress())
    ).setName("graph_stmt")

    edge_point = Forward()

    edgeRHS = OneOrMore(edgeop + edge_point)
    edge_stmt = edge_point + edgeRHS + Optional(attr_list)

    subgraph = Group(
        subgraph_ + Optional(ID) + graph_stmt
    ).setName("subgraph")

    edge_point << Group(subgraph | graph_stmt | node_id).setName('edge_point')

    node_stmt = (
        node_id + Optional(attr_list) + Optional(semi.suppress())
    ).setName("node_stmt")

    assignment = (ID + equals + righthand_id).setName("assignment")
    stmt = (
        assignment | edge_stmt | attr_stmt | subgraph | graph_stmt | node_stmt
    ).setName("stmt")
    stmt_list << OneOrMore(stmt + Optional(semi.suppress()))

    graphparser = OneOrMore(
        (Optional(strict_) + Group((graph_ | digraph_)) + Optional(ID)
         + graph_stmt).setResultsName("graph"))

    singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

    # actions
    graphparser.ignore(singleLineComment)
    graphparser.ignore(cStyleComment)

    assignment.setParseAction(push_attr_list)
    a_list.setParseAction(push_attr_list)
    edge_stmt.setParseAction(push_edge_stmt)
    node_stmt.setParseAction(push_node_stmt)
    attr_stmt.setParseAction(push_default_stmt)

    subgraph.setParseAction(push_subgraph_stmt)
    graph_stmt.setParseAction(push_graph_stmt)
    graphparser.setParseAction(push_top_graph_stmt)

    return graphparser
Ejemplo n.º 6
0
class CreateParser(object):
    """
    This class can take a plain "CREATE TABLE" SQL as input and parse it into
    a Table object, so that we have more insight on the detail of this SQL.

    Example:
    sql = 'create table foo ( bar int primary key )'
    parser = CreateParser(sql)
    try:
        tbl_obj = parser.parse()
    except ParseError:
        log.error("Failed to parse SQL")

    This set of BNF rules are basically translated from the MySQL manual:
    http://dev.mysql.com/doc/refman/5.6/en/create-table.html
    If you don't know how to change the rule or fix the bug,
    <Getting Started with Pyparsing> is probably the best book to start with.
    Also this wiki has all supported functions listed:
    https://pyparsing.wikispaces.com/HowToUsePyparsing
    If you want more information about how these characters are being
    matched, add .setDebug(True) after the specific token you want to debug
    """
    # Basic token
    WORD_CREATE = CaselessLiteral("CREATE").suppress()
    WORD_TABLE = CaselessLiteral("TABLE").suppress()
    COMMA = Literal(',').suppress()
    DOT = Literal('.')
    LEFT_PARENTHESES = Literal('(').suppress()
    RIGHT_PARENTHESES = Literal(')').suppress()
    QUOTE = Literal("'") | Literal('"')
    BACK_QUOTE = Optional(Literal('`')).suppress()
    LENGTH = Word(nums)
    OBJECT_NAME = Word(alphanums + "_" + "-" + "<" + ">" + ":")
    QUOTED_STRING_WITH_QUOTE = QuotedString(
        quoteChar="'",
        escQuote="''",
        escChar='\\',
        multiline=True,
        unquoteResults=False) | QuotedString(quoteChar='"',
                                             escQuote='""',
                                             escChar='\\',
                                             multiline=True,
                                             unquoteResults=False)
    QUOTED_STRING = QuotedString(
        quoteChar="'", escQuote="''", escChar='\\',
        multiline=True) | QuotedString(
            quoteChar='"', escQuote='""', escChar='\\', multiline=True)
    # Start of a create table statement
    # Sample: this part of rule will match following section
    # `table_name` IF NOT EXISTS
    IF_NOT_EXIST = Optional(
        CaselessLiteral("IF") + CaselessLiteral("NOT") +
        CaselessLiteral("EXISTS")).suppress()
    TABLE_NAME = (QuotedString(
        quoteChar="`", escQuote="``", escChar='\\', unquoteResults=True)
                  | OBJECT_NAME)('table_name')

    # Column definition
    # Sample: this part of rule will match following section
    # `id` bigint(20) unsigned NOT NULL DEFAULT '0',
    COLUMN_NAME = (QuotedString(
        quoteChar="`", escQuote="``", escChar='\\', unquoteResults=True)
                   | OBJECT_NAME)('column_name')
    COLUMN_NAME_WITH_QUOTE = (QuotedString(
        quoteChar="`", escQuote="``", escChar='\\', unquoteResults=False)
                              | OBJECT_NAME)('column_name')
    UNSIGNED = Optional(CaselessLiteral("UNSIGNED"))('unsigned')
    ZEROFILL = Optional(CaselessLiteral("ZEROFILL"))('zerofill')
    COL_LEN = Combine(LEFT_PARENTHESES + LENGTH + RIGHT_PARENTHESES,
                      adjacent=False)('length')
    INT_TYPE = (CaselessLiteral("TINYINT") | CaselessLiteral("SMALLINT")
                | CaselessLiteral("MEDIUMINT") | CaselessLiteral("INT")
                | CaselessLiteral("INTERGER") | CaselessLiteral("BIGINT")
                | CaselessLiteral("BINARY") | CaselessLiteral("BIT"))
    INT_DEF = (INT_TYPE('column_type') + Optional(COL_LEN) + UNSIGNED +
               ZEROFILL)
    VARBINARY_DEF = (CaselessLiteral('VARBINARY')('column_type') + COL_LEN)
    FLOAT_TYPE = \
        CaselessLiteral("REAL") | CaselessLiteral("DOUBLE") |\
        CaselessLiteral("FLOAT") | CaselessLiteral("DECIMAL") |\
        CaselessLiteral("NUMERIC")
    FLOAT_LEN = Combine(LEFT_PARENTHESES + LENGTH + Optional(COMMA + LENGTH) +
                        RIGHT_PARENTHESES,
                        adjacent=False,
                        joinString=', ')('length')
    FLOAT_DEF = (FLOAT_TYPE('column_type') + Optional(FLOAT_LEN) + UNSIGNED +
                 ZEROFILL)
    # time type definition. They contain type_name and an optional FSP section
    # Sample: DATETIME[(fsp)]
    FSP = COL_LEN
    DT_DEF = (
        Combine(CaselessLiteral("TIME") + Optional(CaselessLiteral("STAMP")))
        | CaselessLiteral("DATETIME"))('column_type') + Optional(FSP)
    SIMPLE_DEF = (CaselessLiteral("DATE") | CaselessLiteral("YEAR")
                  | CaselessLiteral("TINYBLOB") | CaselessLiteral("BLOB")
                  | CaselessLiteral("MEDIUMBLOB") | CaselessLiteral("LONGBLOB")
                  | CaselessLiteral("BOOL")
                  | CaselessLiteral("BOOLEAN"))('column_type')
    OPTIONAL_COL_LEN = Optional(COL_LEN)
    BINARY = Optional(CaselessLiteral("BINARY"))('binary')
    CHARSET_NAME = (Optional(QUOTE).suppress() +
                    Word(alphanums + '_')('charset') +
                    Optional(QUOTE).suppress())
    COLLATION_NAME = (Optional(QUOTE).suppress() +
                      Word(alphanums + '_')('collate') +
                      Optional(QUOTE).suppress())
    CHARSET_DEF = (CaselessLiteral("CHARACTER SET").suppress() + CHARSET_NAME)
    COLLATE_DEF = (CaselessLiteral("COLLATE").suppress() + COLLATION_NAME)
    CHAR_DEF = (CaselessLiteral("CHAR")('column_type') + OPTIONAL_COL_LEN +
                BINARY)
    VARCHAR_DEF = (CaselessLiteral("VARCHAR")('column_type') + COL_LEN +
                   BINARY)
    TEXT_TYPE = (CaselessLiteral("TINYTEXT") | CaselessLiteral("TEXT")
                 | CaselessLiteral("MEDIUMTEXT") | CaselessLiteral("LONGTEXT")
                 | CaselessLiteral("DOCUMENT"))
    TEXT_DEF = (TEXT_TYPE('column_type') + BINARY)
    ENUM_VALUE_LIST = Group(QUOTED_STRING_WITH_QUOTE +
                            ZeroOrMore(COMMA + QUOTED_STRING_WITH_QUOTE))(
                                'enum_value_list')
    ENUM_DEF = (CaselessLiteral("ENUM")('column_type') + LEFT_PARENTHESES +
                ENUM_VALUE_LIST + RIGHT_PARENTHESES)
    SET_VALUE_LIST = Group(QUOTED_STRING_WITH_QUOTE +
                           ZeroOrMore(COMMA + QUOTED_STRING_WITH_QUOTE))(
                               'set_value_list')
    SET_DEF = (CaselessLiteral("SET")('column_type') + LEFT_PARENTHESES +
               SET_VALUE_LIST + RIGHT_PARENTHESES)
    DATA_TYPE = (INT_DEF | FLOAT_DEF | DT_DEF | SIMPLE_DEF | TEXT_DEF
                 | CHAR_DEF | VARCHAR_DEF | ENUM_DEF | SET_DEF | VARBINARY_DEF)

    # Column attributes come after column type and length
    NULLABLE = (CaselessLiteral("NULL") | CaselessLiteral("NOT NULL"))
    DEFAULT_VALUE = (CaselessLiteral("DEFAULT").suppress() + (
        Optional(Literal('b'))('is_bit') + QUOTED_STRING_WITH_QUOTE('default')
        | Combine(
            CaselessLiteral("CURRENT_TIMESTAMP")('default') + Optional(COL_LEN)
            ('ts_len')) | Word(alphanums + '_' + '-' + '+')('default')))
    ON_UPDATE = (CaselessLiteral("ON") + CaselessLiteral("UPDATE") +
                 (CaselessLiteral("CURRENT_TIMESTAMP")('on_update') +
                  Optional(COL_LEN)('on_update_ts_len')))
    AUTO_INCRE = CaselessLiteral("AUTO_INCREMENT")
    UNIQ_KEY = (CaselessLiteral("UNIQUE") +
                Optional(CaselessLiteral("KEY")).suppress())
    PRIMARY_KEY = (CaselessLiteral("PRIMARY") +
                   Optional(CaselessLiteral("KEY")).suppress())
    COMMENT = Combine(CaselessLiteral("COMMENT").suppress() +
                      QUOTED_STRING_WITH_QUOTE,
                      adjacent=False)
    COLUMN_DEF = Group(COLUMN_NAME + DATA_TYPE + ZeroOrMore(
        NULLABLE('nullable') | DEFAULT_VALUE | ON_UPDATE
        | AUTO_INCRE('auto_increment') | UNIQ_KEY('uniq_key')
        | PRIMARY_KEY('primary') | COMMENT('comment') | CHARSET_DEF
        | COLLATE_DEF))
    COLUMN_LIST = Group(COLUMN_DEF +
                        ZeroOrMore(COMMA + COLUMN_DEF))('column_list')

    DOCUMENT_PATH = Combine(COLUMN_NAME_WITH_QUOTE +
                            ZeroOrMore(DOT + COLUMN_NAME_WITH_QUOTE))
    IDX_COL = ((Group(DOCUMENT_PATH + CaselessLiteral('AS') +
                      (CaselessLiteral('INT') | CaselessLiteral('STRING')) +
                      Optional(COL_LEN, default=''))) |
               (Group(COLUMN_NAME + Optional(COL_LEN, default=''))))

    # Primary key section
    COL_NAME_LIST = Group(IDX_COL + ZeroOrMore(COMMA + IDX_COL))
    IDX_COLS = (LEFT_PARENTHESES + COL_NAME_LIST + RIGHT_PARENTHESES)
    WORD_PRI_KEY = (CaselessLiteral("PRIMARY").suppress() +
                    CaselessLiteral("KEY").suppress())
    KEY_BLOCK_SIZE = (CaselessLiteral("KEY_BLOCK_SIZE").suppress() +
                      Optional(Literal('=')) +
                      Word(nums)('idx_key_block_size'))
    INDEX_USING = (
        CaselessLiteral("USING").suppress() +
        (CaselessLiteral("BTREE") | CaselessLiteral("HASH"))('idx_using'))

    INDEX_OPTION = (ZeroOrMore(KEY_BLOCK_SIZE | COMMENT('idx_comment')
                               | INDEX_USING))
    PRI_KEY_DEF = (COMMA + WORD_PRI_KEY + IDX_COLS('pri_list') + INDEX_OPTION)

    # Index section
    KEY_TYPE = (CaselessLiteral("FULLTEXT")
                | CaselessLiteral("SPATIAL"))('key_type')
    WORD_UNIQUE = CaselessLiteral("UNIQUE")('unique')
    WORD_KEY = (CaselessLiteral("INDEX").suppress()
                | CaselessLiteral("KEY").suppress())
    IDX_NAME = Optional(COLUMN_NAME)
    IDX_DEF = (ZeroOrMore(
        Group(COMMA + Optional(WORD_UNIQUE | KEY_TYPE) + WORD_KEY +
              IDX_NAME('index_name') + IDX_COLS('index_col_list') +
              INDEX_OPTION)))('index_section')

    # Constraint section. Foreign key constraints are not a recommended way
    # of using MySQL, so the whole section is captured as a single string.
    CONSTRAINT = Combine(
        ZeroOrMore(COMMA + Optional(CaselessLiteral('CONSTRAINT')) +
                   # foreign key name except the key word 'FOREIGN'
                   Optional((~CaselessLiteral('FOREIGN') + COLUMN_NAME)) +
                   CaselessLiteral('FOREIGN') + CaselessLiteral('KEY') +
                   LEFT_PARENTHESES + COL_NAME_LIST + RIGHT_PARENTHESES +
                   CaselessLiteral('REFERENCES') + COLUMN_NAME +
                   LEFT_PARENTHESES + COL_NAME_LIST + RIGHT_PARENTHESES +
                   ZeroOrMore(Word(alphanums))),
        adjacent=False,
        joinString=' ')('constraint')

    # Table option section
    ENGINE = (CaselessLiteral("ENGINE").suppress() +
              Optional(Literal('=')).suppress() +
              COLUMN_NAME('engine').setParseAction(upcaseTokens))
    DEFAULT_CHARSET = (Optional(CaselessLiteral("DEFAULT")).suppress() +
                       ((CaselessLiteral("CHARACTER").suppress() +
                         CaselessLiteral("SET").suppress()) |
                        (CaselessLiteral("CHARSET").suppress())) +
                       Optional(Literal('=')).suppress() +
                       Word(alphanums + '_')('charset'))
    TABLE_COLLATE = (Optional(CaselessLiteral("DEFAULT")).suppress() +
                     CaselessLiteral("COLLATE").suppress() +
                     Optional(Literal('=')).suppress() + COLLATION_NAME)
    ROW_FORMAT = (
        CaselessLiteral("ROW_FORMAT").suppress() +
        Optional(Literal('=')).suppress() +
        Word(alphanums + '_')('row_format').setParseAction(upcaseTokens))
    TABLE_KEY_BLOCK_SIZE = (
        CaselessLiteral("KEY_BLOCK_SIZE").suppress() +
        Optional(Literal('=')).suppress() + Word(nums)
        ('key_block_size').setParseAction(lambda s, l, t: [int(t[0])]))
    COMPRESSION = (
        CaselessLiteral("COMPRESSION").suppress() +
        Optional(Literal('=')).suppress() +
        Word(alphanums + '_')('compression').setParseAction(upcaseTokens))
    # Parse and make sure auto_increment is an integer.
    # A parse action is defined as fn(s, loc, toks), where:
    #   s is the original parse string
    #   loc is the location in the string where matching started
    #   toks is the list of the matched tokens, packaged as a ParseResults
    #   object
    TABLE_AUTO_INCRE = (
        CaselessLiteral("AUTO_INCREMENT").suppress() +
        Optional(Literal('=')).suppress() + Word(nums)
        ('auto_increment').setParseAction(lambda s, l, t: [int(t[0])]))
    TABLE_COMMENT = (CaselessLiteral("COMMENT").suppress() +
                     Optional(Literal('=')).suppress() +
                     QUOTED_STRING_WITH_QUOTE('comment'))
    TABLE_OPTION = ZeroOrMore(ENGINE | DEFAULT_CHARSET | TABLE_COLLATE
                              | ROW_FORMAT | TABLE_KEY_BLOCK_SIZE | COMPRESSION
                              | TABLE_AUTO_INCRE | TABLE_COMMENT)

    # Partition section
    PARTITION = Optional(
        Combine(Combine(Optional(Literal('/*!') + Word(nums))) +
                CaselessLiteral("PARTITION") + CaselessLiteral("BY") +
                SkipTo(StringEnd()),
                adjacent=False,
                joinString=" ")('partition'))

    @classmethod
    def generate_rule(cls):
        """Build the final rule that matches the whole statement.

        The class-level grammar fragments are chained, in order, with the
        ``+`` operator; the table options fragment is given the results name
        ``table_options``.
        """
        pieces = (
            cls.WORD_CREATE,
            cls.WORD_TABLE,
            cls.IF_NOT_EXIST,
            cls.TABLE_NAME,
            cls.LEFT_PARENTHESES,
            cls.COLUMN_LIST,
            Optional(cls.PRI_KEY_DEF),
            cls.IDX_DEF,
            cls.CONSTRAINT,
            cls.RIGHT_PARENTHESES,
            cls.TABLE_OPTION('table_options'),
            cls.PARTITION,
        )
        rule = pieces[0]
        for piece in pieces[1:]:
            # Plain ``+`` (never ``+=``) so each step creates a new expression
            # and the shared class-level fragments are left untouched.
            rule = rule + piece
        return rule

    @classmethod
    def parse(cls, sql):
        """Parse a statement and build the ``models.Table`` describing it.

        Args:
            sql: The statement text. A value that is not a ``str`` is decoded
                as UTF-8 before parsing.

        Returns:
            A ``models.Table`` populated with the table name, table options,
            partition/constraint text, columns, primary key and indexes found
            in the statement.

        Raises:
            ParseError: If the grammar from ``generate_rule()`` rejects the
                text, or if a primary key is declared both inline on a column
                and in a separate PRIMARY KEY clause.
        """
        try:
            if not isinstance(sql, str):
                sql = sql.decode('utf-8')
            result = cls.generate_rule().parseString(sql)
        except ParseException as e:
            raise ParseError(
                "Failed to parse SQL, unsupported syntax: {}".format(e),
                e.line, e.column)

        inline_pri_exists = False
        table = models.Table()
        table.name = result.table_name
        table_options = [
            'engine', 'charset', 'collate', 'row_format', 'key_block_size',
            'compression', 'auto_increment', 'comment'
        ]
        # Only copy the options that were actually present in the statement.
        for table_option in table_options:
            if table_option in result:
                setattr(table, table_option, result.get(table_option))
        if 'partition' in result:
            # pyparsing will convert newline into two after parsing. So we
            # need to dedup here
            table.partition = result.partition.replace("\n\n", "\n")
        if 'constraint' in result:
            table.constraint = result.constraint

        for column_def in result.column_list:
            # Pick the column model that matches the declared type.
            if column_def.column_type == 'ENUM':
                column = models.EnumColumn()
                for enum_value in column_def.enum_value_list:
                    column.enum_list.append(enum_value)
            elif column_def.column_type == 'SET':
                column = models.SetColumn()
                for set_value in column_def.set_value_list:
                    column.set_list.append(set_value)
            elif column_def.column_type in ('TIMESTAMP', 'DATETIME'):
                column = models.TimestampColumn()
                if 'on_update' in column_def:
                    if 'on_update_ts_len' in column_def:
                        column.on_update_current_timestamp = \
                            "{}({})".format(
                                column_def.on_update,
                                column_def.on_update_ts_len)
                    else:
                        column.on_update_current_timestamp = \
                            column_def.on_update
            else:
                column = models.Column()

            column.name = column_def.column_name
            column.column_type = column_def.column_type

            # We need to check whether each column property exists in the
            # create table string, because not specifying a "COMMENT" is
            # different from specifying "COMMENT" equal to an empty string.
            # The former ends up being
            #   column=None
            # and the latter being
            #   column=''
            if 'comment' in column_def:
                column.comment = column_def.comment
            if 'nullable' in column_def:
                if column_def.nullable == 'NULL':
                    column.nullable = True
                elif column_def.nullable == 'NOT NULL':
                    column.nullable = False
            if 'unsigned' in column_def:
                if column_def.unsigned == 'UNSIGNED':
                    column.unsigned = True
            if 'default' in column_def:
                if 'ts_len' in column_def:
                    column.default = "{}({})".format(column_def.default,
                                                     column_def.ts_len)
                else:
                    column.default = column_def.default
                if 'is_bit' in column_def:
                    column.is_default_bit = True
            if 'charset' in column_def:
                column.charset = column_def.charset
            if 'length' in column_def:
                column.length = column_def.length
            if 'collate' in column_def:
                column.collate = column_def.collate
            if 'auto_increment' in column_def:
                column.auto_increment = True
            if 'primary' in column_def:
                # Inline "PRIMARY [KEY]" on the column itself.
                idx_col = models.IndexColumn()
                idx_col.name = column_def.column_name
                table.primary_key.column_list.append(idx_col)
                inline_pri_exists = True
            table.column_list.append(column)

        if 'pri_list' in result:
            # A separate PRIMARY KEY (...) clause may not be combined with an
            # inline primary key on a column.
            if inline_pri_exists:
                raise ParseError("Multiple primary keys defined")
            table.primary_key.name = 'PRIMARY'
            for col in result.pri_list:
                for name, length in col:
                    idx_col = models.IndexColumn()
                    idx_col.name = name
                    if length:
                        idx_col.length = length
                    table.primary_key.column_list.append(idx_col)
            if 'idx_key_block_size' in result:
                # KEY_BLOCK_SIZE stores its value under the results name
                # 'idx_key_block_size'; read that same name back (an unknown
                # name would silently yield an empty string).
                table.primary_key.key_block_size = result.idx_key_block_size
            if 'idx_comment' in result:
                table.primary_key.comment = result.idx_comment

        if 'index_section' in result:
            for idx_def in result.index_section:
                idx = models.TableIndex()
                idx.name = idx_def.index_name
                if 'idx_key_block_size' in idx_def:
                    idx.key_block_size = idx_def.idx_key_block_size
                if 'idx_comment' in idx_def:
                    idx.comment = idx_def.idx_comment
                if 'idx_using' in idx_def:
                    idx.using = idx_def.idx_using
                if 'key_type' in idx_def:
                    idx.key_type = idx_def.key_type
                if 'unique' in idx_def:
                    idx.is_unique = True
                for col in idx_def.index_col_list:
                    for col_def in col:
                        # Four tokens with "AS" in second place is the
                        # document-path form: <path> AS <type> <length>.
                        if len(col_def) == 4 and col_def[1].upper() == 'AS':
                            (document_path, word_as, key_type,
                             length) = col_def
                            idx_col = models.DocStoreIndexColumn()
                            idx_col.document_path = document_path
                            idx_col.key_type = key_type
                            if length:
                                idx_col.length = length
                            idx.column_list.append(idx_col)
                        else:
                            (name, length) = col_def
                            idx_col = models.IndexColumn()
                            idx_col.name = name
                            if length:
                                idx_col.length = length
                            idx.column_list.append(idx_col)
                table.indexes.append(idx)
        return table
Ejemplo n.º 7
0
    def define_dot_parser(self):
        """Define dot grammar

        Based on the grammar http://www.graphviz.org/doc/info/lang.html

        Returns:
            The pyparsing expression for a whole graph, with comments ignored
            and this object's ``_proc_*`` / ``_main_graph_stmt`` methods
            attached as parse actions.
        """
        # punctuation
        colon = Literal(":")
        lbrace = Suppress("{")
        rbrace = Suppress("}")
        lbrack = Suppress("[")
        rbrack = Suppress("]")
        lparen = Literal("(")
        rparen = Literal(")")
        equals = Suppress("=")
        comma = Literal(",")
        semi = Suppress(";")
        at = Literal("@")
        minus = Literal("-")
        pluss = Suppress("+")

        # keywords
        strict_ = CaselessLiteral("strict")
        graph_ = CaselessLiteral("graph")
        digraph_ = CaselessLiteral("digraph")
        subgraph_ = CaselessLiteral("subgraph")
        node_ = CaselessLiteral("node")
        edge_ = CaselessLiteral("edge")

        # every punctuation character except "_", plus all whitespace
        punctuation_ = "".join([c for c in string.punctuation if c not in '_'
                                ]) + string.whitespace

        # token definitions
        identifier = Word(alphanums + "_").setName("identifier")

        double_quoted_string = Regex(r'\"(?:\\\"|\\\\|[^"])*\"', re.MULTILINE)
        double_quoted_string.setParseAction(removeQuotes)
        # quoted strings may be concatenated with "+"
        quoted_string = Combine(
            double_quoted_string +
            Optional(OneOrMore(pluss + double_quoted_string)),
            adjacent=False)
        alphastring_ = OneOrMore(CharsNotIn(punctuation_))

        def parse_html(s, loc, toks):
            return '<<%s>>' % ''.join(toks[0])

        opener = '<'
        closer = '>'
        try:
            html_text = pyparsing.nestedExpr(
                opener, closer,
                ((CharsNotIn(opener + closer).setParseAction(lambda t: t[0]))
                 )).setParseAction(parse_html)
        except Exception:
            # Fall back to a simpler rule when nestedExpr cannot be used.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            log.debug('nestedExpr not available.')
            log.warning('Old version of pyparsing detected. Version 1.4.8 or '
                        'later is recommended. Parsing of html labels may not '
                        'work properly.')
            html_text = Combine(Literal("<<") + OneOrMore(CharsNotIn(",]")))

        ID = (alphastring_ | html_text | quoted_string
              | identifier).setName("ID")

        float_number = Combine(Optional(minus) +
                               OneOrMore(Word(nums +
                                              "."))).setName("float_number")

        righthand_id = (float_number | ID).setName("righthand_id")

        port_angle = (at + ID).setName("port_angle")

        port_location = ((OneOrMore(Group(colon + ID))
                          | Group(colon + lparen + ID + comma + ID +
                                  rparen))).setName("port_location")

        port = Combine(
            (Group(port_location + Optional(port_angle))
             | Group(port_angle + Optional(port_location)))).setName("port")

        node_id = (ID + Optional(port))
        a_list = OneOrMore(ID + Optional(equals + righthand_id) +
                           Optional(comma.suppress())).setName("a_list")

        attr_list = OneOrMore(lbrack + Optional(a_list) + rbrack).setName(
            "attr_list").setResultsName('attrlist')

        attr_stmt = ((graph_ | node_ | edge_) + attr_list).setName("attr_stmt")

        edgeop = (Literal("--") | Literal("->")).setName("edgeop")

        # stmt_list and edge_point are recursive, so they are declared as
        # Forward placeholders and filled in further down with "<<".
        stmt_list = Forward()
        graph_stmt = (lbrace + Optional(stmt_list) + rbrace +
                      Optional(semi)).setName("graph_stmt")

        edge_point = Forward()

        edgeRHS = OneOrMore(edgeop + edge_point)
        edge_stmt = edge_point + edgeRHS + Optional(attr_list)

        subgraph = (
            Optional(subgraph_, '') + Optional(ID, '') +
            Group(graph_stmt)).setName("subgraph").setResultsName('ssubgraph')

        edge_point << (subgraph | graph_stmt | node_id)

        node_stmt = (node_id + Optional(attr_list) +
                     Optional(semi)).setName("node_stmt")

        assignment = (ID + equals + righthand_id).setName("assignment")
        stmt = (assignment | edge_stmt | attr_stmt | subgraph | graph_stmt
                | node_stmt).setName("stmt")
        stmt_list << OneOrMore(stmt + Optional(semi))

        graphparser = ((Optional(strict_, 'notstrict') +
                        ((graph_ | digraph_)) + Optional(ID, '') + lbrace +
                        Group(Optional(stmt_list)) +
                        rbrace).setResultsName("graph"))

        singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

        # actions
        graphparser.ignore(singleLineComment)
        graphparser.ignore(cStyleComment)
        node_id.setParseAction(self._proc_node_id)
        assignment.setParseAction(self._proc_attr_assignment)
        a_list.setParseAction(self._proc_attr_list)
        edge_stmt.setParseAction(self._proc_edge_stmt)
        node_stmt.setParseAction(self._proc_node_stmt)
        attr_stmt.setParseAction(self._proc_default_attr_stmt)
        attr_list.setParseAction(self._proc_attr_list_combine)
        subgraph.setParseAction(self._proc_subgraph_stmt)
        graphparser.setParseAction(self._main_graph_stmt)
        return graphparser
Ejemplo n.º 8
0
    def fromString(inputText, verbose=False):
        """Parse a component description given as text.

        ``/* ... */`` blocks are stripped from ``inputText`` first; the
        remaining text must consist of one or more ``import "...";`` lines
        followed by a single ``component <name> { ... };`` definition.

        Args:
            inputText: The text to parse.
            verbose: When true, the flag's value is printed before parsing.

        Returns:
            Whatever ``CDSLParsing.component`` builds from the parse results.
        """
        if verbose:
            # A single pre-formatted argument prints the same text whether
            # ``print`` is a statement (Python 2) or a function (Python 3).
            print('Verbose: %s' % verbose)
        text = nestedExpr("/*", "*/").suppress().transformString(inputText)

        semicolon = Suppress(Word(";"))
        quote = Suppress(Word("\""))
        op = Suppress(Word("{"))
        cl = Suppress(Word("}"))
        opp = Suppress(Word("("))
        clp = Suppress(Word(")"))
        identifier = Word(alphas + "_", alphanums + "_")

        def keywordList(keyword):
            # "<keyword> name [, name ...];" grouped into one result; the
            # keyword itself is kept as the first token of the group.
            return Group(
                CaselessLiteral(keyword) + identifier +
                ZeroOrMore(Suppress(Word(',')) + identifier) + semicolon)

        # Imports
        idslImport = Suppress(CaselessLiteral("import")) + quote + CharsNotIn(
            "\";").setResultsName('path') + quote + semicolon
        idslImports = OneOrMore(idslImport)

        # Communications
        implementsList = keywordList('implements')
        requiresList = keywordList('requires')
        subscribesList = keywordList('subscribesTo')
        publishesList = keywordList('publishes')
        communicationList = (implementsList | requiresList | subscribesList
                             | publishesList)
        communications = Group(
            Suppress(CaselessLiteral("communications")) + op +
            ZeroOrMore(communicationList) + cl + semicolon)

        # Language
        language = Suppress(CaselessLiteral("language")) + (
            CaselessLiteral("cpp") | CaselessLiteral("python")) + semicolon
        # GUI
        gui = Group(
            Optional(
                Suppress(CaselessLiteral("gui")) + CaselessLiteral("Qt") +
                opp + identifier + clp + semicolon))
        # additional options
        options = Group(
            Optional(
                Suppress(CaselessLiteral("options")) + identifier +
                ZeroOrMore(Suppress(Word(',')) + identifier) + semicolon))

        # "&" lets the four sections appear in any order.
        componentContents = communications.setResultsName(
            'communications') & language.setResultsName(
                'language') & gui.setResultsName(
                    'gui') & options.setResultsName('options')
        component = Suppress(
            CaselessLiteral("component")) + identifier.setResultsName(
                "name") + op + componentContents.setResultsName(
                    "properties") + cl + semicolon

        CDSL = idslImports.setResultsName(
            "imports") + component.setResultsName("component")
        CDSL.ignore(cppStyleComment)
        tree = CDSL.parseString(text)
        return CDSLParsing.component(tree)
Ejemplo n.º 9
0
class SearchParser(object):
    """The parser for bauble.search.MapperSearch

    The grammar is built once, as class attributes. ``statement`` is the
    entry rule; it accepts a ``<domain> where <filter>`` query, a domain
    expression, or a plain list of values.
    """

    # Optional leading minus, digits, optional fractional part and optional
    # (unsigned) exponent.
    numeric_value = Regex(r'[-]?\d+(\.\d*)?([eE]\d+)?').setParseAction(
        NumericToken)('number')
    unquoted_string = Word(alphanums + alphas8bit + '%.-_*;:')
    # NOTE(review): setParseAction is called directly on pyparsing's shared
    # ``quotedString`` object rather than on a copy, so this presumably
    # affects every other user of it - confirm whether that is intended.
    string_value = (quotedString.setParseAction(removeQuotes)
                    | unquoted_string).setParseAction(StringToken)('string')

    none_token = Literal('None').setParseAction(NoneToken)
    empty_token = Literal('Empty').setParseAction(EmptyToken)

    # value_list is recursive (typed_value contains a value_list, and a
    # value_list is made of values), hence the Forward placeholder that is
    # filled in with "<<" below.
    value_list = Forward()
    typed_value = (Literal("|") + unquoted_string + Literal("|") + value_list +
                   Literal("|")).setParseAction(TypedValueToken)

    value = (typed_value | numeric_value | none_token | empty_token
             | string_value).setParseAction(ValueToken)('value')
    # Values may be separated by whitespace or by commas; "^" tries both
    # alternatives and keeps the longer match.
    value_list << Group(OneOrMore(value) ^ delimitedList(value)
                        ).setParseAction(ValueListAction)('value_list')

    domain = Word(alphas, alphanums)
    binop = oneOf('= == != <> < <= > >= not like contains has ilike '
                  'icontains ihas is')
    equals = Literal('=')
    star_value = Literal('*')
    domain_values = (value_list.copy())('domain_values')
    # Either "<domain> = *" or "<domain> <binop> <values>"; both forms must
    # consume the whole input (stringEnd).
    domain_expression = (
        (domain + equals + star_value + stringEnd)
        | (domain + binop + domain_values + stringEnd)
    ).setParseAction(DomainExpressionAction)('domain_expression')

    # Logical operators, in word or symbol form, wrapped in
    # WordStart()/WordEnd().
    AND_ = WordStart() + (CaselessLiteral("AND") | Literal("&&")) + WordEnd()
    OR_ = WordStart() + (CaselessLiteral("OR") | Literal("||")) + WordEnd()
    NOT_ = WordStart() + (CaselessLiteral("NOT") | Literal('!')) + WordEnd()
    BETWEEN_ = WordStart() + CaselessLiteral("BETWEEN") + WordEnd()

    # Forward because a parenthesised query_expression may appear inside
    # ident_expression, which is itself an operand of query_expression.
    query_expression = Forward()('filter')
    # Dotted path of names, e.g. a.b.c, grouped into one result.
    identifier = Group(delimitedList(Word(alphas + '_', alphanums + '_'),
                                     '.')).setParseAction(IdentifierToken)
    ident_expression = (
        Group(identifier + binop + value).setParseAction(IdentExpressionToken)
        | (Literal('(') + query_expression +
           Literal(')')).setParseAction(ParenthesisedQuery))
    between_expression = Group(identifier + BETWEEN_ + value + AND_ +
                               value).setParseAction(BetweenExpressionAction)
    # Operators are listed from highest to lowest precedence: NOT, AND, OR.
    query_expression << infixNotation(
        (ident_expression | between_expression),
        [(NOT_, 1, opAssoc.RIGHT, SearchNotAction),
         (AND_, 2, opAssoc.LEFT, SearchAndAction),
         (OR_, 2, opAssoc.LEFT, SearchOrAction)])
    query = (domain + Keyword('where', caseless=True).suppress() +
             Group(query_expression) + stringEnd).setParseAction(QueryAction)

    # Alternatives are tried in this order: query, domain expression, then
    # plain value list.
    statement = (query('query')
                 | domain_expression('domain')
                 | value_list('value_list')
                 ).setParseAction(StatementAction)('statement')

    def parse_string(self, text):
        '''request pyparsing object to parse text

        `text` can be either a query, or a domain expression, or a list of
        values. the `self.statement` pyparsing object parses the input text
        and return a pyparsing.ParseResults object that represents the input
        '''

        return self.statement.parseString(text)
Ejemplo n.º 10
0
#################
# BEGIN GRAMMAR
#################

# Punctuation tokens. Each one is matched but suppressed, so it does not
# appear in the parse results.
COLON = Literal(":").suppress()
CONCAT = Literal("+").suppress()
EQUALS = Literal("=").suppress()
LANGLE = Literal("<").suppress()
# NOTE: despite the names, LBRACE/RBRACE match square brackets "[" and "]".
LBRACE = Literal("[").suppress()
LPAREN = Literal("(").suppress()
PERIOD = Literal(".").suppress()
RANGLE = Literal(">").suppress()
RBRACE = Literal("]").suppress()
RPAREN = Literal(")").suppress()

# Keywords: matched case-insensitively and likewise suppressed.
CATEGORIES = CaselessLiteral("categories").suppress()
END = CaselessLiteral("end").suppress()
FONT = CaselessLiteral("font").suppress()
HINT = CaselessLiteral("hint").suppress()
ITEM = CaselessLiteral("item").suppress()
OBJECT = CaselessLiteral("object").suppress()

attribute_value_pair = Forward()  # this is recursed in item_list_entry

# A letter followed by any number of letters, digits or underscores.
simple_identifier = Word(alphas, alphanums + "_")
# One or more simple identifiers joined by "." and combined into a single
# token; the dots are kept because a plain Literal is used here, not PERIOD.
identifier = Combine(simple_identifier +
                     ZeroOrMore(Literal(".") + simple_identifier))
# Object names and object types share the identifier syntax.
object_name = identifier
object_type = identifier

# Integer and floating point values are converted to Python longs and floats, respectively.
Ejemplo n.º 11
0
    def parse_file(self):
        """Parses an existing namelist file and creates a deck of cards to
        hold the data. After this is executed, you need to call the ``load_model()``
        method to extract the variables from this data structure.

        The file named by ``self.filename`` is read in full and then handled
        one stripped line at a time. Groups are registered through
        ``self.add_group`` and cards are appended to ``self.cards[-1]``; the
        first ungrouped line may be stored in ``self.title``.
        """

        # The context manager closes the file even if reading raises.
        with open(self.filename, 'r') as infile:
            data = infile.readlines()

        # Lots of numerical tokens for recognizing various kinds of numbers
        digits = Word(nums)
        dot = "."
        sign = oneOf("+ -")
        ee = CaselessLiteral('E') | CaselessLiteral('D')

        num_int = ToInteger(Combine(Optional(sign) + digits))

        num_float = ToFloat(Combine(Optional(sign) +
                                    ((digits + dot + Optional(digits)) |
                                     (dot + digits)) +
                                    Optional(ee + Optional(sign) + digits)
                                    ))

        # special case for a float written like "3e5"
        mixed_exp = ToFloat(Combine(digits + ee + Optional(sign) + digits))

        # I don't suppose we need these, but just in case (plus it's easy)
        nan = ToFloat(oneOf("NaN Inf -Inf"))

        numval = num_float | mixed_exp | num_int | nan
        strval = QuotedString(quoteChar='"') | QuotedString(quoteChar="'")
        b_list = "T TRUE True true F FALSE False false .TRUE. .FALSE. .T. .F."
        boolval = ToBool(oneOf(b_list))
        fieldval = Word(alphanums)

        # Tokens for parsing a line of data
        # (a comma-separated run of numbers, or a single quoted string)
        numstr_token = (numval + ZeroOrMore(Suppress(',') + numval)) | strval
        data_token = numstr_token | boolval
        index_token = Suppress('(') + num_int + Suppress(')')

        card_token = Group(fieldval("name") +
                           Optional(index_token("index")) +
                           Suppress('=') +
                           Optional(num_int("dimension") + Suppress('*')) +
                           data_token("value") +
                           Optional(Suppress('*') + num_int("dimension")))
        multi_card_token = (card_token + ZeroOrMore(Suppress(',') + card_token))
        array_continuation_token = numstr_token.setResultsName("value")
        array2D_token = (fieldval("name") + Suppress("(") +
                         Suppress(num_int) + Suppress(',') +
                         num_int("index") + Suppress(')') +
                         Suppress('=') + numval +
                         ZeroOrMore(Suppress(',') + numval))

        # Tokens for parsing the group head and tail
        group_end_token = (Literal("/") |
                           Literal("$END") | Literal("$end") |
                           Literal("&END") | Literal("&end"))
        group_name_token = ((Literal("$") | Literal("&")) +
                            Word(alphanums).setResultsName("name") +
                            Optional(multi_card_token) +
                            Optional(group_end_token))

        # Comment Token
        comment_token = Literal("!")

        # Loop through each line and parse.

        current_group = None
        for line in data:
            line_base = line
            line = line.strip()

            # blank line: do nothing
            if not line:
                continue

            if current_group:

                # Skip comment cards
                if comment_token.searchString(line):
                    pass

                # Process ordinary cards
                elif multi_card_token.searchString(line):
                    cards = multi_card_token.parseString(line)

                    for card in cards:
                        name, value = _process_card_info(card)
                        self.cards[-1].append(Card(name, value))

                # Catch 2D arrays like -> X(1,1) = 3,4,5
                elif array2D_token.searchString(line):
                    card = array2D_token.parseString(line)

                    name = card[0]
                    index = card[1]
                    value = array(card[2:])

                    # Rows after the first are stacked onto the previous card.
                    if index > 1:
                        old_value = self.cards[-1][-1].value
                        new_value = vstack((old_value, value))
                        self.cards[-1][-1].value = new_value
                    else:
                        self.cards[-1].append(Card(name, value))

                # Arrays can be continued on subsequent lines
                # The value of the most recent card must be turned into an
                # array and appended
                elif array_continuation_token.searchString(line):
                    card = array_continuation_token.parseString(line)

                    if len(card) > 1:
                        element = array(card[0:])
                    else:
                        element = card.value

                    if isinstance(self.cards[-1][-1].value, ndarray):
                        new_value = append(self.cards[-1][-1].value, element)
                    else:
                        new_value = array([self.cards[-1][-1].value, element])

                    self.cards[-1][-1].value = new_value

                # Lastly, look for the group footer
                elif group_end_token.searchString(line):
                    current_group = None

                # Everything else must be a pure comment

                # Group ending '/' can also conclude a data line.
                if line[-1] == '/':
                    current_group = None

            else:
                group_name = group_name_token.searchString(line)

                # Group Header
                if group_name:
                    group_name = group_name_token.parseString(line)
                    current_group = group_name.name
                    self.add_group(current_group)

                    # Sometimes, variable definitions are included on the
                    # same line as the namelist header
                    if len(group_name) > 2:
                        cards = group_name[2:]

                        for card in cards:
                            # Sometimes an end card is on the same line.
                            if group_end_token.searchString(card):
                                current_group = None
                            else:
                                name, value = _process_card_info(card)
                                self.cards[-1].append(Card(name, value))

                # If there is an ungrouped card at the start, take it as the
                # title for the analysis
                elif len(self.cards) == 0 and self.title == '':
                    self.title = line

                # All other ungrouped cards are saved as free-form (card-less)
                # groups.
                # Note that we can't lstrip because column spacing might be
                # important.
                else:
                    self.add_group(line_base.rstrip())
Ejemplo n.º 12
0
def create_bnf(stack):
    """
    Assemble the pyparsing grammar used for region selection strings.

    The parse actions produced by `to_stack(stack)` are attached to every
    selector atom and to every "operator + operand" pair, so `stack` is the
    object those actions operate on.

    Returns the complete grammar anchored between `StringStart()` and
    `StringEnd()`.
    """
    def tagged(expr, tag, **options):
        # Group `expr` and post-process its tokens with replace(tag, ...).
        return Group(expr).setParseAction(replace(tag, **options))

    # --- numbers ---------------------------------------------------------
    dot = Literal(".")
    list_sep = Literal(",")
    exp_mark = CaselessLiteral("E")
    integer = Word(nums)
    real = Combine(
        Word("+-" + nums, nums)
        + Optional(dot + Optional(Word(nums)))
        + Optional(exp_mark + Word("+-" + nums, nums)))

    # --- plain keywords --------------------------------------------------
    kw_of = Literal('of')
    kw_in = Literal('in')
    kw_by = Literal('by')
    kw_copy = Literal('copy')

    # --- set operators, tried in the order listed ------------------------
    set_op = None
    for symbol, tag in (('-n', 'OA_SubN'), ('-e', 'OA_SubE'),
                        ('+n', 'OA_AddN'), ('+e', 'OA_AddE'),
                        ('*n', 'OA_IntersectN'), ('*e', 'OA_IntersectE')):
        op_token = Literal(symbol).setParseAction(replace(tag))
        set_op = op_token if set_op is None else (set_op | op_token)

    open_par = Literal("(").suppress()
    close_par = Literal(")").suppress()

    # --- selector vocabulary ---------------------------------------------
    kw_all = Literal('all').setParseAction(replace('KW_All'))
    kw_node = Literal('node')
    kw_nodes = Literal('nodes')
    kw_element = Literal('element')
    kw_elements = Literal('elements')
    kw_group = Literal('group')
    kw_set = Literal('set')
    kw_surface = Literal('surface')

    name = Word(alphas + '_.', alphanums + '_.')
    set_id = Word(nums) | name

    func_name = Group(Word(alphas + '_', alphanums + '_'))
    func_name.setParseAction(join_tokens)

    # A region reference is "r.<name>", optionally preceded by "copy".
    region_name = Combine(
        Literal('r.') + Word(alphas + '_', '_' + alphas + nums + '.'))
    region_ref = Group(Optional(kw_copy, default='nocopy') + region_name)
    region_ref.setParseAction(replace('KW_Region', keep=True))

    # --- coordinate relations (recursive through `rel_body`) -------------
    coordinate = oneOf('x y z')
    bool_op = oneOf('& |')
    rel_op = oneOf('< > <= >= != ==')
    comparison = (ZeroOrMore('(') + (coordinate | real) + rel_op
                  + (coordinate | real) + ZeroOrMore(')'))
    rel_body = Forward()
    rel_body << (ZeroOrMore('(') + comparison
                 + ZeroOrMore(bool_op + rel_body) + ZeroOrMore(')'))
    relation = Group(rel_body).setParseAction(join_tokens)

    # --- selector atoms --------------------------------------------------
    nodes_of_surface = tagged(kw_nodes + kw_of + kw_surface, 'E_NOS')
    nodes_in_relation = tagged(kw_nodes + kw_in + relation,
                               'E_NIR', keep=True)
    nodes_by_function = tagged(kw_nodes + kw_by + func_name,
                               'E_NBF', keep=True)
    elements_by_function = tagged(kw_elements + kw_by + func_name,
                                  'E_EBF', keep=True)
    elements_of_group = tagged(kw_elements + kw_of + kw_group + Word(nums),
                               'E_EOG', keep=True)
    nodes_of_group = tagged(kw_nodes + kw_of + kw_group + Word(nums),
                            'E_NOG', keep=True)
    node_in_region = Group(kw_node + kw_in + region_ref).setParseAction(
        replace_with_region('E_ONIR', 2))
    node_ids = tagged(kw_node + delimitedList(integer), 'E_NI', keep=True)
    element_ids = tagged(kw_element + delimitedList(integer),
                         'E_EI1', keep=True)
    id_pair = (open_par.suppress() + integer + list_sep.suppress()
               + integer + close_par.suppress())
    element_pairs = tagged(kw_element + delimitedList(id_pair),
                           'E_EI2', keep=True)
    nodes_of_set = tagged(kw_nodes + kw_of + kw_set + set_id,
                          'E_NOSET', keep=True)
    elements_of_set = tagged(kw_elements + kw_of + kw_set + set_id,
                             'E_EOSET', keep=True)

    # --- full expression: atom (operator expression)* --------------------
    expression = Forward()

    # The order of alternatives matters: the first one that matches wins.
    plain_atom = (kw_all | region_ref | node_ids | node_in_region
                  | nodes_of_surface | nodes_in_relation | nodes_by_function
                  | element_ids | element_pairs | elements_by_function
                  | elements_of_group | nodes_of_group | nodes_of_set
                  | elements_of_set)
    plain_atom.setParseAction(to_stack(stack))
    bracketed_atom = (open_par + expression.suppress() + close_par)
    any_atom = (plain_atom | bracketed_atom)

    op_and_operand = (set_op + expression)
    op_and_operand.setParseAction(to_stack(stack))
    expression << any_atom + ZeroOrMore(op_and_operand)

    return StringStart() + expression + StringEnd()
Ejemplo n.º 13
0
def sqlparse(sql):
    """Validate a SQL SELECT statement and translate it to relational algebra.

    The statement is parsed with a pyparsing grammar, its tables and
    attributes are checked against a small hard-coded schema (SAILORS, BOATS,
    RESERVES) with diagnostics printed to stdout, and a relational-algebra
    string is then assembled from the parse result and printed.

    Parameters
    ----------
    sql : str
        The SQL text to validate and translate.

    Returns
    -------
    str
        The relational-algebra string, or "" when the statement cannot be
        parsed or the translation raises.
    """
    # Define SQL tokens
    print("SQL Validating")
    selectStmt = Forward()
    SELECT = Keyword("select", caseless=True).addParseAction(upcaseTokens)
    FROM = Keyword("from", caseless=True).addParseAction(upcaseTokens)
    WHERE = Keyword("where", caseless=True).addParseAction(upcaseTokens)
    AS = Keyword("as", caseless=True).addParseAction(upcaseTokens)
    UNION = Keyword("union", caseless=True).addParseAction(upcaseTokens)
    INTERSECT = Keyword("intersect", caseless=True).addParseAction(upcaseTokens)
    EXCEPT = Keyword("except", caseless=True).addParseAction(upcaseTokens)
    COUNT = Keyword("count", caseless=True).addParseAction(upcaseTokens)
    MAX = Keyword("max", caseless=True).addParseAction(upcaseTokens)
    AVG = Keyword("avg", caseless=True).addParseAction(upcaseTokens)
    SUM = Keyword("sum", caseless=True).addParseAction(upcaseTokens)

    ident = Word(alphas, alphanums + "_$").setName("identifier")
    columnName = (delimitedList(ident, ".", combine=True)).setName("column name").addParseAction(upcaseTokens)
    tableName = (delimitedList(ident, ".", combine=True)).setName("table name").addParseAction(upcaseTokens)
    funcs = ((COUNT | MAX | AVG | SUM) + "(" + ("*" | columnName) + ")")

    whereExpression = Forward()
    and_ = Keyword("and", caseless=True).addParseAction(upcaseTokens)
    or_ = Keyword("or", caseless=True).addParseAction(upcaseTokens)
    in_ = Keyword("in", caseless=True).addParseAction(upcaseTokens)
    GROUP_BY = Keyword("group by", caseless=True).addParseAction(upcaseTokens)
    HAVING = Keyword("having", caseless=True).addParseAction(upcaseTokens)
    CONTAINS = Keyword("contains", caseless=True).addParseAction(upcaseTokens)

    E = CaselessLiteral("E")
    binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
    arithSign = Word("+-", exact=1)
    realNum = Combine(Optional(arithSign) + (Word(nums) + "." + Optional(Word(nums)) |
                                             ("." + Word(nums))) +
                      Optional(E + Optional(arithSign) + Word(nums)))
    intNum = Combine(Optional(arithSign) + Word(nums) +
                     Optional(E + Optional("+") + Word(nums)))

    columnRval = realNum | intNum | quotedString | columnName
    whereCondition = Group(
        (funcs + binop + columnRval) |
        (columnName + binop + columnRval) |
        (columnName + in_ + "(" + delimitedList(columnRval) + ")") |
        (columnName + in_ + "(" + selectStmt + ")") |
        ("(" + whereExpression + ")")
    )
    whereExpression << whereCondition + Optional(Group(GROUP_BY + columnName + Optional(
        HAVING + Group((funcs + binop + columnRval) | (columnName + binop + columnRval)) + ZeroOrMore(
            (and_ | or_) + Group((funcs + binop + columnRval) | (columnName + binop + columnRval)))))) + ZeroOrMore(
        (and_ | or_) + whereExpression)

    # Define the SQL grammar.  A successful parse yields, in order:
    # 'SELECT', the columns, 'FROM', the tables, the WHERE group (or the ""
    # default), followed by any UNION/INTERSECT/EXCEPT/CONTAINS tail.
    selectStmt <<= (SELECT + ('*' | Group(delimitedList(Group((funcs | columnName) + Optional(AS + ident)))))("columns") + \
                    FROM + Group(delimitedList(Group(tableName + Optional(AS + ident))))("tables") + \
                    Optional(Group(WHERE + whereExpression), "")("where")) + \
                   Optional((UNION + selectStmt)("union") | (INTERSECT + selectStmt)("intersect") | (EXCEPT + selectStmt)(
                       "except") | (CONTAINS + selectStmt)("contains"))

    SQLParser = selectStmt  # TODO - make parentheses optional around a selectStmt (test h)

    # Begin validation.  Parse once and reuse the result.
    try:
        parsedQuery = SQLParser.parseString(sql)
    except Exception as e:
        print("-------------------------------------------")
        print("SYNTAX ERROR PARSING: " + sql)
        print("-------------------------------------------")
        print("ERROR MESSAGE:")
        print("-------------------------------------------")
        print(e)
        # Nothing below can work without a parse result.
        return ""
    print(sql, "\n-----\n", parsedQuery, "\n")

    # List of tables being used
    tables = parsedQuery[3]
    # attributes: list of attributes and their type (comes after select)
    attributes = parsedQuery[1]
    # WHERE group; the grammar supplies "" when the clause is absent
    whereExp = parsedQuery[4] if len(parsedQuery) >= 5 else ""

    Aggfunc = ['COUNT', 'MAX', 'AVG', 'SUM']
    Aggfunc2 = ['GROUP BY', "HAVING"]

    # Define the schema.  The first pair of every table holds its name.
    sailors = (
        ("tname", "sailors"),
        ("sid", "int"),
        ("sname", "str"),
        ("rating", "int"),
        ("age", "real")
    )
    boats = (
        ("tname", "boats"),
        ("bid", "int"),
        ("bname", "str"),
        ("color", "str")
    )
    reserves = (
        ("tname", "reserves"),
        ("sid", "int"),
        ("bid", "int"),
        ("day", "date")
    )
    schema = (sailors, boats, reserves)
    schemaTables = [table[0][1].upper() for table in schema]

    def tables_with_attribute(name):
        # Upper-cased names of the schema tables that list `name`.
        wanted = str(name).upper()
        return [table[0][1].upper() for table in schema
                if any(column[0].upper() == wanted for column in table)]

    print("Error messages (if any):")

    # Check if the table used in the query are valid based on the schema
    usedTables = set()
    for item in tables:
        usedTables.add(str(item[0]).upper())
        if str(item[0]).upper() not in schemaTables:
            print(item[0] + " is not a table in the schema.")

    # Check if the select attributes are valid according to the schema and
    # what tables are being used in the query.
    attrTablePairs = []
    for attribute in attributes:
        # For an aggregate the attribute is the function argument (index 2),
        # otherwise it is the first token.
        if str(attribute[0]).upper() in Aggfunc:
            attr = attribute[2]
        else:
            attr = attribute[0]
        if "." in attr:
            attr = attr.split(".")[1]

        owners = list(schemaTables) if attr == "*" else tables_with_attribute(attr)
        if not owners:
            print(attr + " is not an attribute in the schema.")
            continue
        # Several tables can share a column name (e.g. SID); prefer one that
        # the query actually uses so a valid query is not reported as
        # invalid.  Fall back to the last owner otherwise.
        inQuery = [owner for owner in owners if owner in usedTables]
        attrTableName = inQuery[0] if inQuery else owners[-1]
        print(attr + " is in the table " + attrTableName)
        attrTablePairs.append((attr, attrTableName))

    # Check to see if the corresponding table is being used in the query
    for pair in attrTablePairs:
        if pair[0] != "*" and pair[1] not in usedTables:
            print(str(pair[0]) + " is invalid as the table it belongs to (" + str(
                pair[1]) + ") is not being used in the query.")

    # Check if the attributes being used in the WHERE stmnt are valid
    for exp in whereExp:
        if not (exp != "WHERE" and exp != "AND" and exp != "OR"):
            continue
        if exp[0] == "GROUP BY":
            grouped = str(exp[1]).upper()
            if not any(grouped == str(pair[0]).upper() for pair in attrTablePairs):
                print(exp[1] + " in the group by clause is not a valid attribute")
            if len(exp) >= 3 and str(exp[2]).upper() == "HAVING":
                print("")
            continue
        if exp[0] in Aggfunc:
            shown = lookup = exp[2]
        elif "." in exp[0]:
            shown, lookup = exp[0], exp[0].split(".")[1]
        else:
            shown = lookup = exp[0]
        if not tables_with_attribute(lookup):
            print(shown + " in the where clause is not a valid attribute")

    # RELATIONAL ALGEBRA TRANSLATION
    Rastr = '[(Projection)'
    rename = False
    renastr = ''
    try:
        # SELECT conversion
        projected = []
        for column in attributes:
            if str(column[0]) in Aggfunc:
                projected.append(str(column[0]) + '(' + str(column[2]) + ')')
            else:
                projected.append(str(column[0]))

            # Rename Set: the token right after an AS adds a rename entry.
            # NOTE(review): entries after the first omit the '<-source' part
            # and aggregate columns use index 2 (the argument, not the
            # alias); kept as-is pending confirmation of the intended format.
            found = False
            if len(column) > 1:
                for item in column:
                    if str(item) == "AS":
                        found = True
                    elif found:
                        if rename:
                            renastr = renastr + ',' + str(column[2])
                        else:
                            renastr = "(Rename)" + "[" + str(column[2]) + '<-' + str(column[0]) + ','
                            rename = True
                        found = False
        Rastr = Rastr + ','.join(projected)

        if rename:
            Rastr = renastr + "]" + Rastr
        Rastr += "]"

        # WHERE conversion
        whereF = False
        for attr in whereExp:
            # if agg function detected
            aggfunc1 = False
            aggfunc2 = False
            # checks if and/or found
            if str(attr) == "AND" or str(attr) == 'OR':
                Rastr = Rastr + str(attr) + " "
            # checks for where statement and converts to select
            elif str(attr) == "WHERE":
                Rastr = Rastr + '(Select)['
                whereF = True
            else:
                for item in attr:
                    if item in Aggfunc:
                        # tokens: func ( column ) operator value
                        Rastr = Rastr + str(attr[0]) + '(' + str(attr[2]) + ')' + ' ' + str(attr[4]) + ' ' + str(attr[5]) + ' '
                        aggfunc1 = True
                    elif str(item) in Aggfunc2:
                        Rastr = Rastr + str(item) + '('
                        aggfunc2 = True
                    else:
                        if aggfunc2:
                            if item[0] in Aggfunc:
                                Rastr = Rastr + str(item[0]) + '(' + str(item[2]) + ')' + str(item[4]) + str(item[5]) + ' '
                            elif str(item) == "AND" or str(item) == "OR":
                                Rastr = Rastr + str(item) + " "
                            else:
                                Rastr = Rastr + str(item) + ') '
                        elif not aggfunc1:
                            Rastr = Rastr + str(item) + ' '
                if aggfunc2:
                    Rastr = Rastr + ')'
        # Close the bracket opened by '(Select)['
        if whereF:
            Rastr = Rastr + ']'

        # FROM conversion of SQL
        Rastr = Rastr + '['
        first = True
        for table in tables:
            if len(table) == 1:
                operand = str(table[0])
            else:
                operand = '(Rename)[' + str(table[2]) + ']' + str(table[0])
            if first:
                Rastr = Rastr + operand
                first = False
            else:
                Rastr = Rastr + ' x ' + operand

        Rastr = Rastr + ']'
        print(Rastr)
    except Exception as e:
        # Do not hand back a half-built expression.
        Rastr = ""
        print("Error:", e)
    return Rastr
Ejemplo n.º 14
0
 def __init__(self, alert='!'):
     """Create an empty command list and the two alert-prefix expressions.

     `alert` is the prefix literal used for `dm_expr_head`, where it is
     optional.
     """
     self.commands = []
     # NOTE(review): expr_head is fixed to '!' and ignores `alert` --
     # confirm this is intentional.
     self.expr_head = CaselessLiteral('!')
     self.dm_expr_head = Optional(CaselessLiteral(alert))
     self.reinit_exprs()
Ejemplo n.º 15
0
quoted_string = QUOTE + ZeroOrMore(quoted_item) + QUOTE

# Numbers can just be numbers. Only integers though.
number = Regex('[0-9]+')

# NOTE: backslashes in the regex literals below are written so that Python
# sees no invalid escape sequences (``\s``, ``\d``); the pattern text handed
# to Regex is unchanged.

# Basis characters (by exclusion) for variable / field names.  The following
# list of characters is from the btparse documentation
any_name = Regex('[^\\s"#%\'(),={}]+')

# btparse says, and the test bibs show by experiment, that macro and field names
# cannot start with a digit.  In fact entry type names cannot start with a digit
# either (see tests/bibs). Cite keys can start with a digit
not_digname = Regex('[^\\d\\s"#%\'(),={}][^\\s"#%\'(),={}]*')

# Comment comments out to end of line
comment = (AT + CaselessLiteral('comment') +
           Regex(r"[\s{(].*").leaveWhitespace())

# The name types with their digiteyness
not_dig_lower = not_digname.copy().setParseAction(lambda t: t[0].lower())
macro_def = not_dig_lower.copy()
# setParseAction replaces the copied lower-casing action, so the lambda
# lower-cases again before wrapping the name in a Macro.
macro_ref = not_dig_lower.copy().setParseAction(lambda t: Macro(t[0].lower()))
field_name = not_dig_lower.copy()
# Spaces in names mean they cannot clash with field names
entry_type = not_dig_lower('entry_type')
cite_key = any_name('cite_key')
# Number has to be before macro name
string = (number | macro_ref | quoted_string | curly_string)

# There can be hash concatenation
field_value = string + ZeroOrMore(HASH + string)
Ejemplo n.º 16
0
# Identifiers and (optionally dotted) column / table names.  The dotted
# parts are combined into a single token before being passed to Upcase.
ident = Word(alphas, alphanums + "_$").setName("identifier")
columnName = Upcase(delimitedList(ident, ".", combine=True))
# Three separately constructed, structurally identical column-list elements.
columnNameList = Group(delimitedList(columnName))
columnNameList1 = Group(delimitedList(columnName))
groupNameList = Group(delimitedList(columnName))
tableName = Upcase(delimitedList(ident, ".", combine=True))
tableName2 = Upcase(delimitedList(ident, ".", combine=True))
# "<table> <astoken> <alias>"; tried before a bare table name below.
tableAlias = tableName + astoken + tableName2
tableNameList = Group(delimitedList(tableAlias | tableName))

# Forward declaration; the definition is not part of this section.
whereExpression = Forward()
and_ = Keyword("and", caseless=True)
or_ = Keyword("or", caseless=True)
in_ = Keyword("in", caseless=True)

# Exponent marker and operator vocabularies (all matched case-insensitively).
E = CaselessLiteral("E")
binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
oper = oneOf("intersect union except contains", caseless=True)
oper1 = oneOf("count min max avg", caseless=True)
asoper = oneOf("as", caseless=True)

# Numeric literals.  realNum requires a decimal point; intNum does not, and
# its exponent accepts only an optional "+" sign.
arithSign = Word("+-", exact=1)
realNum = Combine(
    Optional(arithSign) +
    (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) +
    Optional(E + Optional(arithSign) + Word(nums)))
intNum = Combine(
    Optional(arithSign) + Word(nums) +
    Optional(E + Optional("+") + Word(nums)))

# Right-hand-side value; alternatives are tried left to right, so realNum
# must come before intNum.
columnRval = realNum | intNum | quotedString | columnName  # need to add support for alg expressions
Ejemplo n.º 17
0
    def _parse_study_search_string(self,
                                   searchstr,
                                   only_with_processed_data=False):
        """Parse a search string into the SQL queries for a study search

        Parameters
        ----------
        searchstr : str
            The string to parse
        only_with_processed_data : bool, optional
            If True, the study query is additionally intersected with the
            studies that have an artifact of type 'BIOM'. Default False.

        Returns
        -------
        study_sql : str
            SQL query for selecting studies with the required metadata columns
        sample_sql : str
            SQL query for each study to get the sample ids that match the
            query. It still contains a literal ``{0}`` placeholder in the
            sample table name for the caller to fill in.
        meta_headers : dict keys view
            metadata categories in the query string, inserted in alphabetical
            order

        Notes
        -----
        All searches are case-sensitive

        References
        ----------
        .. [1] McGuire P (2007) Getting started with pyparsing.
        """
        # build the parse grammar
        category = Word(alphas + nums + "_")
        seperator = oneOf("> < = >= <= !=") | CaselessLiteral("includes") | \
            CaselessLiteral("startswith")
        value = Word(alphas + nums + "_" + ":" + ".") | \
            dblQuotedString().setParseAction(removeQuotes)
        criterion = Group(category + seperator + value)
        criterion.setParseAction(SearchTerm)
        and_ = CaselessLiteral("and")
        or_ = CaselessLiteral("or")
        not_ = CaselessLiteral("not")
        optional_seps = Optional(and_ | or_ | not_)

        # create the grammar for parsing operators AND, OR, NOT
        search_expr = operatorPrecedence(criterion,
                                         [(not_, 1, opAssoc.RIGHT, SearchNot),
                                          (and_, 2, opAssoc.LEFT, SearchAnd),
                                          (or_, 2, opAssoc.LEFT, SearchOr)])

        # parse the search string to get out the SQL WHERE formatted query
        eval_stack = (search_expr + stringEnd).parseString(searchstr)[0]
        sql_where = eval_stack.generate_sql()

        # parse out all metadata headers we need to have in a study, and
        # their corresponding types. One scan is enough: every match carries
        # both the header (term[0]) and its type (term[2]).
        terms = [
            match[0][0].term
            for match in (criterion + optional_seps).scanString(searchstr)
        ]
        meta_headers = {term[0] for term in terms}

        # sort (header, type) pairs by header so they return in the same
        # order every time. sorted() is stable, so pairs sharing a header
        # keep their order of appearance in the query.
        header_types = sorted(((term[0], term[2]) for term in terms),
                              key=lambda pair: pair[0])

        # At this point it is possible that a metadata header has been
        # referenced more than once in the query. If the types agree, then we
        # do not need to do anything. If the types do not agree (specifically,
        # if it appears to be numerical in one case and string in another),
        # then we need to give varchar the precedence.
        meta_header_type_lookup = dict()
        for header, header_type in header_types:
            if header not in meta_header_type_lookup:
                meta_header_type_lookup[header] = header_type
            elif header_type == 'varchar':
                meta_header_type_lookup[header] = 'varchar'

        # create the study finding SQL
        # remove metadata headers that are in study table
        meta_headers.discard('sample_id')
        meta_headers = tuple(meta_headers.difference(self.study_cols))

        # get all study ids that contain all metadata categories searched for
        if meta_headers:
            # have study-specific metadata, so need to find specific studies
            sql = ["SELECT DISTINCT table_name FROM "
                   "information_schema.columns WHERE "
                   "lower(column_name) = lower('{0}')".format(
                       qdb.util.scrub_data(meta))
                   for meta in meta_headers]
        else:
            # no study-specific metadata, so need all studies
            sql = ["SELECT DISTINCT table_name "
                   "FROM information_schema.columns"]

        # combine the query
        if only_with_processed_data:
            sql.append("SELECT DISTINCT 'sample_' || CAST(study_id AS VARCHAR)"
                       "FROM qiita.study_artifact "
                       "JOIN qiita.artifact USING (artifact_id) "
                       "JOIN qiita.artifact_type USING (artifact_type_id) "
                       "WHERE artifact_type = 'BIOM'")

        # restrict to studies in portal
        sql.append("SELECT 'sample_' || CAST(study_id AS VARCHAR) "
                   "FROM qiita.study_portal "
                   "JOIN qiita.portal_type USING (portal_type_id) "
                   "WHERE portal = '%s'" % qiita_config.portal)
        study_sql = ' INTERSECT '.join(sql)

        # create  the sample finding SQL, getting both sample id and values
        # build the sql formatted list of metadata headers: study-table
        # columns come from alias "st", everything else from alias "sa"
        header_info = [
            ("st.%s" if meta in self.study_cols else "sa.%s") % meta
            for meta in meta_header_type_lookup
        ]

        # build the SQL query
        sample_sql = ("SELECT ss.sample_id, %s "
                      "FROM qiita.study_sample ss "
                      "JOIN qiita.sample_{0} sa ON ss.sample_id = sa.sample_id"
                      " JOIN qiita.study st ON st.study_id = ss.study_id "
                      "WHERE %s" % (','.join(header_info), sql_where))

        return study_sql, sample_sql, meta_header_type_lookup.keys()
Ejemplo n.º 18
0
    print('toks:', toks)
    if len(toks[0]):
        if (toks[0].lower() == 'true') or (toks[0].lower() == 'yes'):
            return True
        elif (toks[0].lower() == 'false') or (toks[0].lower() == 'no'):
            return False
        if toks[0].isnumeric():
            if int(toks[0]) == 1:
                return True
            elif int(toks[0]) == 0:
                return False
    else:
        return False


# Spellings accepted as a boolean: the four words in any letter case, or the
# digits 1 / 0.
isc_boolean = (
    CaselessLiteral('true') | CaselessLiteral('false')
    | CaselessLiteral('yes') | CaselessLiteral('no')
    | Literal('1') | Literal('0')
)

# A single word, reported under the name 'isc_boolean' and passed through
# convertBoolean, wrapped in a group named 'find_pattern'.
find_pattern = Group(
    Word(alphanums + '_-/:.')
    .setResultsName('isc_boolean')
    .setParseAction(convertBoolean)
).setResultsName('find_pattern')

# Exercise the pattern with several spellings.
for _sample in ('TRUE', 'True', 'true', 'yes', 'Yes'):
    parse_me(find_pattern, _sample)
Ejemplo n.º 19
0
# Most of these markers could be SuffixMarkers (which arise due to errors in
# the regulation text). We'll wait until we see explicit examples before
# converting them though, to limit false matches
paragraph_marker = Marker("paragraph")
paragraphs_marker = SuffixMarker("paragraphs")

part_marker = Marker("part")
parts_marker = Marker("parts")

subpart_marker = Marker("subpart")

# Any of "comment", "commentary", "official interpretations" or
# "supplement I" (the "I" is suppressed), optionally followed by "of" or "to".
comment_marker = ((Marker("comment")
                   | Marker("commentary")
                   | (Marker("official") + Marker("interpretations"))
                   | (Marker("supplement") + Suppress(WordBoundaries("I")))) +
                  Optional(Marker("of") | Marker("to")))
comments_marker = Marker("comments")

appendix_marker = Marker("appendix")
appendices_marker = Marker("appendices")

# Connectors between citations, tried in order: a comma (optionally followed
# by and/or), "and", "or", "except for", a hyphen, or the word "through"
# (case-insensitive, stored under the results name "through").
conj_phrases = ((Suppress(",") + Optional(Marker("and") | Marker("or")))
                | Marker("and")
                | Marker("or")
                | (Marker("except") + Marker("for"))
                | Suppress("-")
                | WordBoundaries(
                    CaselessLiteral("through")).setResultsName("through"))

# A run of digits, stored under the results name "cfr_title".
title = Word(string.digits).setResultsName("cfr_title")
Ejemplo n.º 20
0
    def get_grammar(self):
        """
        Defines our grammar for mathematical expressions.

        The grammar is built bottom-up: numbers, names, functions,
        parentheses and arrays form the atoms, on top of which powers,
        negation, the parallel operator, products and sums are layered in
        decreasing order of precedence.

        Returns the expression grammar followed by `stringEnd`, so the whole
        input string has to match.

        Possibly helpful:
            - BNF form of context-free grammar https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form
            - Some pyparsing docs http://infohost.nmt.edu/~shipman/soft/pyparsing/web/index.html
        """

        # Define + and -
        plus = Literal("+")
        minus = Literal("-")
        plus_minus = plus | minus

        # 1 or 1.0 or .1
        number_part = Word(nums)
        inner_number = Combine((number_part + Optional("." + Optional(number_part)))
                               |
                               ("." + number_part))
        # Combine() joins the matching parts together in a single token,
        # and requires that the matching parts be contiguous (no spaces)

        # Define our suffixes
        suffix = Word(alphas + '%')
        suffix.setParseAction(self.suffix_parse_action)

        # Construct number as a group consisting of a text string ("num") and an optional suffix.
        # num can include a decimal number and numerical exponent, and can be
        # converted to a number using float()
        # suffix may contain alphas or %
        # Spaces are ignored inside numbers
        # Group wraps everything up into its own ParseResults object when parsing
        number = Group(
            Combine(
                inner_number +
                Optional(CaselessLiteral("E") + Optional(plus_minus) + number_part),
            )("num")
            + Optional(suffix)("suffix")
        )("number")
        # Note that calling ("name") on the end of a parser is equivalent to calling
        # parser.setResultsName, which is used to pull that result out of a parsed
        # expression like a dictionary.

        # Construct variable and function names
        front = Word(alphas, alphanums)  # must start with alpha
        subscripts = Word(alphanums + '_') + ~FollowedBy('{')  # ~ = not
        lower_indices = Literal("_{") + Optional("-") + Word(alphanums) + Literal("}")
        upper_indices = Literal("^{") + Optional("-") + Word(alphanums) + Literal("}")
        # Construct an object name in either of two forms:
        #   1. front + subscripts + tail
        #   2. front + lower_indices + upper_indices + tail
        # where:
        #   front (required):
        #       starts with alpha, followed by alphanumeric
        #   subscripts (optional):
        #       any combination of alphanumeric and underscores
        #   lower_indices (optional):
        #       Of form "_{(-)<alphanumeric>}"
        #   upper_indices (optional):
        #       Of form "^{(-)<alphanumeric>}"
        #   tail (optional):
        #       any number of primes
        name = Combine(front +
                       Optional(subscripts |
                                (Optional(lower_indices) + Optional(upper_indices))
                               ) +
                       ZeroOrMore("'"))
        # Define a variable as a pyparsing result that contains one object name
        variable = Group(name("varname"))("variable")
        variable.setParseAction(self.variable_parse_action)

        # initialize recursive grammar
        expression = Forward()

        # Construct functions as consisting of funcname and arguments as
        # funcname(arguments)
        # where arguments is a comma-separated list of arguments, returned as a list
        # Must have at least 1 argument
        function = Group(name("funcname") +
                         Suppress("(") +
                         Group(delimitedList(expression))("arguments") +
                         Suppress(")")
                        )("function")
        function.setParseAction(self.function_parse_action)

        # Define parentheses
        parentheses = Group(Suppress("(") +
                            expression +
                            Suppress(")"))('parentheses')

        # Define arrays
        array = Group(Suppress("[") +
                      delimitedList(expression) +
                      Suppress("]"))("array")

        # atomic units evaluate directly to number or array without binary operations
        # Alternatives are tried left to right, so a function call is
        # attempted before a bare variable name.
        atom = number | function | variable | parentheses | array

        # Define operations in order of precedence
        # Define exponentiation, possibly including negative powers
        power = atom + ZeroOrMore(Suppress("^") + Optional(minus)("op") + atom)
        power.addParseAction(self.group_if_multiple('power'))

        # Define negation (e.g., in 5*-3 --> we need to evaluate the -3 first)
        # Negation in powers is handled separately
        # This has been arbitrarily assigned a higher precedence than parallel
        negation = Optional(minus)("op") + power
        negation.addParseAction(self.group_if_multiple('negation'))

        # Define the parallel operator 1 || 5 == 1/(1/1 + 1/5)
        # Written as two separate '|' literals.
        pipes = Literal('|') + Literal('|')
        parallel = negation + ZeroOrMore(Suppress(pipes) + negation)
        parallel.addParseAction(self.group_if_multiple('parallel'))

        # Define multiplication and division
        product = parallel + ZeroOrMore((Literal('*') | Literal('/'))("op") + parallel)
        product.addParseAction(self.group_if_multiple('product'))

        # Define sums and differences
        # Note that leading - signs are treated by negation
        sumdiff = Optional(plus) + product + ZeroOrMore(plus_minus("op") + product)
        sumdiff.addParseAction(self.group_if_multiple('sum'))

        # Close the recursion
        expression << sumdiff

        return expression + stringEnd
Ejemplo n.º 21
0
def get_grammar_parser():
    """
    Build the pyparsing element used to parse a whole JSGF grammar.

    The element matches, in order: the ``#JSGF`` header line, the ``grammar``
    declaration line, any number of import statements and at least one rule.
    Its parse action turns the matched tokens into a ``Grammar`` object.
    """
    # Keywords; both are suppressed so they never appear in the token list.
    import_ = Suppress(CaselessKeyword("import"))
    grammar_ = Suppress("grammar")

    def _assemble_grammar(tokens):
        """Parse action: turn the flat token list into a ``Grammar``."""
        grammar = Grammar()

        # The first four tokens are the header attributes and the name.
        version, charset, language, name = tokens[0:4]

        # A lone two-character header value is treated as the language
        # rather than as the character set.
        if not language and len(charset) == 2:
            language, charset = charset, ""

        # version[1:] drops the leading "v"/"V" matched by the version regex.
        grammar.jsgf_version = version[1:]
        grammar.charset_name = charset
        grammar.language_name = language
        grammar.name = name

        # Everything after the name is either an import or a rule.
        for item in tokens[4:]:
            add = grammar.add_import if isinstance(item, Import) \
                else grammar.add_rule
            add(item)

        return grammar

    def _blank_if_missing(tokens):
        """Parse action: yield an empty string when nothing was matched."""
        if tokens:
            return tokens
        return [""]

    def _to_import(tokens):
        """Parse action: wrap the imported name in an ``Import``."""
        return Import(tokens[0])

    # Header pieces: "#JSGF <version> [charset] [language];"
    version_no = Regex(r"(v|V)(\d+\.\d+|\d+\.|\.\d+)")
    version_no = version_no.setName("version number")

    charset_name = Optional(word.copy()).setName("character set")
    charset_name = charset_name.setParseAction(_blank_if_missing)

    language_name = Optional(word.copy()).setName("language name")
    language_name = language_name.setParseAction(_blank_if_missing)

    header_line = (
        Suppress(CaselessLiteral("#JSGF")) + version_no + charset_name
        + language_name + line_delimiter
    ).setName("grammar header")

    # The declaration and import lines tolerate C++ style comments
    # (/* comment */ or // comment).
    name_line = (grammar_ + grammar_name + line_delimiter)
    name_line = name_line.setName("grammar declaration")
    name_line = name_line.ignore(cppStyleComment)

    import_statement = (
        import_ + langle + import_name + rangle + line_delimiter
    )
    import_statement = import_statement.setParseAction(_to_import)
    import_statement = import_statement.ignore(cppStyleComment)

    # Whole grammar: header, declaration, imports, then one or more rules.
    parser = (header_line + name_line + ZeroOrMore(import_statement) +
              OneOrMore(rule_parser))
    parser.setName("grammar")
    parser.setParseAction(_assemble_grammar)
    return parser
Ejemplo n.º 22
0
Title: Elements That Provide DNS Resource Records

Description: Provides RR-related grammar in PyParsing engine
             for ISC-configuration style

             For resource records found in BIND9 DNS zone records
"""

from pyparsing import Optional, Combine, CaselessLiteral, \
    Literal, Char, OneOrMore, Group, ungroup
from bind9_parser.isc_utils import semicolon
from bind9_parser.isc_domain import domain_generic_fqdn, domain_charset_wildcard

# 65-character alphanumeric string; the name indicates it serves as a sample
# value longer than 63 characters (presumably for label-length tests --
# TODO confirm where it is consumed).
g_test_over_63_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abc"

# Resource-record class keywords, matched case-insensitively.
rr_class_in = CaselessLiteral('IN')
rr_class_ch = CaselessLiteral('CH')
rr_class_hesiod = CaselessLiteral('HS')
rr_class_none = CaselessLiteral('NONE')  # RFC 2136
rr_class_any = CaselessLiteral('ANY')  # RFC 1035

# Any one of the class keywords above; the matched text is stored under the
# results name 'rr_class'. Alternatives are tried in the order listed.
rr_class_set = (rr_class_in
                | rr_class_hesiod
                | rr_class_ch
                | rr_class_none
                | rr_class_any)('rr_class')
rr_class_set.setName('<rr_class>')

# Resource-record type keywords, matched case-insensitively.
rr_type_a = CaselessLiteral('A')
rr_type_ns = CaselessLiteral('NS')
rr_type_cname = CaselessLiteral('CNAME')
Ejemplo n.º 23
0
            else:  # Append to query the token
                if negation:
                    t = ~t
                if operation == 'or':
                    query |= t
                else:
                    query &= t
    return query


# Grammar for search queries: terms (optionally prefixed with "meta:"),
# combined with '-' (not), 'or' and an optional 'and'.

# Every printable character except the two parentheses.
NO_BRTS = printables.replace('(', '').replace(')', '')
# A bare word: printable characters without parentheses or '*'.
SINGLE = Word(NO_BRTS.replace('*', ''))
# A bare word with an optional '*' on either side, ending at a word boundary.
WILDCARDS = Optional('*') + SINGLE + Optional('*') + WordEnd(wordChars=NO_BRTS)
# A quoted string with the surrounding quotes stripped.
# NOTE(review): setParseAction is applied to pyparsing's shared quotedString
# object itself, not to a copy -- confirm no other grammar in this process
# depends on quotedString keeping its quotes.
QUOTED = quotedString.setParseAction(removeQuotes)

# Operators; '-' acts as the unary "not".
OPER_AND = CaselessLiteral('and')
OPER_OR = CaselessLiteral('or')
OPER_NOT = '-'

# One search term: an optional alphabetic 'meta' prefix followed by ':',
# then either a quoted string or a wildcard word stored as 'query'.
# Combine joins the matched pieces into a single token.
TERM = Combine(
    Optional(Word(alphas).setResultsName('meta') + ':') +
    (QUOTED.setResultsName('query') | WILDCARDS.setResultsName('query')))
TERM.setParseAction(createQ)

# Operator levels are listed from tightest to loosest binding: unary '-',
# then 'or', then 'and'. 'and' is optional, so adjacent terms are joined
# with the default token 'and'.
EXPRESSION = operatorPrecedence(
    TERM, [(OPER_NOT, 1, opAssoc.RIGHT), (OPER_OR, 2, opAssoc.LEFT),
           (Optional(OPER_AND, default='and'), 2, opAssoc.LEFT)])
EXPRESSION.setParseAction(unionQ)

# A full query is one or more expressions that must consume the whole input.
QUERY = OneOrMore(EXPRESSION) + StringEnd()
QUERY.setParseAction(unionQ)
Ejemplo n.º 24
0
    nums,
    Combine,
    oneOf,
    opAssoc,
    operatorPrecedence,
    Suppress,
    alphanums,
    alphas,
    Word,
    Forward,
    Regex,
)

# Basic punctuation and case-insensitive keyword tokens.
dot = Literal(".")
underscore = "_"
eg = CaselessLiteral("eg")
# NOTE(review): the variable is named `si` but the literal matched is "di" --
# confirm which of the two is intended.
si = CaselessLiteral("di")
ai = CaselessLiteral("ai")

# Parentheses are kept in the parse results (Literal, not Suppress).
lparen = Literal("(")
rparen = Literal(")")

# Comparison operators.
lt = Literal("<")
le = Literal("<=")
gt = Literal(">")
ge = Literal(">=")
eq = Literal("=")

# Alphanumeric characters plus hyphen and underscore.
alphanums_extended = alphanums + "-_"

# JSON-style number definition
Ejemplo n.º 25
0
def create_parser():
    """
    Build and return a parser for expressions.

    Parser builds a tuple-based representation of the formula that should be easy to evaluate.
    Each component is a tuple with:
       the first element indicates the type,
       the second element gives a set of columns referenced,
       the rest gives the required arguments for the formula-chunk.

    Parser throws ParseException if something goes wrong.
    """
    # ParseException is imported here as well so the parse actions below can
    # raise it without relying on a module-level import.
    from pyparsing import (Literal, Word, Optional, CaselessLiteral, Group,
                           StringStart, StringEnd, Suppress, CharsNotIn,
                           Forward, nums, delimitedList, operatorPrecedence,
                           opAssoc, ParseException)

    # Recognised ".foo" column modifiers and the short tag stored for each.
    column_modifiers = ((".max", "max"), (".percent", "per"), (".final", "fin"))

    def column_parse(toks):
        """
        Parse a column name and strip off any ".foo" modifier.
        """
        col = toks[0][0]
        for suffix, tag in column_modifiers:
            if col.endswith(suffix):
                col = col[:-len(suffix)]
                return ("col", set([col]), col, tag)
        # No modifier: the plain value of the column.
        return ("col", set([col]), col, 'val')

    def actionflag_parse(toks):
        """
        Parse the [[activitytotal]] special case
        """
        flag = toks[0][0]
        if flag == 'activitytotal':
            # dependant activity True is a flag meaning "everything": fixed later.
            return ("flag", set([True]), flag)

        # Call syntax (not the Python 2 "raise E, msg" form) so the module
        # also compiles on Python 3.
        raise ParseException("Unknown flag ([[...]]).")

    def real_parse(toks):
        """Join the matched number pieces and convert them to a float."""
        return ("num", set(), float(''.join(toks)))

    def func_parse(toks):
        """Build a "func" tuple; its columns are the union over all arguments."""
        cols = set()
        cols.update(*(t[1] for t in toks[0][1:]))
        return ("func", cols) + tuple(toks[0])

    def expr_parse(s, loc, toks):
        """Build a "sign" (unary) or "expr" (binary chain) tuple."""
        ts = toks[0]
        if len(ts) == 2:
            # unary operator
            return ("sign", ts[1][1], ts[0]) + tuple(ts[1:])
        elif len(ts) > 1 and len(ts) % 2 == 1:
            # one or more ops at the same level
            # build list of referenced columns (operands sit at even indexes):
            cols = set()
            cols.update(*(t[1] for t in ts[0::2]))
            return ("expr", cols, ts[0]) + tuple(ts[1:])
        else:
            raise ParseException("Unknown expression parsed.")

    sign = Literal("+") | Literal("-")
    real = (Word(nums) + "." + Optional(Word(nums)) +  # whole/decimal part
            Optional(CaselessLiteral("E") + Optional(sign) + Word(nums))  # scientific notation part
            )
    integer = Word(nums)
    number = (real | integer).setParseAction(real_parse)  # all numbers treated as floats to avoid integer arithmetic rounding

    # Allow anything except ']' in column names.  Let the limitations on sane column names be enforced somewhere else.
    actionflag = Group(Suppress('[[') + CharsNotIn('[]') + Suppress(']]')).setParseAction(actionflag_parse)
    column = Group(Suppress('[') + CharsNotIn('[]') + Suppress(']')).setParseAction(column_parse)
    expr = Forward()
    function_name = (CaselessLiteral("SUM") | CaselessLiteral("AVG") | CaselessLiteral("MAX")
                     | CaselessLiteral("MIN") | CaselessLiteral("BEST"))
    function = Group(function_name + Suppress('(') + delimitedList(expr) + Suppress(')')).setParseAction(func_parse)
    operand = number | column | function | actionflag

    signop = Literal("+") | Literal("-")
    multop = Literal("*") | Literal("/")
    plusop = Literal("+") | Literal("-")

    # Operator levels from tightest to loosest binding.
    expr << operatorPrecedence(
        operand,
        [(signop, 1, opAssoc.RIGHT, expr_parse),
         (multop, 2, opAssoc.LEFT, expr_parse),
         (plusop, 2, opAssoc.LEFT, expr_parse)],
    )

    formula = StringStart() + expr + StringEnd()
    return formula
Ejemplo n.º 26
0
    def BNF(self):
        """
        Return the expression grammar, building and caching it on first use.

        multop  :: '*' | '/'
        addop   :: '+' | '-'
        cmpop   :: '<=' | '>=' | '==' | '!=' | '<' | '>'
        atom    :: ['-'] ( number | GET_SENSOR_VAL:name | fn '(' expr ')' )
                   | '(' expr ')'
        term    :: atom [ multop atom ]*
        add     :: term [ addop term ]*
        cmp     :: add [ cmpop add ]*
        expr    :: cmp chained with NOT, then AND, then OR
        """
        if self.bnf:
            return self.bnf

        def chain(operand, op):
            # operand [ op operand ]*, pushing each "op operand" match.
            return operand + ZeroOrMore(
                (op + operand).setParseAction(self.pushFirst))

        # Numbers and names.
        decimal_point = Literal(".")
        fnumber = Combine(
            Word("+-" + nums, nums) +
            Optional(decimal_point + Optional(Word(nums))))
        ident = Word(alphas, alphas + nums + "_$")
        sensor_prefix = CaselessLiteral('GET_SENSOR_VAL:').suppress()
        sensor_ident = (sensor_prefix + Word(alphas + nums + "_$"))
        sensor_ident = sensor_ident.setParseAction(self.get_sensor_value)

        # Boolean keywords.
        and_ = CaselessLiteral('AND')
        or_ = CaselessLiteral('OR')
        not_ = CaselessLiteral('NOT')

        # Operator groups; two-character comparisons are tried before the
        # one-character ones.
        add_op = Literal("+") | Literal("-")
        mult_op = Literal("*") | Literal("/")
        cmp_op = (CaselessLiteral('<=') | CaselessLiteral('>=') |
                  CaselessLiteral('==') | CaselessLiteral('!=') |
                  Literal('<') | Literal('>'))

        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()

        expr = Forward()

        # An operand with an optional leading minus, or a parenthesised
        # sub-expression.
        operand = (fnumber | sensor_ident |
                   ident + lpar + expr + rpar).setParseAction(self.pushFirst)
        negatable = Optional("-") + operand
        grouped = lpar + expr.suppress() + rpar
        atom = (negatable | grouped).setParseAction(self.pushUMinus)

        # Each level wraps the previous one, from tightest to loosest.
        level = atom
        for op in (mult_op, add_op, cmp_op, not_, and_, or_):
            level = chain(level, op)

        expr << level
        self.bnf = expr
        return self.bnf
Ejemplo n.º 27
0
def pyparse_gml():
    """A pyparsing tokenizer for GML graph format.

    This is not intended to be called directly.

    The grammar is built once and stored in the module-level name ``graph``;
    later calls return that stored grammar. ``graph`` must already exist at
    module level (with a false value) before the first call.

    Returns
    -------
    The pyparsing element for a whole GML document: optional ``Creator`` and
    ``Version`` lines followed by ``graph [ ... ]``.

    Raises
    ------
    ImportError
        If pyparsing cannot be imported.

    See Also
    --------
    write_gml, read_gml, parse_gml

    Notes
    -----
    This doesn't implement the complete GML specification for
    nested attributes for graphs, edges, and nodes.

    """
    global graph

    try:
        from pyparsing import (
            Literal, CaselessLiteral, Word, Forward, ZeroOrMore, Group,
            Optional, Combine, restOfLine, alphas, alphanums, nums,
            OneOrMore, quotedString, removeQuotes)
    except ImportError:
        # Call syntax (not the Python 2 "raise E, msg" form) so the module
        # also compiles on Python 3.
        raise ImportError(
            "Import Error: not able to import pyparsing: http://pyparsing.wikispaces.com/")

    if not graph:
        lbrack = Literal("[").suppress()
        rbrack = Literal("]").suppress()
        # '#' starts a comment that runs to the end of the line.
        pound = ("#")
        comment = pound + Optional(restOfLine)
        point = Literal(".")
        e = CaselessLiteral("E")
        integer = Word(nums).setParseAction(lambda s, l, t: [int(t[0])])
        real = Combine(
            Word("+-" + nums, nums) + Optional(point + Optional(Word(nums))) +
            Optional(e + Word("+-" + nums, nums))).setParseAction(
                lambda s, l, t: [float(t[0])])
        key = Word(alphas, alphanums + '_')
        # Work on a copy so the parse action is not attached to pyparsing's
        # shared quotedString object.
        unquoted_string = quotedString.copy().setParseAction(removeQuotes)
        # '^' picks the longest of the matching alternatives.
        value_atom = integer ^ real ^ Word(alphanums) ^ unquoted_string

        value = Forward()  # to be defined later with << operator
        keyvalue = Group(key + value)
        # A value is either an atom or a bracketed list of key/value pairs.
        value << (value_atom | Group(lbrack + ZeroOrMore(keyvalue) + rbrack))
        node = Group(
            Literal("node") + lbrack + Group(OneOrMore(keyvalue)) + rbrack)
        edge = Group(
            Literal("edge") + lbrack + Group(OneOrMore(keyvalue)) + rbrack)

        creator = Group(Literal("Creator") + Optional(restOfLine))
        version = Group(Literal("Version") + Optional(restOfLine))
        graphkey = Literal("graph").suppress()

        graph = Optional(creator) + Optional(version) + \
            graphkey + lbrack + ZeroOrMore((node | edge | keyvalue)) + rbrack
        graph.ignore(comment)

    return graph
Ejemplo n.º 28
0
    def __init__(self):
        """
        Build the arithmetic grammar and the operator/function lookup tables.

        Sets ``self.pattern`` (the grammar, anchored at end of input),
        ``self.opn`` (operator symbol -> wrapped binary callable) and
        ``self.functions`` (function name -> bound method).
        """
        # --- numeric literals and identifiers ---
        dot = Literal('.')
        exp_marker = CaselessLiteral('E')
        sign = Literal('+') | Literal('-')
        digits = Word(nums)
        integer = Combine(Optional(sign) + digits)
        floatnumber = Combine(
            integer + Optional(dot + Optional(digits)) +
            Optional(exp_marker + integer))

        # Identifiers start with '$'.
        ident = Word('$', alphanums + '_')

        # --- operators and parentheses ---
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        addop = Literal("+") | Literal("-")
        multop = Literal("*") | Literal("/")
        expop = Literal("^")

        expr = Forward()

        def build_call(name, arity=None):
            # name '(' args ')' where args is any comma-separated list when
            # arity is None, otherwise exactly `arity` expressions.
            call = CaselessKeyword(name).setParseAction(self.pushEnd) + lpar
            if arity is None:
                call += Optional(expr + ZeroOrMore(Literal(',') + expr))
            elif arity > 0:
                call += expr
                for _ in range(arity - 1):
                    call += Literal(',') + expr
            call += rpar
            return call.setParseAction(self.pushFirst)

        # Function calls, in the order they are tried.
        call_specs = (
            ('max', None),
            ('min', None),
            ('cases', None),
            ('cases1', 5),
            ('cases2', 8),
            ('cases3', 11),
            ('cases333', 11),
            ('round3down', 1),
        )
        atom = None
        for fn_name, arity in call_specs:
            call = build_call(fn_name, arity)
            atom = call if atom is None else atom | call

        # After the function calls come plain operands, then a
        # parenthesised sub-expression.
        operand = (exp_marker | floatnumber | integer | ident)
        operand = operand.setParseAction(self.pushFirst)
        grouped = lpar + expr.suppress() + rpar
        atom = atom | operand | grouped

        # factor refers to itself on the right-hand side of '^'.
        factor = Forward()
        power_tail = (expop + factor).setParseAction(self.pushFirst)
        factor << atom + ZeroOrMore(power_tail)

        mult_tail = (multop + factor).setParseAction(self.pushFirst)
        term = factor + ZeroOrMore(mult_tail)

        add_tail = (addop + term).setParseAction(self.pushFirst)
        expr << term + ZeroOrMore(add_tail)

        self.pattern = expr + StringEnd()

        # Operator symbols mapped to their arithmetic, each wrapped by
        # handleNone.
        wrap = self.handleNone
        self.opn = {
            "+": wrap(lambda a, b: a + b),
            "-": wrap(lambda a, b: a - b),
            "*": wrap(lambda a, b: a * b, none_survives=True),
            "/": wrap(lambda a, b: a / b, none_survives=True),
            "^": wrap(lambda a, b: a**b, none_survives=True)
        }

        # Function names mapped to the methods that implement them.
        self.functions = {
            'max': self.max,
            'min': self.min,
            'cases': self.cases,
            'cases1': self.cases1,
            'cases2': self.cases2,
            'cases3': self.cases3,
            'cases333': self.cases333,
            'round3down': self.round3down
        }
Ejemplo n.º 29
0
 def __init__(self):
     """
     Build the arithmetic grammar and the operator/function tables.

     Please use any of the following symbols:
     expop   :: '^'
     multop  :: '*' | '/'
     addop   :: '+' | '-'
     integer :: ['+' | '-'] '0'..'9'+

     Sets ``self.bnf`` (the grammar), ``self.opn`` (operator symbol ->
     binary callable) and ``self.fn`` (function name -> callable).
     """
     point = Literal(".")
     e = CaselessLiteral("E")
     fnumber = Combine(
         Word("+-" + nums, nums) + Optional(point + Optional(Word(nums))) +
         Optional(e + Word("+-" + nums, nums)))
     ident = Word(alphas, alphas + nums + "_$")
     plus = Literal("+")
     minus = Literal("-")
     mult = Literal("*")
     div = Literal("/")
     lpar = Literal("(").suppress()
     rpar = Literal(")").suppress()
     addop = plus | minus
     multop = mult | div
     expop = Literal("^")
     pi = CaselessLiteral("PI")
     expr = Forward()
     # An atom is an optionally signed constant, number or function call,
     # or an optionally signed parenthesised sub-expression.
     atom = ((Optional(oneOf("- +")) +
              (pi | e | fnumber
               | ident + lpar + expr + rpar).setParseAction(self.pushFirst))
             | Optional(oneOf("- +")) +
             Group(lpar + expr + rpar)).setParseAction(self.pushUMinus)
     # by defining exponentiation as "atom [ ^ factor ]..." instead of
     # "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
     # that is, 2^3^2 = 2^(3^2), not (2^3)^2.
     factor = Forward()
     factor << atom + ZeroOrMore(
         (expop + factor).setParseAction(self.pushFirst))
     term = factor + ZeroOrMore(
         (multop + factor).setParseAction(self.pushFirst))
     expr << term + ZeroOrMore(
         (addop + term).setParseAction(self.pushFirst))
     self.bnf = expr
     # this will map operator symbols to their corresponding arithmetic operations
     epsilon = 1e-12
     self.opn = {
         "+": operator.add,
         "-": operator.sub,
         "*": operator.mul,
         "/": operator.truediv,
         "^": operator.pow
     }
     self.fn = {
         "sin": math.sin,
         "cos": math.cos,
         "tan": math.tan,
         "abs": abs,
         "trunc": lambda a: int(a),
         "round": round,
         # Sign of a, with magnitudes up to epsilon treated as zero.
         # (a > 0) - (a < 0) stands in for the builtin cmp(a, 0), which no
         # longer exists in Python 3.
         "sgn": lambda a: ((a > 0) - (a < 0)) if abs(a) > epsilon else 0
     }
Ejemplo n.º 30
0
# Parse action: convert the matched integer text to an int.
cvt_int = lambda toks: int(toks[0])
integer.setParseAction(cvt_int)

# Case-insensitive keywords 'True' / 'False' converted to Python booleans.
boolean_true = Keyword('True', caseless=True)
boolean_true.setParseAction(lambda x: True)
boolean_false = Keyword('False', caseless=True)
boolean_false.setParseAction(lambda x: False)

boolean = boolean_true | boolean_false

# Case-insensitive keyword 'None' converted to Python None.
none = Keyword('None', caseless=True)

# The result is wrapped in a list so that None itself becomes the token.
cvt_none = lambda toks: [None]
none.setParseAction(cvt_none)

# Real numbers. First alternative: digits and a mandatory '.', with optional
# fraction digits and optional exponent. Second alternative: digits with an
# optional '.' and fraction digits, followed by a mandatory exponent.
e = CaselessLiteral("e")
real = (Combine(Optional(oneOf('+ -')) + Word(nums)
               + '.' + Optional(Word(nums))
               + Optional(e + Optional(oneOf('+ -')) + Word(nums)))
        | Combine(Optional(oneOf('+ -')) + Word(nums)
               + Optional('.') + Optional(Word(nums))
               + e + Optional(oneOf('+ -')) + Word(nums))).setName('real')
# Parse action: convert the combined text to a float.
cvt_real = lambda toks: float(toks[0])
real.setParseAction(cvt_real)

# A real immediately followed by 'j' (case-insensitive).
# NOTE(review): toks[0] here is the float already produced by cvt_real and the
# 'j' token is not used, so complex(toks[0]) appears to yield a value with a
# zero imaginary part -- confirm whether complex(0, toks[0]) was intended.
cmplx = real + CaselessLiteral('j')
cvt_cmplx = lambda toks: complex(toks[0])
cmplx.setParseAction(cvt_cmplx)

# An integer optionally followed by one or two further colon-separated
# integers.
array_index = integer + Optional(colon + integer
                                 + Optional(colon + integer))