return OneOrMore(token + maybeComma) digit_sequence = Word(nums) sign = oneOf("+ -") def convertToFloat(s, loc, toks): try: return float(toks[0]) except: raise ParseException(loc, "invalid float format %s" % toks[0]) exponent = CaselessLiteral("e") + Optional(sign) + Word(nums) #note that almost all these fields are optional, #and this can match almost anything. We rely on Pythons built-in #float() function to clear out invalid values - loosely matching like this #speeds up parsing quite a lot floatingPointConstant = Combine( Optional(sign) + Optional(Word(nums)) + Optional(Literal(".") + Optional(Word(nums))) + Optional(exponent)) floatingPointConstant.setParseAction(convertToFloat) number = floatingPointConstant #same as FP constant but don't allow a - sign nonnegativeNumber = Combine(
cppStyleComment, OneOrMore, quotedString, restOfLine, delimitedList, \ dictOf, Forward, Dict lbrace = Suppress("{") rbrace = Suppress("}") lbracket = Suppress("[") rbracket = Suppress("]") lparen = Suppress("(") rparen = Suppress(")") equal = Suppress("=") comma = Suppress(",") point = Literal('.') tilde = ('~') bang = ('!') e = CaselessLiteral('E') plusorminus = Literal('+') | Literal('-') hashsymbol = Suppress("#") dbquotes = '"' uni_arrow = Literal("->") bi_arrow = Literal("<->") # system keywords system_constants_ = Keyword("SYSTEM_CONSTANTS") # molecule keywords define_molecules_ = Keyword("DEFINE_MOLECULES") define_functions_ = Keyword("DEFINE_FUNCTIONS") diffusion_constant_3d_ = Keyword("DIFFUSION_CONSTANT_3D") diffusion_constant_2d_ = Keyword("DIFFUSION_CONSTANT_2D")
def _build_asn1_grammar():
    """Assemble the pyparsing grammar for ASN.1 module definitions.

    The grammar is built bottom-up: reserved words and built-in type
    keywords, literals and comments, references and values, constraints,
    types, assignments and finally the module definition. Parse actions
    that wrap matched tokens in ``AnnotatedToken`` are attached at the end.

    Returns:
        ``OneOrMore(module_definition)``, with ``comment`` registered as
        ignorable on ``module_definition``.

    NOTE(review): ``Unique``, ``StringOf`` and ``AnnotatedToken`` are defined
    outside this function. If ``Unique`` returns a copy of its argument, then
    parse actions attached to the original expression *after* the ``Unique``
    call do not reach the copy, which makes statement order significant --
    confirm before reordering anything here.
    """
    def build_identifier(prefix_pattern):
        # One leading character taken from prefix_pattern, followed by an
        # optional run of letters, digits and hyphens, combined into one token.
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        # todo: more rigorous? trailing hyphens and -- forbidden
        return Combine(
            Word(srange(prefix_pattern), exact=1) + identifier_suffix)

    def braced_list(element_rule):
        # '{' elem, elem, ... '}' -- braces are suppressed, the (possibly
        # empty) element list is returned as one Group.
        elements_rule = Optional(delimitedList(element_rule))
        return Suppress('{') + Group(elements_rule) + Suppress('}')

    def annotate(name):
        # Build a parse action that tags the matched tokens with `name`.
        def annotation(t):
            return AnnotatedToken(name, t.asList())
        return annotation

    # Reserved words
    # Multi-word reserved words are each written as a single Keyword whose
    # text contains one space.
    ANY = Keyword('ANY')
    DEFINED_BY = Keyword('DEFINED BY')
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')
    IMPORTS = Keyword('IMPORTS')
    EXPORTS = Keyword('EXPORTS')
    FROM = Keyword('FROM')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Useful types
    GeneralizedTime = Keyword('GeneralizedTime')
    UTCTime = Keyword('UTCTime')
    ObjectDescriptor = Keyword('ObjectDescriptor')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    # Binary and hex strings: quoted digits followed by a B or H suffix; only
    # the digits survive, the quotes and suffix are suppressed.
    bstring = Suppress('\'') + StringOf('01') + Suppress('\'B')
    hstring = Suppress('\'') + StringOf('0123456789ABCDEF') + Suppress('\'H')

    # Comments
    # A hyphen comment starts with "--" and runs to the next "--" or to the
    # end of the line, whichever comes first (non-greedy match, MULTILINE `$`).
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')
    reference = valuereference | typereference  # TODO: consider object references from 12.1

    # values
    # todo: consider more literals from 16.9
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL
    cstring_value = dblQuotedString

    exponent = CaselessLiteral('e') + signed_number
    real_value = Combine(signed_number + Optional(Literal('.') + Optional(number)) + Optional(exponent))

    # In value range constraints, decimal points must be followed by number, or
    # the grammar becomes ambiguous: ([1.].100) vs ([1]..[100])
    constraint_real_value = Combine(signed_number + Optional(Literal('.') + number) + Optional(exponent))

    # real_value is tried before integer_value; both start with signed_number.
    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value

    external_value_reference = module_reference + Suppress(
        '.') + valuereference
    defined_value = external_value_reference | valuereference  # todo: more options from 13.1
    referenced_value = Unique(defined_value)  # todo: more options from 16.11

    # object identifier value
    # NOTE(review): name_form wraps `identifier` here, before the 'Identifier'
    # parse action is attached to `identifier` further down -- see the
    # Unique() caveat in the docstring.
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(
        ')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
        (objid_components_list | (defined_value + objid_components_list)) + \
        Suppress('}')

    value = builtin_value | referenced_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress(
        '(') + definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    # The whole braced definitive identifier is optional.
    definitive_identifier = Optional(
        Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS

    # extensions
    extension_default = Unique(EXTENSIBILITY_IMPLIED)

    # values

    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    lower_bound = (constraint_real_value | signed_number | referenced_value | MIN)
    upper_bound = (constraint_real_value | signed_number | referenced_value | MAX)
    single_value_constraint = Suppress('(') + value + Suppress(')')
    value_range_constraint = Suppress('(') + lower_bound + Suppress(
        '..') + upper_bound + Suppress(')')
    # TODO: Include contained subtype constraint here if we ever implement it.
    # The outer parentheses around SIZE(...) are each optional, independently
    # of one another.
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + (
        single_value_constraint | value_range_constraint) + Optional(
            Suppress(')'))

    # types
    # todo: consider other defined types from 13.1
    # Absent module prefix / size constraint are reported as None placeholders.
    defined_type = Optional(module_reference + Suppress('.'),
                            default=None) + typereference + Optional(
                                size_constraint, default=None)

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    # The bare named_type alternative comes last because the OPTIONAL and
    # DEFAULT forms both begin with a named_type.
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT, default=None) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    named_nonumber = Unique(identifier)
    enumeration = named_number | named_nonumber

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(
        size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(
        size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    selection_type = identifier + Suppress('<') + type_
    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
    bitstring_type = BIT_STRING + Optional(
        braced_list(named_number), default=[]) + Optional(
            single_value_constraint | size_constraint, default=None)
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number) + Optional(
        single_value_constraint, default=None)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING + Optional(size_constraint)
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
        GraphicString | IA5String | \
        ISO646String | NumericString | \
        PrintableString | TeletexString | \
        T61String | UniversalString | \
        UTF8String | VideotexString | \
        VisibleString
    characterstring_type = (
        restricted_characterstring_type |
        unrestricted_characterstring_type) + Optional(size_constraint)
    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor

    # ANY type
    # An optional "DEFINED BY <identifier>" suffix is accepted and discarded.
    any_type = ANY + Optional(Suppress(DEFINED_BY + identifier))

    # todo: consider other builtins from 16.2
    simple_type = (any_type | boolean_type | null_type | octetstring_type |
                   characterstring_type | real_type | plain_integer_type |
                   object_identifier_type | useful_type) + Optional(
                       value_range_constraint | single_value_constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    # value_list_type precedes simple_type, so "INTEGER { ... }" is tried
    # before the plain INTEGER alternative inside simple_type.
    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type

    referenced_type = defined_type | selection_type  # todo: consider other ref:d types from 16.3

    # Close the forward declarations now that every type rule exists.
    type_ << (builtin_type | referenced_type)
    named_type << (identifier + type_)

    type_assignment = typereference + '::=' + type_
    value_assignment = valuereference + type_ + '::=' + value

    assignment = type_assignment | value_assignment
    assignment_list = ZeroOrMore(assignment)

    # TODO: Maybe handle full assigned-identifier syntax with defined values
    # described in 12.1, but I haven't been able to find examples of it, and I
    # can't say for sure what acceptable syntax is.
    global_module_reference = module_reference + Optional(
        object_identifier_value, default=None)

    symbol = Unique(reference)  # TODO: parameterized reference?
    symbol_list = delimitedList(symbol, delim=',')
    # One group per "sym, sym FROM Module" clause; the symbols themselves are
    # nested in an inner group.
    symbols_from_module = Group(
        Group(symbol_list) + Suppress(FROM) + global_module_reference)
    symbols_from_module_list = OneOrMore(symbols_from_module)
    symbols_imported = Unique(symbols_from_module_list)
    exports = Suppress(EXPORTS) + Optional(symbol_list) + Suppress(';')
    imports = Suppress(IMPORTS) + Optional(symbols_imported) + Suppress(';')

    module_body = Optional(exports, default=None) + Optional(
        imports, default=None) + assignment_list
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + Suppress(DEFINITIONS) + Optional(tag_default, default=None) + \
        Optional(extension_default, default=None) + Suppress('::=') + \
        Suppress(BEGIN) + module_body + Suppress(END)

    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    # Both enumeration forms share the 'NamedValue' tag.
    named_number.setParseAction(annotate('NamedValue'))
    named_nonumber.setParseAction(annotate('NamedValue'))
    single_value_constraint.setParseAction(annotate('SingleValueConstraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    value_range_constraint.setParseAction(annotate('ValueRangeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(
        annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    module_reference.setParseAction(annotate('ModuleReference'))
    global_module_reference.setParseAction(annotate('GlobalModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(
        annotate('DefinitiveNameAndNumberForm'))
    exports.setParseAction(annotate('Exports'))
    imports.setParseAction(annotate('Imports'))
    assignment_list.setParseAction(annotate('AssignmentList'))
    bstring.setParseAction(annotate('BinaryStringValue'))
    hstring.setParseAction(annotate('HexStringValue'))
    defined_type.setParseAction(annotate('DefinedType'))
    selection_type.setParseAction(annotate('SelectionType'))
    referenced_value.setParseAction(annotate('ReferencedValue'))

    # A source text may contain several module definitions back to back.
    start = OneOrMore(module_definition)
    return start
column_name = Combine( Suppress(Literal('col("')) + Word(alphas, f"{alphanums}_.").setResultsName("column") + Suppress(Literal('")'))) gt = Literal(">") lt = Literal("<") ge = Literal(">=") le = Literal("<=") eq = Literal("==") ops = (gt ^ lt ^ ge ^ le ^ eq).setResultsName("op") fnumber = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?").setResultsName( "fnumber") condition_value = Suppress('"') + Word(f"{alphanums}._").setResultsName( "condition_value") + Suppress('"') ^ Suppress("'") + Word( f"{alphanums}._").setResultsName("condition_value") + Suppress("'") not_null = CaselessLiteral(".notnull()").setResultsName("notnull") condition = (column_name + not_null).setParseAction(_set_notnull) ^ ( column_name + ops + (fnumber ^ condition_value)) class ConditionParserError(ge_exceptions.GreatExpectationsError): pass class RowConditionParserType(enum.Enum): """Type of condition or parser to be used to interpret a RowCondition Note that many of these are forward looking and are not yet implemented. In the future `GE` can replace the `great_expectations__experimental__` name for the condition_parser and this enum can be used internally instead of strings for the condition_parser user input.
def graph_definition():
    """Return the shared DOT-language grammar, constructing it on first use.

    The finished grammar is stored in the module-level ``graphparser``.
    While that global is truthy it is returned untouched; otherwise the
    grammar is assembled, its parse actions are wired to the module's
    ``push_*`` callbacks, and the result is cached in the global.
    """
    global graphparser

    # Already built by an earlier call: hand back the cached grammar.
    if graphparser:
        return graphparser

    # punctuation
    colon = Literal(":")
    lbrace, rbrace = Literal("{"), Literal("}")
    lbrack, rbrack = Literal("["), Literal("]")
    lparen, rparen = Literal("("), Literal(")")
    equals = Literal("=")
    comma = Literal(",")
    dot = Literal(".")
    slash = Literal("/")
    bslash = Literal("\\")
    star = Literal("*")
    semi = Literal(";")
    at = Literal("@")
    minus = Literal("-")

    # keywords (matched case-insensitively)
    strict_ = CaselessLiteral("strict")
    graph_ = CaselessLiteral("graph")
    digraph_ = CaselessLiteral("digraph")
    subgraph_ = CaselessLiteral("subgraph")
    node_ = CaselessLiteral("node")
    edge_ = CaselessLiteral("edge")

    # token definitions
    identifier = Word(alphanums + "_.").setName("identifier")

    # Quoted strings keep their surrounding quotes (unquoteResults=False).
    double_quoted_string = QuotedString(
        '"', escChar="\\", multiline=True, unquoteResults=False)

    # Every printable character except the comma, plus the space, is
    # excluded from alphastring_.
    _noncomma = "".join(ch for ch in printables if ch != ",")
    alphastring_ = OneOrMore(CharsNotIn(_noncomma + ' '))

    def parse_html(s, loc, toks):
        # Re-wrap the nested-expression contents in angle brackets.
        return '<%s>' % ''.join(toks[0])

    opener = '<'
    closer = '>'
    html_body = CharsNotIn(opener + closer)
    html_text = nestedExpr(
        opener, closer, (html_body)
    ).setParseAction(parse_html).leaveWhitespace()

    # Alternatives are tried in this order.
    ID = (
        identifier
        | html_text
        | double_quoted_string
        | alphastring_
    ).setName("ID")

    float_number = Combine(
        Optional(minus) + OneOrMore(Word(nums + "."))
    ).setName("float_number")

    righthand_id = (float_number | ID).setName("righthand_id")

    # ports: ":name" chains, ":(x, y)" locations and "@angle"
    port_angle = (at + ID).setName("port_angle")
    port_location = (
        OneOrMore(Group(colon + ID))
        | Group(colon + lparen + ID + comma + ID + rparen)
    ).setName("port_location")
    port = (
        Group(port_location + Optional(port_angle))
        | Group(port_angle + Optional(port_location))
    ).setName("port")

    node_id = (ID + Optional(port))

    # attributes
    a_list = OneOrMore(
        ID + Optional(equals + righthand_id) + Optional(comma.suppress())
    ).setName("a_list")
    attr_list = OneOrMore(
        lbrack.suppress() + Optional(a_list) + rbrack.suppress()
    ).setName("attr_list")
    attr_stmt = (Group(graph_ | node_ | edge_) + attr_list).setName(
        "attr_stmt")

    edgeop = (Literal("--") | Literal("->")).setName("edgeop")

    # statements; stmt_list and edge_point are recursive, hence Forward()
    stmt_list = Forward()
    graph_stmt = Group(
        lbrace.suppress() + Optional(stmt_list) + rbrace.suppress()
        + Optional(semi.suppress())
    ).setName("graph_stmt")

    edge_point = Forward()
    edgeRHS = OneOrMore(edgeop + edge_point)
    edge_stmt = edge_point + edgeRHS + Optional(attr_list)

    subgraph = Group(subgraph_ + Optional(ID) + graph_stmt).setName(
        "subgraph")

    edge_point << Group(subgraph | graph_stmt | node_id).setName(
        'edge_point')

    node_stmt = (
        node_id + Optional(attr_list) + Optional(semi.suppress())
    ).setName("node_stmt")

    assignment = (ID + equals + righthand_id).setName("assignment")

    stmt = (
        assignment
        | edge_stmt
        | attr_stmt
        | subgraph
        | graph_stmt
        | node_stmt
    ).setName("stmt")
    stmt_list << OneOrMore(stmt + Optional(semi.suppress()))

    # top level: one or more [strict] (graph | digraph) [ID] { ... }
    top_graph = (
        Optional(strict_) + Group((graph_ | digraph_)) + Optional(ID)
        + graph_stmt
    ).setResultsName("graph")
    graphparser = OneOrMore(top_graph)

    singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

    # actions
    for skipped in (singleLineComment, cStyleComment):
        graphparser.ignore(skipped)

    # Wire each rule to its callback, in the same order as before.
    for rule, callback in (
        (assignment, push_attr_list),
        (a_list, push_attr_list),
        (edge_stmt, push_edge_stmt),
        (node_stmt, push_node_stmt),
        (attr_stmt, push_default_stmt),
        (subgraph, push_subgraph_stmt),
        (graph_stmt, push_graph_stmt),
        (graphparser, push_top_graph_stmt),
    ):
        rule.setParseAction(callback)

    return graphparser
class CreateParser(object):
    """
    Parse a plain "CREATE TABLE" SQL statement into a Table object, so that
    we have more insight on the detail of this SQL.

    Example:
        sql = 'create table foo ( bar int primary key )'
        try:
            tbl_obj = CreateParser.parse(sql)
        except ParseError:
            log.error("Failed to parse SQL")

    This set of BNF rules are basically translated from the MySQL manual:
    http://dev.mysql.com/doc/refman/5.6/en/create-table.html
    If you don't know how to change the rule or fix the bug,
    <Getting Started with Pyparsing> is probably the best book to start with.
    Also this wiki has all supported functions listed:
    https://pyparsing.wikispaces.com/HowToUsePyparsing
    If you want more information about how these tokens are matching, add
    .setDebug(True) after the specific token you want to debug.

    All grammar rules are class attributes built once at class-definition
    time. Alternatives joined with ``|`` are tried in order and the first
    match wins, so a literal that is a prefix of another one (INT/INTEGER,
    BOOL/BOOLEAN) must come after the longer literal.
    """

    # Basic token
    WORD_CREATE = CaselessLiteral("CREATE").suppress()
    WORD_TABLE = CaselessLiteral("TABLE").suppress()
    COMMA = Literal(',').suppress()
    DOT = Literal('.')
    LEFT_PARENTHESES = Literal('(').suppress()
    RIGHT_PARENTHESES = Literal(')').suppress()
    QUOTE = Literal("'") | Literal('"')
    BACK_QUOTE = Optional(Literal('`')).suppress()
    LENGTH = Word(nums)
    OBJECT_NAME = Word(alphanums + "_" + "-" + "<" + ">" + ":")
    # Single- or double-quoted string, surrounding quotes kept in the result
    QUOTED_STRING_WITH_QUOTE = QuotedString(
        quoteChar="'", escQuote="''", escChar='\\', multiline=True,
        unquoteResults=False) | QuotedString(
            quoteChar='"', escQuote='""', escChar='\\', multiline=True,
            unquoteResults=False)
    # Same as above, but with the surrounding quotes stripped
    QUOTED_STRING = QuotedString(
        quoteChar="'", escQuote="''", escChar='\\',
        multiline=True) | QuotedString(
            quoteChar='"', escQuote='""', escChar='\\', multiline=True)

    # Start of a create table statement
    # Sample: this part of rule will match following section
    # `table_name` IF NOT EXISTS
    IF_NOT_EXIST = Optional(
        CaselessLiteral("IF") + CaselessLiteral("NOT") +
        CaselessLiteral("EXISTS")).suppress()
    TABLE_NAME = (QuotedString(
        quoteChar="`", escQuote="``", escChar='\\', unquoteResults=True) |
                  OBJECT_NAME)('table_name')

    # Column definition
    # Sample: this part of rule will match following section
    # `id` bigint(20) unsigned NOT NULL DEFAULT '0',
    COLUMN_NAME = (QuotedString(
        quoteChar="`", escQuote="``", escChar='\\', unquoteResults=True) |
                   OBJECT_NAME)('column_name')
    COLUMN_NAME_WITH_QUOTE = (QuotedString(
        quoteChar="`", escQuote="``", escChar='\\', unquoteResults=False) |
                              OBJECT_NAME)('column_name')
    UNSIGNED = Optional(CaselessLiteral("UNSIGNED"))('unsigned')
    ZEROFILL = Optional(CaselessLiteral("ZEROFILL"))('zerofill')
    COL_LEN = Combine(LEFT_PARENTHESES + LENGTH + RIGHT_PARENTHESES,
                      adjacent=False)('length')
    # INTEGER has to be tried before INT: INT is a prefix of INTEGER and the
    # first matching alternative wins, which would leave "EGER" unparsed.
    INT_TYPE = (CaselessLiteral("TINYINT") | CaselessLiteral("SMALLINT") |
                CaselessLiteral("MEDIUMINT") | CaselessLiteral("INTEGER") |
                CaselessLiteral("INT") | CaselessLiteral("BIGINT") |
                CaselessLiteral("BINARY") | CaselessLiteral("BIT"))
    INT_DEF = (INT_TYPE('column_type') + Optional(COL_LEN) + UNSIGNED +
               ZEROFILL)
    VARBINARY_DEF = (CaselessLiteral('VARBINARY')('column_type') + COL_LEN)
    FLOAT_TYPE = \
        CaselessLiteral("REAL") | CaselessLiteral("DOUBLE") |\
        CaselessLiteral("FLOAT") | CaselessLiteral("DECIMAL") |\
        CaselessLiteral("NUMERIC")
    # "(M)" or "(M, D)"; the pieces are re-joined with ', '
    FLOAT_LEN = Combine(LEFT_PARENTHESES + LENGTH + Optional(COMMA + LENGTH) +
                        RIGHT_PARENTHESES,
                        adjacent=False,
                        joinString=', ')('length')
    FLOAT_DEF = (FLOAT_TYPE('column_type') + Optional(FLOAT_LEN) + UNSIGNED +
                 ZEROFILL)
    # time type definition. They contain type_name and an optional FSP section
    # Sample: DATETIME[(fsp)]
    FSP = COL_LEN
    DT_DEF = (
        Combine(CaselessLiteral("TIME") + Optional(CaselessLiteral("STAMP"))) |
        CaselessLiteral("DATETIME"))('column_type') + Optional(FSP)
    # BOOLEAN has to be tried before BOOL for the same prefix reason as
    # INTEGER/INT above.
    SIMPLE_DEF = (CaselessLiteral("DATE") | CaselessLiteral("YEAR") |
                  CaselessLiteral("TINYBLOB") | CaselessLiteral("BLOB") |
                  CaselessLiteral("MEDIUMBLOB") | CaselessLiteral("LONGBLOB") |
                  CaselessLiteral("BOOLEAN") |
                  CaselessLiteral("BOOL"))('column_type')
    OPTIONAL_COL_LEN = Optional(COL_LEN)
    BINARY = Optional(CaselessLiteral("BINARY"))('binary')
    CHARSET_NAME = (Optional(QUOTE).suppress() +
                    Word(alphanums + '_')('charset') +
                    Optional(QUOTE).suppress())
    COLLATION_NAME = (Optional(QUOTE).suppress() +
                      Word(alphanums + '_')('collate') +
                      Optional(QUOTE).suppress())
    CHARSET_DEF = (CaselessLiteral("CHARACTER SET").suppress() + CHARSET_NAME)
    COLLATE_DEF = (CaselessLiteral("COLLATE").suppress() + COLLATION_NAME)
    CHAR_DEF = (CaselessLiteral("CHAR")('column_type') + OPTIONAL_COL_LEN +
                BINARY)
    VARCHAR_DEF = (CaselessLiteral("VARCHAR")('column_type') + COL_LEN +
                   BINARY)
    TEXT_TYPE = (CaselessLiteral("TINYTEXT") | CaselessLiteral("TEXT") |
                 CaselessLiteral("MEDIUMTEXT") | CaselessLiteral("LONGTEXT") |
                 CaselessLiteral("DOCUMENT"))
    TEXT_DEF = (TEXT_TYPE('column_type') + BINARY)
    ENUM_VALUE_LIST = Group(QUOTED_STRING_WITH_QUOTE +
                            ZeroOrMore(COMMA + QUOTED_STRING_WITH_QUOTE))(
                                'enum_value_list')
    ENUM_DEF = (CaselessLiteral("ENUM")('column_type') + LEFT_PARENTHESES +
                ENUM_VALUE_LIST + RIGHT_PARENTHESES)
    SET_VALUE_LIST = Group(QUOTED_STRING_WITH_QUOTE +
                           ZeroOrMore(COMMA + QUOTED_STRING_WITH_QUOTE))(
                               'set_value_list')
    SET_DEF = (CaselessLiteral("SET")('column_type') + LEFT_PARENTHESES +
               SET_VALUE_LIST + RIGHT_PARENTHESES)
    DATA_TYPE = (INT_DEF | FLOAT_DEF | DT_DEF | SIMPLE_DEF | TEXT_DEF |
                 CHAR_DEF | VARCHAR_DEF | ENUM_DEF | SET_DEF | VARBINARY_DEF)

    # Column attributes come after column type and length
    NULLABLE = (CaselessLiteral("NULL") | CaselessLiteral("NOT NULL"))
    # DEFAULT accepts: an optionally b-prefixed quoted string,
    # CURRENT_TIMESTAMP[(len)], or a bare word/number.
    DEFAULT_VALUE = (CaselessLiteral("DEFAULT").suppress() + (
        Optional(Literal('b'))('is_bit') + QUOTED_STRING_WITH_QUOTE('default')
        | Combine(
            CaselessLiteral("CURRENT_TIMESTAMP")('default') +
            Optional(COL_LEN)('ts_len'))
        | Word(alphanums + '_' + '-' + '+')('default')))
    ON_UPDATE = (CaselessLiteral("ON") + CaselessLiteral("UPDATE") +
                 (CaselessLiteral("CURRENT_TIMESTAMP")('on_update') +
                  Optional(COL_LEN)('on_update_ts_len')))
    AUTO_INCRE = CaselessLiteral("AUTO_INCREMENT")
    UNIQ_KEY = (CaselessLiteral("UNIQUE") +
                Optional(CaselessLiteral("KEY")).suppress())
    PRIMARY_KEY = (CaselessLiteral("PRIMARY") +
                   Optional(CaselessLiteral("KEY")).suppress())
    COMMENT = Combine(CaselessLiteral("COMMENT").suppress() +
                      QUOTED_STRING_WITH_QUOTE,
                      adjacent=False)
    COLUMN_DEF = Group(COLUMN_NAME + DATA_TYPE + ZeroOrMore(
        NULLABLE('nullable') | DEFAULT_VALUE | ON_UPDATE |
        AUTO_INCRE('auto_increment') | UNIQ_KEY('uniq_key') |
        PRIMARY_KEY('primary') | COMMENT('comment') | CHARSET_DEF |
        COLLATE_DEF))
    COLUMN_LIST = Group(COLUMN_DEF +
                        ZeroOrMore(COMMA + COLUMN_DEF))('column_list')

    DOCUMENT_PATH = Combine(COLUMN_NAME_WITH_QUOTE +
                            ZeroOrMore(DOT + COLUMN_NAME_WITH_QUOTE))
    # An index column is either "<document path> AS INT|STRING [(len)]"
    # (4 tokens) or "<column> [(len)]" (2 tokens); a missing length is
    # reported as ''.
    IDX_COL = ((Group(DOCUMENT_PATH + CaselessLiteral('AS') +
                      (CaselessLiteral('INT') | CaselessLiteral('STRING')) +
                      Optional(COL_LEN, default=''))) |
               (Group(COLUMN_NAME + Optional(COL_LEN, default=''))))

    # Primary key section
    COL_NAME_LIST = Group(IDX_COL + ZeroOrMore(COMMA + IDX_COL))
    IDX_COLS = (LEFT_PARENTHESES + COL_NAME_LIST + RIGHT_PARENTHESES)
    WORD_PRI_KEY = (CaselessLiteral("PRIMARY").suppress() +
                    CaselessLiteral("KEY").suppress())
    KEY_BLOCK_SIZE = (CaselessLiteral("KEY_BLOCK_SIZE").suppress() +
                      Optional(Literal('=')) +
                      Word(nums)('idx_key_block_size'))
    INDEX_USING = (
        CaselessLiteral("USING").suppress() +
        (CaselessLiteral("BTREE") | CaselessLiteral("HASH"))('idx_using'))
    INDEX_OPTION = (ZeroOrMore(KEY_BLOCK_SIZE | COMMENT('idx_comment') |
                               INDEX_USING))
    PRI_KEY_DEF = (COMMA + WORD_PRI_KEY + IDX_COLS('pri_list') + INDEX_OPTION)

    # Index section
    KEY_TYPE = (CaselessLiteral("FULLTEXT") |
                CaselessLiteral("SPATIAL"))('key_type')
    WORD_UNIQUE = CaselessLiteral("UNIQUE")('unique')
    WORD_KEY = (CaselessLiteral("INDEX").suppress() |
                CaselessLiteral("KEY").suppress())
    IDX_NAME = Optional(COLUMN_NAME)
    # Each index definition is its own Group, so its named results are
    # scoped to that index.
    IDX_DEF = (ZeroOrMore(
        Group(COMMA + Optional(WORD_UNIQUE | KEY_TYPE) + WORD_KEY +
              IDX_NAME('index_name') + IDX_COLS('index_col_list') +
              INDEX_OPTION)))('index_section')

    # Constraint section as this is not a recommended way of using MySQL
    # we'll treat the whole section as a string
    CONSTRAINT = Combine(
        ZeroOrMore(COMMA + Optional(CaselessLiteral('CONSTRAINT')) +
                   # foreign key name except the key word 'FOREIGN'
                   Optional((~CaselessLiteral('FOREIGN') + COLUMN_NAME)) +
                   CaselessLiteral('FOREIGN') + CaselessLiteral('KEY') +
                   LEFT_PARENTHESES + COL_NAME_LIST + RIGHT_PARENTHESES +
                   CaselessLiteral('REFERENCES') + COLUMN_NAME +
                   LEFT_PARENTHESES + COL_NAME_LIST + RIGHT_PARENTHESES +
                   ZeroOrMore(Word(alphanums))),
        adjacent=False,
        joinString=' ')('constraint')

    # Table option section
    ENGINE = (CaselessLiteral("ENGINE").suppress() +
              Optional(Literal('=')).suppress() +
              COLUMN_NAME('engine').setParseAction(upcaseTokens))
    DEFAULT_CHARSET = (Optional(CaselessLiteral("DEFAULT")).suppress() +
                       ((CaselessLiteral("CHARACTER").suppress() +
                         CaselessLiteral("SET").suppress()) |
                        (CaselessLiteral("CHARSET").suppress())) +
                       Optional(Literal('=')).suppress() +
                       Word(alphanums + '_')('charset'))
    TABLE_COLLATE = (Optional(CaselessLiteral("DEFAULT")).suppress() +
                     CaselessLiteral("COLLATE").suppress() +
                     Optional(Literal('=')).suppress() + COLLATION_NAME)
    ROW_FORMAT = (
        CaselessLiteral("ROW_FORMAT").suppress() +
        Optional(Literal('=')).suppress() +
        Word(alphanums + '_')('row_format').setParseAction(upcaseTokens))
    # The parse action turns the matched digits into an int
    TABLE_KEY_BLOCK_SIZE = (
        CaselessLiteral("KEY_BLOCK_SIZE").suppress() +
        Optional(Literal('=')).suppress() +
        Word(nums)('key_block_size').setParseAction(
            lambda _s, _loc, toks: [int(toks[0])]))
    COMPRESSION = (
        CaselessLiteral("COMPRESSION").suppress() +
        Optional(Literal('=')).suppress() +
        Word(alphanums + '_')('compression').setParseAction(upcaseTokens))
    # Parse and make sure auto_increment is an integer
    # parseAction function is defined as fn( s, loc, toks ), where:
    # s is the original parse string
    # loc is the location in the string where matching started
    # toks is the list of the matched tokens, packaged as a ParseResults_
    # object
    TABLE_AUTO_INCRE = (
        CaselessLiteral("AUTO_INCREMENT").suppress() +
        Optional(Literal('=')).suppress() +
        Word(nums)('auto_increment').setParseAction(
            lambda _s, _loc, toks: [int(toks[0])]))
    TABLE_COMMENT = (CaselessLiteral("COMMENT").suppress() +
                     Optional(Literal('=')).suppress() +
                     QUOTED_STRING_WITH_QUOTE('comment'))
    TABLE_OPTION = ZeroOrMore(ENGINE | DEFAULT_CHARSET | TABLE_COLLATE |
                              ROW_FORMAT | TABLE_KEY_BLOCK_SIZE | COMPRESSION |
                              TABLE_AUTO_INCRE | TABLE_COMMENT)

    # Partition section
    # Everything from "PARTITION BY" (optionally preceded by a "/*!NNNNN"
    # version-comment opener) to the end of the input is kept as one string.
    PARTITION = Optional(
        Combine(Combine(Optional(Literal('/*!') + Word(nums))) +
                CaselessLiteral("PARTITION") + CaselessLiteral("BY") +
                SkipTo(StringEnd()),
                adjacent=False,
                joinString=" ")('partition'))

    @classmethod
    def generate_rule(cls):
        """Return the complete rule matching one CREATE TABLE statement."""
        # The final rule for the whole statement match
        return (cls.WORD_CREATE + cls.WORD_TABLE + cls.IF_NOT_EXIST +
                cls.TABLE_NAME + cls.LEFT_PARENTHESES + cls.COLUMN_LIST +
                Optional(cls.PRI_KEY_DEF) + cls.IDX_DEF + cls.CONSTRAINT +
                cls.RIGHT_PARENTHESES + cls.TABLE_OPTION('table_options') +
                cls.PARTITION)

    @classmethod
    def parse(cls, sql):
        """Parse a CREATE TABLE statement into a ``models.Table``.

        Args:
            sql: the statement text; anything that is not a ``str`` is
                decoded as UTF-8 first.

        Returns:
            A ``models.Table`` populated with the columns, primary key,
            indexes, table options, constraint and partition text found in
            the statement.

        Raises:
            ParseError: if the grammar rejects the statement, or if a
                primary key is declared both inline on a column and in a
                separate PRIMARY KEY clause.
        """
        try:
            if not isinstance(sql, str):
                sql = sql.decode('utf-8')
            result = cls.generate_rule().parseString(sql)
        except ParseException as e:
            raise ParseError(
                "Failed to parse SQL, unsupported syntax: {}".format(e),
                e.line, e.column)

        inline_pri_exists = False
        table = models.Table()
        table.name = result.table_name
        table_options = [
            'engine', 'charset', 'collate', 'row_format', 'key_block_size',
            'compression', 'auto_increment', 'comment'
        ]
        # Only copy the options that were actually present in the statement
        for table_option in table_options:
            if table_option in result:
                setattr(table, table_option, result.get(table_option))
        if 'partition' in result:
            # pyparsing will convert newline into two after parsing. So we
            # need to dedup here
            table.partition = result.partition.replace("\n\n", "\n")
        if 'constraint' in result:
            table.constraint = result.constraint

        for column_def in result.column_list:
            if column_def.column_type == 'ENUM':
                column = models.EnumColumn()
                for enum_value in column_def.enum_value_list:
                    column.enum_list.append(enum_value)
            elif column_def.column_type == 'SET':
                column = models.SetColumn()
                for set_value in column_def.set_value_list:
                    column.set_list.append(set_value)
            elif column_def.column_type in ('TIMESTAMP', 'DATETIME'):
                column = models.TimestampColumn()
                if 'on_update' in column_def:
                    if 'on_update_ts_len' in column_def:
                        column.on_update_current_timestamp = \
                            "{}({})".format(
                                column_def.on_update,
                                column_def.on_update_ts_len)
                    else:
                        column.on_update_current_timestamp = \
                            column_def.on_update
            else:
                column = models.Column()

            column.name = column_def.column_name
            column.column_type = column_def.column_type

            # We need to check whether each column property exist in the
            # create table string, because not specifying a "COMMENT" is
            # different from specifying "COMMENT" equals to empty string.
            # The former one will ends up being
            #   column=None
            # and the later one being
            #   column=''
            if 'comment' in column_def:
                column.comment = column_def.comment
            if 'nullable' in column_def:
                if column_def.nullable == 'NULL':
                    column.nullable = True
                elif column_def.nullable == 'NOT NULL':
                    column.nullable = False
            if 'unsigned' in column_def:
                if column_def.unsigned == 'UNSIGNED':
                    column.unsigned = True
            if 'default' in column_def:
                if 'ts_len' in column_def:
                    column.default = "{}({})".format(column_def.default,
                                                     column_def.ts_len)
                else:
                    column.default = column_def.default
                if 'is_bit' in column_def:
                    column.is_default_bit = True
            if 'charset' in column_def:
                column.charset = column_def.charset
            if 'length' in column_def:
                column.length = column_def.length
            if 'collate' in column_def:
                column.collate = column_def.collate
            if 'auto_increment' in column_def:
                column.auto_increment = True
            if 'primary' in column_def:
                # Inline "PRIMARY KEY" on the column itself
                idx_col = models.IndexColumn()
                idx_col.name = column_def.column_name
                table.primary_key.column_list.append(idx_col)
                inline_pri_exists = True
            table.column_list.append(column)

        if 'pri_list' in result:
            if inline_pri_exists:
                raise ParseError("Multiple primary keys defined")
            table.primary_key.name = 'PRIMARY'
            for col in result.pri_list:
                for name, length in col:
                    idx_col = models.IndexColumn()
                    idx_col.name = name
                    if length:
                        idx_col.length = length
                    table.primary_key.column_list.append(idx_col)
            if 'idx_key_block_size' in result:
                # Read the same results name that was just tested; the
                # grammar never defines a 'pri_key_block_size' name.
                table.primary_key.key_block_size = result.idx_key_block_size
            if 'idx_comment' in result:
                table.primary_key.comment = result.idx_comment

        if 'index_section' in result:
            for idx_def in result.index_section:
                idx = models.TableIndex()
                idx.name = idx_def.index_name
                if 'idx_key_block_size' in idx_def:
                    idx.key_block_size = idx_def.idx_key_block_size
                if 'idx_comment' in idx_def:
                    idx.comment = idx_def.idx_comment
                if 'idx_using' in idx_def:
                    idx.using = idx_def.idx_using
                if 'key_type' in idx_def:
                    idx.key_type = idx_def.key_type
                if 'unique' in idx_def:
                    idx.is_unique = True
                for col in idx_def.index_col_list:
                    for col_def in col:
                        if len(col_def) == 4 and col_def[1].upper() == 'AS':
                            # "<document path> AS <type> [(len)]" form
                            (document_path, word_as, key_type,
                             length) = col_def
                            idx_col = models.DocStoreIndexColumn()
                            idx_col.document_path = document_path
                            idx_col.key_type = key_type
                            if length:
                                idx_col.length = length
                            idx.column_list.append(idx_col)
                        else:
                            (name, length) = col_def
                            idx_col = models.IndexColumn()
                            idx_col.name = name
                            if length:
                                idx_col.length = length
                            idx.column_list.append(idx_col)
                table.indexes.append(idx)
        return table
def define_dot_parser(self):
    """Build and return the pyparsing grammar for the Graphviz dot language.

    Based on the grammar http://www.graphviz.org/doc/info/lang.html

    The parse actions of the grammar are bound to this object's
    ``_proc_*`` methods and to ``_main_graph_stmt``. ``//`` and ``#`` line
    comments as well as C-style comments are ignored by the returned parser.

    Returns:
        The top-level ``graphparser`` parser element.
    """
    # punctuation
    colon = Literal(":")
    lbrace = Suppress("{")
    rbrace = Suppress("}")
    lbrack = Suppress("[")
    rbrack = Suppress("]")
    lparen = Literal("(")
    rparen = Literal(")")
    equals = Suppress("=")
    comma = Literal(",")
    semi = Suppress(";")
    at = Literal("@")
    minus = Literal("-")
    pluss = Suppress("+")

    # keywords
    strict_ = CaselessLiteral("strict")
    graph_ = CaselessLiteral("graph")
    digraph_ = CaselessLiteral("digraph")
    subgraph_ = CaselessLiteral("subgraph")
    node_ = CaselessLiteral("node")
    edge_ = CaselessLiteral("edge")

    # every punctuation character except '_' plus all whitespace; these
    # characters terminate an unquoted alphastring_
    punctuation_ = "".join([c for c in string.punctuation if c not in '_']) + string.whitespace

    # token definitions
    identifier = Word(alphanums + "_").setName("identifier")

    double_quoted_string = Regex(r'\"(?:\\\"|\\\\|[^"])*\"', re.MULTILINE)
    double_quoted_string.setParseAction(removeQuotes)
    # "a" + "b" concatenation of quoted strings is combined into one token
    quoted_string = Combine(
        double_quoted_string +
        Optional(OneOrMore(pluss + double_quoted_string)), adjacent=False)
    alphastring_ = OneOrMore(CharsNotIn(punctuation_))

    def parse_html(s, loc, toks):
        return '<<%s>>' % ''.join(toks[0])

    opener = '<'
    closer = '>'
    try:
        html_text = pyparsing.nestedExpr(
            opener, closer,
            ((CharsNotIn(opener + closer).setParseAction(lambda t: t[0])))
        ).setParseAction(parse_html)
    except AttributeError:
        # Only a missing ``nestedExpr`` attribute triggers the fallback;
        # any other error is a real bug and must not be hidden.
        log.debug('nestedExpr not available.')
        log.warning('Old version of pyparsing detected. Version 1.4.8 or '
                    'later is recommended. Parsing of html labels may not '
                    'work properly.')
        html_text = Combine(Literal("<<") + OneOrMore(CharsNotIn(",]")))

    ID = (alphastring_ | html_text | quoted_string | identifier).setName("ID")

    float_number = Combine(Optional(minus) +
                           OneOrMore(Word(nums + "."))).setName("float_number")

    righthand_id = (float_number | ID).setName("righthand_id")

    port_angle = (at + ID).setName("port_angle")

    port_location = ((OneOrMore(Group(colon + ID)) |
                      Group(colon + lparen + ID + comma + ID + rparen))).setName("port_location")

    port = Combine(
        (Group(port_location + Optional(port_angle)) |
         Group(port_angle + Optional(port_location)))).setName("port")

    node_id = (ID + Optional(port))
    a_list = OneOrMore(ID + Optional(equals + righthand_id) +
                       Optional(comma.suppress())).setName("a_list")

    attr_list = OneOrMore(lbrack + Optional(a_list) + rbrack).setName(
        "attr_list").setResultsName('attrlist')

    attr_stmt = ((graph_ | node_ | edge_) + attr_list).setName("attr_stmt")

    edgeop = (Literal("--") | Literal("->")).setName("edgeop")

    # stmt_list and edge_point are recursive, hence the Forward declarations
    stmt_list = Forward()
    graph_stmt = (lbrace + Optional(stmt_list) +
                  rbrace + Optional(semi)).setName("graph_stmt")

    edge_point = Forward()

    edgeRHS = OneOrMore(edgeop + edge_point)
    edge_stmt = edge_point + edgeRHS + Optional(attr_list)

    subgraph = (
        Optional(subgraph_, '') + Optional(ID, '') +
        Group(graph_stmt)).setName("subgraph").setResultsName('ssubgraph')

    edge_point << (subgraph | graph_stmt | node_id)

    node_stmt = (node_id + Optional(attr_list) +
                 Optional(semi)).setName("node_stmt")

    assignment = (ID + equals + righthand_id).setName("assignment")
    stmt = (assignment | edge_stmt | attr_stmt | subgraph |
            graph_stmt | node_stmt).setName("stmt")
    stmt_list << OneOrMore(stmt + Optional(semi))

    graphparser = ((Optional(strict_, 'notstrict') + ((graph_ | digraph_)) +
                    Optional(ID, '') + lbrace + Group(Optional(stmt_list)) +
                    rbrace).setResultsName("graph"))

    singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

    # actions
    graphparser.ignore(singleLineComment)
    graphparser.ignore(cStyleComment)
    node_id.setParseAction(self._proc_node_id)
    assignment.setParseAction(self._proc_attr_assignment)
    a_list.setParseAction(self._proc_attr_list)
    edge_stmt.setParseAction(self._proc_edge_stmt)
    node_stmt.setParseAction(self._proc_node_stmt)
    attr_stmt.setParseAction(self._proc_default_attr_stmt)
    attr_list.setParseAction(self._proc_attr_list_combine)
    subgraph.setParseAction(self._proc_subgraph_stmt)
    graphparser.setParseAction(self._main_graph_stmt)
    return graphparser
def fromString(inputText, verbose=False):
    """Parse a component description and build the component from it.

    ``/* ... */`` blocks are removed from ``inputText`` before parsing, and
    the grammar additionally ignores ``cppStyleComment``.

    Args:
        inputText: The text to parse.
        verbose: When true, the flag value is printed before parsing.

    Returns:
        Whatever ``CDSLParsing.component`` returns for the parse tree.
    """
    if verbose:
        # Single-argument form prints the same text on Python 2 and 3.
        print('Verbose: %s' % (verbose,))
    text = nestedExpr("/*", "*/").suppress().transformString(inputText)

    semicolon = Suppress(Word(";"))
    quote = Suppress(Word("\""))
    op = Suppress(Word("{"))
    cl = Suppress(Word("}"))
    opp = Suppress(Word("("))
    clp = Suppress(Word(")"))

    identifier = Word(alphas + "_", alphanums + "_")

    # Imports
    idslImport = Suppress(CaselessLiteral("import")) + quote + CharsNotIn(
        "\";").setResultsName('path') + quote + semicolon
    idslImports = OneOrMore(idslImport)

    # Communications
    def keyword_list(keyword):
        # "<keyword> name [, name ...];" with the keyword kept in the group
        return Group(
            CaselessLiteral(keyword) + identifier +
            ZeroOrMore(Suppress(Word(',')) + identifier) + semicolon)

    implementsList = keyword_list('implements')
    requiresList = keyword_list('requires')
    subscribesList = keyword_list('subscribesTo')
    publishesList = keyword_list('publishes')
    communicationList = implementsList | requiresList | subscribesList | publishesList
    communications = Group(
        Suppress(CaselessLiteral("communications")) + op +
        ZeroOrMore(communicationList) + cl + semicolon)

    # Language
    language = Suppress(CaselessLiteral("language")) + (
        CaselessLiteral("cpp") | CaselessLiteral("python")) + semicolon

    # GUI
    gui = Group(
        Optional(
            Suppress(CaselessLiteral("gui")) + CaselessLiteral("Qt") + opp +
            identifier + clp + semicolon))

    # additional options
    options = Group(
        Optional(
            Suppress(CaselessLiteral("options")) + identifier +
            ZeroOrMore(Suppress(Word(',')) + identifier) + semicolon))

    # '&' lets the four sections appear in any order
    componentContents = communications.setResultsName(
        'communications') & language.setResultsName(
            'language') & gui.setResultsName(
                'gui') & options.setResultsName('options')
    component = Suppress(
        CaselessLiteral("component")) + identifier.setResultsName(
            "name") + op + componentContents.setResultsName(
                "properties") + cl + semicolon

    CDSL = idslImports.setResultsName(
        "imports") + component.setResultsName("component")
    CDSL.ignore(cppStyleComment)
    tree = CDSL.parseString(text)
    return CDSLParsing.component(tree)
class SearchParser(object):
    """The parser for bauble.search.MapperSearch

    The grammar is built as class attributes when the class body executes.
    ``statement`` is the top-level element; ``parse_string`` runs it.
    """

    # --- values ---------------------------------------------------------
    # optional minus sign, digits, optional fraction, optional exponent
    numeric_value = Regex(r'[-]?\d+(\.\d*)?([eE]\d+)?').setParseAction(
        NumericToken)('number')
    unquoted_string = Word(alphanums + alphas8bit + '%.-_*;:')
    # NOTE(review): setParseAction is called on the ``quotedString`` object
    # itself rather than on a copy -- confirm that is intended.
    string_value = (quotedString.setParseAction(removeQuotes) |
                    unquoted_string).setParseAction(StringToken)('string')

    none_token = Literal('None').setParseAction(NoneToken)
    empty_token = Literal('Empty').setParseAction(EmptyToken)

    # declared ahead of its definition because typed_value refers to it
    value_list = Forward()
    # |<type>|<values>|
    typed_value = (Literal("|") + unquoted_string + Literal("|") +
                   value_list + Literal("|")).setParseAction(TypedValueToken)

    # alternatives are tried left to right
    value = (typed_value | numeric_value | none_token | empty_token |
             string_value).setParseAction(ValueToken)('value')
    # a value list is either whitespace-separated or comma-delimited
    value_list << Group(OneOrMore(value) ^ delimitedList(value)
                        ).setParseAction(ValueListAction)('value_list')

    # --- domain expressions: <domain> <binop> <values> or <domain> = * ---
    domain = Word(alphas, alphanums)
    binop = oneOf('= == != <> < <= > >= not like contains has ilike '
                  'icontains ihas is')
    equals = Literal('=')
    star_value = Literal('*')
    domain_values = (value_list.copy())('domain_values')
    domain_expression = (
        (domain + equals + star_value + stringEnd)
        | (domain + binop + domain_values + stringEnd)
    ).setParseAction(DomainExpressionAction)('domain_expression')

    # --- logical operators, matched only on word boundaries ---
    AND_ = WordStart() + (CaselessLiteral("AND") | Literal("&&")) + WordEnd()
    OR_ = WordStart() + (CaselessLiteral("OR") | Literal("||")) + WordEnd()
    NOT_ = WordStart() + (CaselessLiteral("NOT") | Literal('!')) + WordEnd()
    BETWEEN_ = WordStart() + CaselessLiteral("BETWEEN") + WordEnd()

    # --- queries: <domain> where <query_expression> ---
    # Forward because parenthesised sub-queries recurse into it
    query_expression = Forward()('filter')
    # dotted path of names, e.g. a.b.c
    identifier = Group(delimitedList(Word(alphas + '_', alphanums + '_'),
                                     '.')).setParseAction(IdentifierToken)
    ident_expression = (
        Group(identifier + binop + value).setParseAction(IdentExpressionToken)
        | (Literal('(') + query_expression + Literal(')')
           ).setParseAction(ParenthesisedQuery))
    between_expression = Group(identifier + BETWEEN_ + value + AND_ + value
                               ).setParseAction(BetweenExpressionAction)
    # operators are listed from highest to lowest precedence: NOT, AND, OR
    query_expression << infixNotation(
        (ident_expression | between_expression),
        [(NOT_, 1, opAssoc.RIGHT, SearchNotAction),
         (AND_, 2, opAssoc.LEFT, SearchAndAction),
         (OR_, 2, opAssoc.LEFT, SearchOrAction)])
    query = (domain + Keyword('where', caseless=True).suppress() +
             Group(query_expression) + stringEnd).setParseAction(QueryAction)

    # top-level rule: a query, else a domain expression, else a value list
    statement = (query('query')
                 | domain_expression('domain')
                 | value_list('value_list')
                 ).setParseAction(StatementAction)('statement')

    def parse_string(self, text):
        '''request pyparsing object to parse text

        `text` can be either a query, or a domain expression, or a list of
        values. the `self.statement` pyparsing object parses the input text
        and return a pyparsing.ParseResults object that represents the input
        '''

        return self.statement.parseString(text)
################# # BEGIN GRAMMAR ################# COLON = Literal(":").suppress() CONCAT = Literal("+").suppress() EQUALS = Literal("=").suppress() LANGLE = Literal("<").suppress() LBRACE = Literal("[").suppress() LPAREN = Literal("(").suppress() PERIOD = Literal(".").suppress() RANGLE = Literal(">").suppress() RBRACE = Literal("]").suppress() RPAREN = Literal(")").suppress() CATEGORIES = CaselessLiteral("categories").suppress() END = CaselessLiteral("end").suppress() FONT = CaselessLiteral("font").suppress() HINT = CaselessLiteral("hint").suppress() ITEM = CaselessLiteral("item").suppress() OBJECT = CaselessLiteral("object").suppress() attribute_value_pair = Forward() # this is recursed in item_list_entry simple_identifier = Word(alphas, alphanums + "_") identifier = Combine(simple_identifier + ZeroOrMore(Literal(".") + simple_identifier)) object_name = identifier object_type = identifier # Integer and floating point values are converted to Python longs and floats, respectively.
def parse_file(self):
    """Parses an existing namelist file and creates a deck of cards to
    hold the data. After this is executed, you need to call the
    ``load_model()`` method to extract the variables from this
    data structure.

    The file named by ``self.filename`` is read line by line. Group headers
    are registered with ``self.add_group``, cards are appended to the most
    recent entry of ``self.cards``, and an ungrouped first line is stored
    in ``self.title``.
    """

    # The context manager closes the file even if reading fails.
    with open(self.filename, 'r') as infile:
        data = infile.readlines()

    # Lots of numerical tokens for recognizing various kinds of numbers
    digits = Word(nums)
    dot = "."
    sign = oneOf("+ -")
    ee = CaselessLiteral('E') | CaselessLiteral('D')

    num_int = ToInteger(Combine(Optional(sign) + digits))

    num_float = ToFloat(Combine(
        Optional(sign) +
        ((digits + dot + Optional(digits)) | (dot + digits)) +
        Optional(ee + Optional(sign) + digits)
    ))

    # special case for a float written like "3e5"
    mixed_exp = ToFloat(Combine(digits + ee + Optional(sign) + digits))

    # I don't suppose we need these, but just in case (plus it's easy)
    nan = ToFloat(oneOf("NaN Inf -Inf"))

    # floats are tried before integers so "1.5" is not split at the dot
    numval = num_float | mixed_exp | num_int | nan
    strval = QuotedString(quoteChar='"') | QuotedString(quoteChar="'")
    b_list = "T TRUE True true F FALSE False false .TRUE. .FALSE. .T. .F."
    boolval = ToBool(oneOf(b_list))
    fieldval = Word(alphanums)

    # Tokens for parsing a line of data
    numstr_token = (numval + ZeroOrMore(Suppress(',') + numval)
                    | strval)
    data_token = numstr_token | boolval
    index_token = Suppress('(') + num_int + Suppress(')')

    card_token = Group(fieldval("name") +
                       Optional(index_token("index")) +
                       Suppress('=') +
                       Optional(num_int("dimension") + Suppress('*')) +
                       data_token("value") +
                       Optional(Suppress('*') + num_int("dimension")))
    multi_card_token = (card_token + ZeroOrMore(Suppress(',') + card_token))
    array_continuation_token = numstr_token.setResultsName("value")
    array2D_token = (fieldval("name") + Suppress("(") +
                     Suppress(num_int) + Suppress(',') +
                     num_int("index") + Suppress(')') +
                     Suppress('=') + numval +
                     ZeroOrMore(Suppress(',') + numval))

    # Tokens for parsing the group head and tail
    group_end_token = (Literal("/") |
                       Literal("$END") | Literal("$end") |
                       Literal("&END") | Literal("&end"))
    group_name_token = ((Literal("$") | Literal("&")) +
                        Word(alphanums).setResultsName("name") +
                        Optional(multi_card_token) +
                        Optional(group_end_token))

    # Comment Token
    comment_token = Literal("!")

    # Loop through each line and parse.

    current_group = None
    for line in data:
        line_base = line
        line = line.strip()

        # blank line: do nothing
        if not line:
            continue

        if current_group:

            # Skip comment cards
            if comment_token.searchString(line):
                pass

            # Process ordinary cards
            elif multi_card_token.searchString(line):
                cards = multi_card_token.parseString(line)

                for card in cards:
                    name, value = _process_card_info(card)
                    self.cards[-1].append(Card(name, value))

            # Catch 2D arrays like -> X(1,1) = 3,4,5
            elif array2D_token.searchString(line):
                card = array2D_token.parseString(line)

                name = card[0]
                index = card[1]
                value = array(card[2:])

                if index > 1:
                    # later rows are stacked under the previous card's value
                    old_value = self.cards[-1][-1].value
                    new_value = vstack((old_value, value))
                    self.cards[-1][-1].value = new_value
                else:
                    self.cards[-1].append(Card(name, value))

            # Arrays can be continued on subsequent lines
            # The value of the most recent card must be turned into an
            # array and appended
            elif array_continuation_token.searchString(line):
                card = array_continuation_token.parseString(line)

                if len(card) > 1:
                    element = array(card[0:])
                else:
                    element = card.value

                if isinstance(self.cards[-1][-1].value, ndarray):
                    new_value = append(self.cards[-1][-1].value, element)
                else:
                    new_value = array([self.cards[-1][-1].value, element])

                self.cards[-1][-1].value = new_value

            # Lastly, look for the group footer
            elif group_end_token.searchString(line):
                current_group = None

            # Everything else must be a pure comment

            # Group ending '/' can also conclude a data line.
            if line[-1] == '/':
                current_group = None

        else:
            group_name = group_name_token.searchString(line)

            # Group Header
            if group_name:
                group_name = group_name_token.parseString(line)
                current_group = group_name.name
                self.add_group(current_group)

                # Sometimes, variable definitions are included on the
                # same line as the namelist header
                if len(group_name) > 2:
                    cards = group_name[2:]

                    for card in cards:
                        # Sometimes an end card is on the same line.
                        if group_end_token.searchString(card):
                            current_group = None
                        else:
                            name, value = _process_card_info(card)
                            self.cards[-1].append(Card(name, value))

            # If there is an ungrouped card at the start, take it as the
            # title for the analysis
            elif len(self.cards) == 0 and self.title == '':
                self.title = line

            # All other ungrouped cards are saved as free-form (card-less)
            # groups.
            # Note that we can't lstrip because column spacing might be
            # important.
            else:
                self.add_group(line_base.rstrip())
def create_bnf(stack):
    """Build the pyparsing grammar for region expressions.

    Matched atoms and "operator + operand" pairs are handed to the parse
    action created by ``to_stack(stack)``.

    Returns the complete expression, anchored between ``StringStart()`` and
    ``StringEnd()``.
    """
    point = Literal(".")
    comma = Literal(",")
    e = CaselessLiteral("E")
    inumber = Word(nums)
    # optional leading sign, optional fraction, optional signed exponent
    fnumber = Combine(
        Word("+-" + nums, nums) +
        Optional(point + Optional(Word(nums))) +
        Optional(e + Word("+-" + nums, nums)))

    _of = Literal('of')
    _in = Literal('in')
    _by = Literal('by')
    _copy = Literal('copy')

    # region operators; each literal is replaced by its operator name
    _mn = Literal('-n').setParseAction(replace('OA_SubN'))
    _me = Literal('-e').setParseAction(replace('OA_SubE'))
    _pn = Literal('+n').setParseAction(replace('OA_AddN'))
    _pe = Literal('+e').setParseAction(replace('OA_AddE'))
    _inn = Literal('*n').setParseAction(replace('OA_IntersectN'))
    _ine = Literal('*e').setParseAction(replace('OA_IntersectE'))
    regop = (_mn | _me | _pn | _pe | _inn | _ine)

    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()

    _all = Literal('all').setParseAction(replace('KW_All'))
    node = Literal('node')
    nodes = Literal('nodes')
    element = Literal('element')
    elements = Literal('elements')
    group = Literal('group')
    _set = Literal('set')
    surface = Literal('surface')

    ident = Word(alphas + '_.', alphanums + '_.')
    # a set is named either by a number or by an identifier
    set_name = Word(nums) | ident

    function = Word(alphas + '_', alphanums + '_')
    function = Group(function).setParseAction(join_tokens)

    # a region reference is "r.<name>", optionally preceded by "copy"
    region = Combine(
        Literal('r.') + Word(alphas + '_', '_' + alphas + nums + '.'))
    region = Group(Optional(_copy, default='nocopy') + region)
    region.setParseAction(replace('KW_Region', keep=True))

    coor = oneOf('x y z')
    boolop = oneOf('& |')
    relop = oneOf('< > <= >= != ==')
    # a comparison between coordinates and/or numbers; the parentheses are
    # matched as plain tokens, not checked for balance
    bool_term = (ZeroOrMore('(') + (coor | fnumber) + relop + (coor | fnumber)
                 + ZeroOrMore(')'))
    relation = Forward()
    relation << (ZeroOrMore('(')
                 + bool_term + ZeroOrMore(boolop + relation)
                 + ZeroOrMore(')'))
    relation = Group(relation).setParseAction(join_tokens)

    # selector atoms; each group is replaced by (or tagged with) its E_* name
    nos = Group(nodes + _of + surface).setParseAction(replace('E_NOS'))
    nir = Group(nodes + _in + relation).setParseAction(
        replace('E_NIR', keep=True))
    nbf = Group(nodes + _by + function).setParseAction(
        replace('E_NBF', keep=True))
    ebf = Group(elements + _by + function).setParseAction(
        replace('E_EBF', keep=True))
    eog = Group(elements + _of + group + Word(nums)).setParseAction(
        replace('E_EOG', keep=True))
    nog = Group(nodes + _of + group + Word(nums)).setParseAction(
        replace('E_NOG', keep=True))
    onir = Group(node + _in + region).setParseAction(
        replace_with_region('E_ONIR', 2))
    ni = Group(node + delimitedList(inumber)).setParseAction(
        replace('E_NI', keep=True))
    ei1 = Group(element + delimitedList(inumber)).setParseAction(
        replace('E_EI1', keep=True))
    # lpar/rpar are already suppressed above; the extra .suppress() calls
    # here wrap them a second time
    etuple = (lpar.suppress() + inumber + comma.suppress()
              + inumber + rpar.suppress())
    ei2 = Group(element + delimitedList(etuple)).setParseAction(
        replace('E_EI2', keep=True))
    noset = Group(nodes + _of + _set + set_name).setParseAction(
        replace('E_NOSET', keep=True))
    eoset = Group(elements + _of + _set + set_name).setParseAction(
        replace('E_EOSET', keep=True))

    # Forward because a parenthesised atom contains a full expression
    region_expression = Forward()

    atom1 = (_all | region | ni | onir | nos | nir | nbf
             | ei1 | ei2 | ebf | eog | nog | noset | eoset)
    atom1.setParseAction(to_stack(stack))
    # the tokens of the nested expression are suppressed here; its own parse
    # actions are what feed the stack
    atom2 = (lpar + region_expression.suppress() + rpar)
    atom = (atom1 | atom2)

    aux = (regop + region_expression)
    aux.setParseAction(to_stack(stack))
    region_expression << atom + ZeroOrMore(aux)
    # the whole input string must be consumed
    region_expression = StringStart() + region_expression + StringEnd()

    return region_expression
def sqlparse(sql):
    """Validate a SQL SELECT statement and translate it to relational algebra.

    The statement is parsed with a pyparsing grammar. Its tables and
    attributes are then checked against a hard-coded sailors/boats/reserves
    schema (problems are reported with ``print``), and finally a
    relational-algebra string is assembled from the parse results.

    Args:
        sql: The SQL text to process.

    Returns:
        The relational-algebra string, or ``""`` when the statement cannot
        be parsed or the translation step raises an exception.
    """
    # Define SQL tokens
    print("SQL Validating")
    selectStmt = Forward()
    SELECT = Keyword("select", caseless=True).addParseAction(upcaseTokens)
    FROM = Keyword("from", caseless=True).addParseAction(upcaseTokens)
    WHERE = Keyword("where", caseless=True).addParseAction(upcaseTokens)
    AS = Keyword("as", caseless=True).addParseAction(upcaseTokens)
    UNION = Keyword("union", caseless=True).addParseAction(upcaseTokens)
    INTERSECT = Keyword("intersect", caseless=True).addParseAction(upcaseTokens)
    EXCEPT = Keyword("except", caseless=True).addParseAction(upcaseTokens)
    COUNT = Keyword("count", caseless=True).addParseAction(upcaseTokens)
    MAX = Keyword("max", caseless=True).addParseAction(upcaseTokens)
    AVG = Keyword("avg", caseless=True).addParseAction(upcaseTokens)
    SUM = Keyword("sum", caseless=True).addParseAction(upcaseTokens)

    ident = Word(alphas, alphanums + "_$").setName("identifier")
    columnName = (delimitedList(ident, ".", combine=True)).setName(
        "column name").addParseAction(upcaseTokens)
    tableName = (delimitedList(ident, ".", combine=True)).setName(
        "table name").addParseAction(upcaseTokens)
    funcs = ((COUNT | MAX | AVG | SUM) + "(" + ("*" | columnName) + ")")

    whereExpression = Forward()
    and_ = Keyword("and", caseless=True).addParseAction(upcaseTokens)
    or_ = Keyword("or", caseless=True).addParseAction(upcaseTokens)
    in_ = Keyword("in", caseless=True).addParseAction(upcaseTokens)
    GROUP_BY = Keyword("group by", caseless=True).addParseAction(upcaseTokens)
    HAVING = Keyword("having", caseless=True).addParseAction(upcaseTokens)
    CONTAINS = Keyword("contains", caseless=True).addParseAction(upcaseTokens)

    E = CaselessLiteral("E")
    binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
    arithSign = Word("+-", exact=1)
    realNum = Combine(Optional(arithSign) +
                      (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) +
                      Optional(E + Optional(arithSign) + Word(nums)))
    intNum = Combine(Optional(arithSign) + Word(nums) +
                     Optional(E + Optional("+") + Word(nums)))

    columnRval = realNum | intNum | quotedString | columnName
    whereCondition = Group(
        (funcs + binop + columnRval) |
        (columnName + binop + columnRval) |
        (columnName + in_ + "(" + delimitedList(columnRval) + ")") |
        (columnName + in_ + "(" + selectStmt + ")") |
        ("(" + whereExpression + ")")
    )
    whereExpression << (
        whereCondition +
        Optional(Group(
            GROUP_BY + columnName +
            Optional(
                HAVING +
                Group((funcs + binop + columnRval) | (columnName + binop + columnRval)) +
                ZeroOrMore(
                    (and_ | or_) +
                    Group((funcs + binop + columnRval) | (columnName + binop + columnRval)))))) +
        ZeroOrMore((and_ | or_) + whereExpression))

    # Define the SQL grammar
    selectStmt <<= (
        (SELECT +
         ('*' | Group(delimitedList(Group((funcs | columnName) + Optional(AS + ident)))))("columns") +
         FROM +
         Group(delimitedList(Group(tableName + Optional(AS + ident))))("tables") +
         Optional(Group(WHERE + whereExpression), "")("where")) +
        Optional((UNION + selectStmt)("union") |
                 (INTERSECT + selectStmt)("intersect") |
                 (EXCEPT + selectStmt)("except") |
                 (CONTAINS + selectStmt)("contains")))

    SQLParser = selectStmt  # TODO - make paranthesies optional around a selectStmt (test h)

    # Begin validation
    try:
        # Parse once and reuse the result for both the echo and validation.
        parsedQuery = SQLParser.parseString(sql)
        print(sql, "\n-----\n", parsedQuery, "\n")
    except Exception as e:
        print("-------------------------------------------")
        print("SYNTAX ERROR PARSING: " + sql)
        print("-------------------------------------------")
        print("ERROR MESSAGE:")
        print("-------------------------------------------")
        print(e)
        # Nothing to validate or translate; without this return the code
        # below would fail on the unbound ``parsedQuery``.
        return ""

    # List of tables being used
    tables = parsedQuery[3]

    # attributes: list of attributes and their type (comes after select)
    attributes = parsedQuery[1]

    # Define the schema
    sailors = (
        ("tname", "sailors"),
        ("sid", "int"),
        ("sname", "str"),
        ("rating", "int"),
        ("age", "real")
    )

    boats = (
        ("tname", "boats"),
        ("bid", "int"),
        ("bname", "str"),
        ("color", "str")
    )

    reserves = (
        ("tname", "reserves"),
        ("sid", "int"),
        ("bid", "int"),
        ("day", "date")
    )

    # Order matters: when an attribute occurs in several tables the last
    # matching table is the one reported.
    schema = (sailors, boats, reserves)
    knownTables = set(tableSchema[0][1].upper() for tableSchema in schema)

    def isSchemaAttribute(name):
        # True if ``name`` (case-insensitive) is the first element of any
        # entry of any schema table.
        wanted = str(name).upper()
        return any(entry[0].upper() == wanted
                   for tableSchema in schema for entry in tableSchema)

    print("Error messages (if any):")

    # Check if the table used in the query are valid based on the schema
    for item in tables:
        if str(item[0]).upper() not in knownTables:
            print(item[0] + " is not a table in the schema.")
            # Do something since a table is invalid

    # Check if the select attributes are valid according to the schema and what tables are being used in the query
    # - Iterate through each attributes
    # - Check if it's a built-in function, if it is then get the 2 index (that will be the attribute)
    # - If it's not a build in function, then get the 0 index (that will be the attribute)
    # - Check if that attribute is in any of the tables
    # - If it is, make sure that table is being used in the query (check if the table is in 'tables')
    attrTablePairs = []
    for attribute in attributes:
        # Extract the correct attribute
        if str(attribute[0]).upper() in ("COUNT", "MAX", "AVG", "SUM"):
            attr = attribute[2]
        else:
            attr = attribute[0]

        if "." in attr:
            attr = attr.split(".")[1]

        # Check if the attribute is in any of the tables in the schema
        isInTable = False
        attrTableName = ""
        for tableSchema in schema:
            for item in tableSchema:
                if (item[0].upper() == attr or attr == "*"):
                    isInTable = True
                    attrTableName = tableSchema[0][1].upper()
                    break

        if not isInTable:
            print(attr + " is not an attribute in the schema.")
            # Do something since an attribute is invalid
        else:
            print(attr + " is in the table " + attrTableName)
            # Build list of attr, table pairs
            attrTablePairs.append((attr, attrTableName))

    # Check to see if the corresponding table is being used in the query
    for pair in attrTablePairs:
        beingUsed = False
        if (pair[0] == "*"):
            beingUsed = True
        for table in tables:
            if (pair[1] == str(table[0].upper())):
                beingUsed = True
                break
        if not beingUsed:
            # Attribute is invalid as the table it belongs to is not being used in the query
            print(str(pair[0]) + " is invalid as the table it belongs to (" + str(
                pair[1]) + ") is not being used in the query.")

    # Check if the attributes being used in the WHERE stmnt are valid
    # - Check if WHERE stmnt exists
    # (the grammar inserts "" as a placeholder when there is no WHERE clause)
    whereExp = parsedQuery[4] if len(parsedQuery) >= 5 else ""
    for exp in whereExp:
        if (exp != "WHERE" and exp != "AND" and exp != "OR"):
            if (exp[0] == "GROUP BY"):
                valid = False
                for attr in attrTablePairs:
                    if (str(exp[1]).upper() == str(attr[0]).upper()):
                        valid = True
                        break
                if not valid:
                    print(exp[1] + " in the group by clause is not a valid attribute")
                if (len(exp) >= 3):
                    if (str(exp[2]).upper() == "HAVING"):
                        print("")
            elif exp[0] in ("COUNT", "MAX", "AVG", "SUM"):
                # Aggregate: the attribute is the function argument
                if not isSchemaAttribute(exp[2]):
                    print(exp[2] + " in the where clause is not a valid attribute")
            elif ("." in exp[0]):
                # Qualified name: check the part after the first dot
                if not isSchemaAttribute(exp[0].split(".")[1]):
                    print(exp[0] + " in the where clause is not a valid attribute")
            else:
                if not isSchemaAttribute(exp[0]):
                    print(exp[0] + " in the where clause is not a valid attribute")

    # RELATIONAL ALGEBRA TRANSLATION
    Aggfunc = ('COUNT', 'MAX', 'AVG', 'SUM')
    Aggfunc2 = ('GROUP BY', "HAVING")

    # Create Regular Expression string
    Rastr = '[(Projection)'
    # first element of section
    first = True
    rename = False
    try:
        # SELECT conversion
        for column in parsedQuery[1]:
            if first:
                if str(column[0]) in Aggfunc:
                    Rastr = Rastr + str(column[0]) + '(' + str(column[2]) + ')'
                else:
                    Rastr = Rastr + str(column[0])
                first = False
            else:
                if str(column[0]) in Aggfunc:
                    Rastr = Rastr + ',' + str(column[0]) + '(' + str(column[2]) + ')'
                else:
                    Rastr = Rastr + ',' + str(column[0])
            # Rename Set
            found = False
            if len(column) > 1:
                if 'AS' in str(column):
                    for item in column:
                        if str(item) == "AS":
                            found = True
                        elif found:
                            if rename:
                                renastr = renastr + ',' + str(column[2])
                            else:
                                renastr = "(Rename)" + "[" + str(column[2]) + '<-' + str(column[0]) + ','
                                rename = True
                            found = False
        if rename:
            Rastr = renastr + "]" + Rastr
        Rastr += "]"

        # WHERE conversion
        wheref = False
        for attr in whereExp:
            # if agg function detected
            aggfunc1 = False
            aggfunc2 = False
            # checks if and/or found
            if str(attr) == "AND" or str(attr) == 'OR':
                Rastr = Rastr + str(attr) + " "
            # checks for where statement and converts to select
            elif str(attr) == "WHERE":
                Rastr = Rastr + '(Select)['
                # remember that the Select bracket must be closed below
                wheref = True
            else:
                for item in attr:
                    if item in Aggfunc:
                        Rastr = Rastr + str(attr[0]) + '(' + str(attr[2]) + ')' + ' = ' + str(attr[5]) + ' '
                        aggfunc1 = True
                    elif str(item) in Aggfunc2:
                        Rastr = Rastr + str(item) + '('
                        aggfunc2 = True
                    else:
                        if aggfunc2:
                            if item[0] in Aggfunc:
                                Rastr = Rastr + str(item[0]) + '(' + str(item[2]) + ')' + '=' + str(item[5]) + ' '
                            elif str(item) == "AND" or str(item) == "OR":
                                Rastr = Rastr + str(item) + " "
                            else:
                                Rastr = Rastr + str(item) + ') '
                        elif not aggfunc1:
                            Rastr = Rastr + str(item) + ' '
                if aggfunc2:
                    Rastr = Rastr + ')'
        if wheref:
            Rastr = Rastr + ']'

        # FROM conversion of SQL
        Rastr = Rastr + '['
        first = True
        for table in tables:
            if first:
                if len(table) == 1:
                    Rastr = Rastr + str(table[0])
                else:
                    Rastr = Rastr + '(Rename)[' + str(table[2]) + ']' + str(table[0])
                first = False
            else:
                if len(table) == 1:
                    Rastr = Rastr + ' x ' + str(table[0])
                else:
                    Rastr = Rastr + 'x (Rename)[' + str(table[2]) + ']' + str(table[0])
        Rastr = Rastr + ']'
        print(Rastr)
    except Exception as e:
        # A failed translation yields an empty result, not a partial string.
        Rastr = ""
        print("Error:", e)
    return Rastr
def __init__(self, alert='!'):
    """Set up the expression heads and the command list.

    Args:
        alert: Prefix literal that introduces an expression. It is
            optional in ``dm_expr_head`` and mandatory in ``expr_head``.
    """
    self.dm_expr_head = Optional(CaselessLiteral(alert))
    # Use the configured prefix here too, so a custom ``alert`` is honoured
    # by both heads (unchanged for the default '!').
    self.expr_head = CaselessLiteral(alert)
    self.commands = []
    self.reinit_exprs()
quoted_string = QUOTE + ZeroOrMore(quoted_item) + QUOTE

# Numbers can just be numbers. Only integers though.
number = Regex('[0-9]+')

# Basis characters (by exclusion) for variable / field names.  The following
# list of characters is from the btparse documentation
# (raw string: same pattern text as before, without invalid escape sequences)
any_name = Regex(r'''[^\s"#%'(),={}]+''')

# btparse says, and the test bibs show by experiment, that macro and field names
# cannot start with a digit.  In fact entry type names cannot start with a digit
# either (see tests/bibs). Cite keys can start with a digit
not_digname = Regex(r'''[^\d\s"#%'(),={}][^\s"#%'(),={}]*''')

# Comment comments out to end of line
comment = (AT + CaselessLiteral('comment') +
           Regex(r"[\s{(].*").leaveWhitespace())

# The name types with their digiteyness
not_dig_lower = not_digname.copy().setParseAction(lambda t: t[0].lower())
macro_def = not_dig_lower.copy()
macro_ref = not_dig_lower.copy().setParseAction(lambda t: Macro(t[0].lower()))
field_name = not_dig_lower.copy()
# Spaces in names mean they cannot clash with field names
entry_type = not_dig_lower('entry_type')
cite_key = any_name('cite_key')
# Number has to be before macro name
string = (number | macro_ref | quoted_string | curly_string)

# There can be hash concatenation
field_value = string + ZeroOrMore(HASH + string)
# Identifiers start with a letter and may contain letters, digits, "_" and "$".
ident = Word(alphas, alphanums + "_$").setName("identifier")
# Column and table names are dot-separated identifiers combined into one
# token and wrapped in Upcase.
columnName = Upcase(delimitedList(ident, ".", combine=True))
columnNameList = Group(delimitedList(columnName))
columnNameList1 = Group(delimitedList(columnName))
groupNameList = Group(delimitedList(columnName))
tableName = Upcase(delimitedList(ident, ".", combine=True))
tableName2 = Upcase(delimitedList(ident, ".", combine=True))
# "<table> <astoken> <alias>"; astoken is defined outside this section
tableAlias = tableName + astoken + tableName2
# the aliased form is tried first, then the plain table name
tableNameList = Group(delimitedList(tableAlias | tableName))

# Forward declaration; the definition is not part of this section
whereExpression = Forward()
and_ = Keyword("and", caseless=True)
or_ = Keyword("or", caseless=True)
in_ = Keyword("in", caseless=True)

E = CaselessLiteral("E")
binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
oper = oneOf("intersect union except contains", caseless=True)
oper1 = oneOf("count min max avg", caseless=True)
asoper = oneOf("as", caseless=True)
arithSign = Word("+-", exact=1)
# real number: optional sign, digits with a decimal point (or a leading
# point), optional exponent
realNum = Combine(
    Optional(arithSign) +
    (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) +
    Optional(E + Optional(arithSign) + Word(nums)))
# integer: optional sign, digits, optional non-negative exponent
intNum = Combine(
    Optional(arithSign) + Word(nums) +
    Optional(E + Optional("+") + Word(nums)))

# right-hand values; reals are tried before integers
columnRval = realNum | intNum | quotedString | columnName  # need to add support for alg expressions
def _parse_study_search_string(self, searchstr,
                               only_with_processed_data=False):
    """Parse a search string into the SQL queries for a study search

    Parameters
    ----------
    searchstr : str
        The string to parse
    only_with_processed_data : bool
        Whether or not to return studies with processed data.

    Returns
    -------
    study_sql : str
        SQL query for selecting studies with the required metadata columns
    sample_sql : str
        SQL query for each study to get the sample ids that match the query.
        It still contains a ``{0}`` placeholder in the sample table name.
    meta_headers : iterable of str
        metadata categories in the query string, inserted in alphabetical
        order

    Notes
    -----
    All searches are case-sensitive

    References
    ----------
    .. [1] McGuire P (2007) Getting started with pyparsing.
    """
    # build the parse grammar
    category = Word(alphas + nums + "_")
    seperator = oneOf("> < = >= <= !=") | CaselessLiteral("includes") | \
        CaselessLiteral("startswith")
    value = Word(alphas + nums + "_" + ":" + ".") | \
        dblQuotedString().setParseAction(removeQuotes)
    criterion = Group(category + seperator + value)
    criterion.setParseAction(SearchTerm)
    and_ = CaselessLiteral("and")
    or_ = CaselessLiteral("or")
    not_ = CaselessLiteral("not")
    optional_seps = Optional(and_ | or_ | not_)

    # create the grammar for parsing operators AND, OR, NOT
    search_expr = operatorPrecedence(
        criterion, [
            (not_, 1, opAssoc.RIGHT, SearchNot),
            (and_, 2, opAssoc.LEFT, SearchAnd),
            (or_, 2, opAssoc.LEFT, SearchOr)])

    # parse the search string to get out the SQL WHERE formatted query
    eval_stack = (search_expr + stringEnd).parseString(searchstr)[0]
    sql_where = eval_stack.generate_sql()

    # parse out all metadata headers we need to have in a study, and their
    # corresponding types. The string is scanned once and both values are
    # taken from the same term.
    terms = [match[0][0].term
             for match in (criterion + optional_seps).scanString(searchstr)]

    # sort (header, type) pairs by header so they come back in the same
    # order every time. sorted() is stable, so repeated headers keep the
    # order in which they appear in the query.
    header_types = sorted(((term[0], term[2]) for term in terms),
                          key=lambda pair: pair[0])
    meta_headers = {header for header, _ in header_types}

    # At this point it is possible that a metadata header has been
    # referenced more than once in the query. If the types agree, then we
    # do not need to do anything. If the types do not agree (specifically,
    # if it appears to be numerical in one case and string in another),
    # then we need to give varchar the precedence.
    meta_header_type_lookup = dict()
    for header, header_type in header_types:
        known_type = meta_header_type_lookup.setdefault(header, header_type)
        if 'varchar' in (known_type, header_type):
            meta_header_type_lookup[header] = 'varchar'

    # create the study finding SQL
    # remove metadata headers that are in study table
    meta_headers.discard('sample_id')
    meta_headers = tuple(meta_headers.difference(self.study_cols))

    # get all study ids that contain all metadata categories searched for
    sql = []
    if meta_headers:
        # have study-specific metadata, so need to find specific studies
        sql.extend(
            "SELECT DISTINCT table_name FROM "
            "information_schema.columns WHERE "
            "lower(column_name) = lower('{0}')".format(
                qdb.util.scrub_data(meta))
            for meta in meta_headers)
    else:
        # no study-specific metadata, so need all studies
        sql.append("SELECT DISTINCT table_name "
                   "FROM information_schema.columns")

    # combine the query
    if only_with_processed_data:
        sql.append("SELECT DISTINCT 'sample_' || CAST(study_id AS VARCHAR)"
                   "FROM qiita.study_artifact "
                   "JOIN qiita.artifact USING (artifact_id) "
                   "JOIN qiita.artifact_type USING (artifact_type_id) "
                   "WHERE artifact_type = 'BIOM'")

    # restrict to studies in portal
    # NOTE(review): the portal name is interpolated directly into the SQL
    # text; confirm it always comes from trusted configuration.
    sql.append("SELECT 'sample_' || CAST(study_id AS VARCHAR) "
               "FROM qiita.study_portal "
               "JOIN qiita.portal_type USING (portal_type_id) "
               "WHERE portal = '%s'" % qiita_config.portal)
    study_sql = ' INTERSECT '.join(sql)

    # create the sample finding SQL, getting both sample id and values
    # build the sql formatted list of metadata headers: study-table columns
    # get the "st" alias, everything else the per-study sample table "sa"
    header_info = [
        ("st.%s" % meta) if meta in self.study_cols else ("sa.%s" % meta)
        for meta in meta_header_type_lookup]

    # build the SQL query; "{0}" is deliberately left in place for the
    # caller to fill in
    sample_sql = ("SELECT ss.sample_id, %s "
                  "FROM qiita.study_sample ss "
                  "JOIN qiita.sample_{0} sa ON ss.sample_id = sa.sample_id"
                  " JOIN qiita.study st ON st.study_id = ss.study_id "
                  "WHERE %s" %
                  (','.join(header_info), sql_where))
    return study_sql, sample_sql, meta_header_type_lookup.keys()
print('toks:', toks) if len(toks[0]): if (toks[0].lower() == 'true') or (toks[0].lower() == 'yes'): return True elif (toks[0].lower() == 'false') or (toks[0].lower() == 'no'): return False if toks[0].isnumeric(): if int(toks[0]) == 1: return True elif int(toks[0]) == 0: return False else: return False isc_boolean = (CaselessLiteral('true') | CaselessLiteral('false') | CaselessLiteral('yes') | CaselessLiteral('no') | Literal('1') | Literal('0')) find_pattern = Group( Word(alphanums + '_-/:.')('isc_boolean').setParseAction(convertBoolean))( 'find_pattern') parse_me(find_pattern, 'TRUE') parse_me(find_pattern, 'True') parse_me(find_pattern, 'true') parse_me(find_pattern, 'yes') parse_me(find_pattern, 'Yes')
# Nearly every Marker below is a candidate for SuffixMarker, since errors in
# the regulation text can produce suffixed forms. To keep false matches down,
# a marker is only converted once an explicit example has been seen.

# Paragraph / part level markers
paragraph_marker = Marker("paragraph")
paragraphs_marker = SuffixMarker("paragraphs")
part_marker = Marker("part")
parts_marker = Marker("parts")
subpart_marker = Marker("subpart")

# Commentary markers: "comment", "commentary", "official interpretations" or
# "supplement I", optionally followed by "of" / "to"
comment_marker = (
    (
        Marker("comment")
        | Marker("commentary")
        | (Marker("official") + Marker("interpretations"))
        | (Marker("supplement") + Suppress(WordBoundaries("I")))
    )
    + Optional(Marker("of") | Marker("to"))
)
comments_marker = Marker("comments")

# Appendix markers
appendix_marker = Marker("appendix")
appendices_marker = Marker("appendices")

# Connectors between citations: a comma (optionally followed by and/or),
# a bare and/or, "except for", a hyphen, or the word "through" (which is
# kept under the "through" results name)
conj_phrases = (
    (Suppress(",") + Optional(Marker("and") | Marker("or")))
    | Marker("and")
    | Marker("or")
    | (Marker("except") + Marker("for"))
    | Suppress("-")
    | WordBoundaries(CaselessLiteral("through"))("through")
)

# A run of digits, exposed as "cfr_title"
title = Word(string.digits)("cfr_title")
def get_grammar(self):
    """
    Build and return the pyparsing grammar for mathematical expressions.

    The returned parser is the complete expression grammar followed by
    ``stringEnd``.

    Possibly helpful:
        - BNF form of context-free grammar
          https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form
        - Some pyparsing docs
          http://infohost.nmt.edu/~shipman/soft/pyparsing/web/index.html
    """
    # Sign literals, shared by exponents, negation and sums
    plus_sign = Literal("+")
    minus_sign = Literal("-")
    either_sign = plus_sign | minus_sign

    # Unsigned decimal: 1 or 1.0 or .1
    # Combine() joins the matched pieces into one token and requires them
    # to be contiguous (no spaces)
    digits = Word(nums)
    digits_first = digits + Optional("." + Optional(digits))
    point_first = "." + digits
    decimal = Combine(digits_first | point_first)

    # Suffixes are runs of letters and/or '%'
    unit_suffix = Word(alphas + '%')
    unit_suffix.setParseAction(self.suffix_parse_action)

    # A number is a group holding the numeric text ("num", which may carry
    # an exponent and can be fed to float()) and an optional "suffix".
    # Calling parser("name") is shorthand for parser.setResultsName("name"),
    # which lets the piece be pulled out of the results by name.
    exponent = CaselessLiteral("E") + Optional(either_sign) + digits
    number = Group(
        Combine(decimal + Optional(exponent))("num")
        + Optional(unit_suffix)("suffix")
    )("number")

    # Object names take one of two forms:
    #   1. front + subscripts + tail
    #   2. front + lower_indices + upper_indices + tail
    # where
    #   front (required):          starts with alpha, then alphanumerics
    #   subscripts (optional):     alphanumerics and underscores, not
    #                              followed by '{'
    #   lower_indices (optional):  "_{(-)<alphanumeric>}"
    #   upper_indices (optional):  "^{(-)<alphanumeric>}"
    #   tail (optional):           any number of primes
    name_front = Word(alphas, alphanums)
    plain_subscripts = Word(alphanums + '_') + ~FollowedBy('{')
    braced_lower = (Literal("_{") + Optional("-") + Word(alphanums)
                    + Literal("}"))
    braced_upper = (Literal("^{") + Optional("-") + Word(alphanums)
                    + Literal("}"))
    braced_indices = Optional(braced_lower) + Optional(braced_upper)
    primes = ZeroOrMore("'")
    name = Combine(
        name_front + Optional(plain_subscripts | braced_indices) + primes
    )

    # A variable is a group containing exactly one object name
    variable = Group(name("varname"))("variable")
    variable.setParseAction(self.variable_parse_action)

    # Placeholder for the recursive expression rule
    expression = Forward()

    # funcname(arg, arg, ...): at least one argument is required and the
    # arguments are collected into their own group
    call_arguments = Group(delimitedList(expression))("arguments")
    function = Group(
        name("funcname") + Suppress("(") + call_arguments + Suppress(")")
    )("function")
    function.setParseAction(self.function_parse_action)

    # Bracketed constructs
    parentheses = Group(
        Suppress("(") + expression + Suppress(")")
    )('parentheses')
    array = Group(
        Suppress("[") + delimitedList(expression) + Suppress("]")
    )("array")

    # Atoms evaluate to a number or array without any binary operation
    atom = number | function | variable | parentheses | array

    # Operators, from tightest to loosest binding.

    # Exponentiation, whose right-hand side may carry its own minus sign
    signed_exponent = Suppress("^") + Optional(minus_sign)("op") + atom
    power = atom + ZeroOrMore(signed_exponent)
    power.addParseAction(self.group_if_multiple('power'))

    # Negation (e.g. the -3 in 5*-3 must be evaluated first). Negative
    # powers are handled above. Its precedence above the parallel
    # operator is an arbitrary choice.
    negation = Optional(minus_sign)("op") + power
    negation.addParseAction(self.group_if_multiple('negation'))

    # Parallel operator: 1 || 5 == 1/(1/1 + 1/5)
    pipes = Literal('|') + Literal('|')
    parallel = negation + ZeroOrMore(Suppress(pipes) + negation)
    parallel.addParseAction(self.group_if_multiple('parallel'))

    # Multiplication and division
    times_or_over = (Literal('*') | Literal('/'))("op")
    product = parallel + ZeroOrMore(times_or_over + parallel)
    product.addParseAction(self.group_if_multiple('product'))

    # Sums and differences; a leading minus is already covered by negation
    sumdiff = (Optional(plus_sign) + product
               + ZeroOrMore(either_sign("op") + product))
    sumdiff.addParseAction(self.group_if_multiple('sum'))

    # Close the recursion
    expression << sumdiff

    return expression + stringEnd
def get_grammar_parser():
    """
    Build the parser element for a whole grammar: a "#JSGF" header line, a
    grammar declaration, any number of import statements and one or more
    rules. Its parse action turns the matched tokens into a Grammar object.
    """
    def build_grammar(tokens):
        # The first four tokens are the header attributes and grammar name.
        version, charset, language, name = tokens[0:4]

        # A two-character charset with no language given is really the
        # language, so shift it over.
        if not language and len(charset) == 2:
            language, charset = charset, ""

        grammar = Grammar()
        # The version token starts with "v"/"V"; drop that prefix.
        grammar.jsgf_version = version[1:]
        grammar.charset_name = charset
        grammar.language_name = language
        grammar.name = name

        # Everything after the first four tokens is an import or a rule.
        for item in tokens[4:]:
            if isinstance(item, Import):
                grammar.add_import(item)
            else:
                grammar.add_rule(item)
        return grammar

    def blank_if_missing(tokens):
        # Absent optional header fields are reported as an empty string.
        return tokens or [""]

    def build_import(tokens):
        return Import(tokens[0])

    # Keywords.
    import_keyword = Suppress(CaselessKeyword("import"))
    grammar_keyword = Suppress("grammar")

    # Header line: "#JSGF" <version> [charset] [language] <delimiter>
    version_no = Regex(r"(v|V)(\d+\.\d+|\d+\.|\.\d+)")
    version_no.setName("version number")

    charset_name = Optional(word.copy())
    charset_name.setName("character set")
    charset_name.setParseAction(blank_if_missing)

    language_name = Optional(word.copy())
    language_name.setName("language name")
    language_name.setParseAction(blank_if_missing)

    header_line = (
        Suppress(CaselessLiteral("#JSGF"))
        + version_no
        + charset_name
        + language_name
        + line_delimiter
    )
    header_line.setName("grammar header")

    # The declaration and import lines both ignore C++ style comments
    # (/* comment */ or // comment).
    name_line = grammar_keyword + grammar_name + line_delimiter
    name_line.setName("grammar declaration")
    name_line.ignore(cppStyleComment)

    import_statement = (
        import_keyword + langle + import_name + rangle + line_delimiter
    )
    import_statement.setParseAction(build_import)
    import_statement.ignore(cppStyleComment)

    # Put the pieces together and attach the Grammar-building action.
    parser = (
        header_line
        + name_line
        + ZeroOrMore(import_statement)
        + OneOrMore(rule_parser)
    )
    parser.setName("grammar")
    parser.setParseAction(build_grammar)
    return parser
Title: Elements That Provides DNS Resource Records Description: Provides RR-related grammar in PyParsing engine for ISC-configuration style For resource records found in BIND9 DNS zone records """ from pyparsing import Optional, Combine, CaselessLiteral, \ Literal, Char, OneOrMore, Group, ungroup from bind9_parser.isc_utils import semicolon from bind9_parser.isc_domain import domain_generic_fqdn, domain_charset_wildcard g_test_over_63_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abc" rr_class_in = CaselessLiteral('IN') rr_class_ch = CaselessLiteral('CH') rr_class_hesiod = CaselessLiteral('HS') rr_class_none = CaselessLiteral('NONE') # RFC 2136 rr_class_any = CaselessLiteral('ANY') # RFC 1035 rr_class_set = (rr_class_in | rr_class_hesiod | rr_class_ch | rr_class_none | rr_class_any)('rr_class') rr_class_set.setName('<rr_class>') rr_type_a = CaselessLiteral('A') rr_type_ns = CaselessLiteral('NS') rr_type_cname = CaselessLiteral('CNAME')
else: # Append to query the token if negation: t = ~t if operation == 'or': query |= t else: query &= t return query NO_BRTS = printables.replace('(', '').replace(')', '') SINGLE = Word(NO_BRTS.replace('*', '')) WILDCARDS = Optional('*') + SINGLE + Optional('*') + WordEnd(wordChars=NO_BRTS) QUOTED = quotedString.setParseAction(removeQuotes) OPER_AND = CaselessLiteral('and') OPER_OR = CaselessLiteral('or') OPER_NOT = '-' TERM = Combine( Optional(Word(alphas).setResultsName('meta') + ':') + (QUOTED.setResultsName('query') | WILDCARDS.setResultsName('query'))) TERM.setParseAction(createQ) EXPRESSION = operatorPrecedence( TERM, [(OPER_NOT, 1, opAssoc.RIGHT), (OPER_OR, 2, opAssoc.LEFT), (Optional(OPER_AND, default='and'), 2, opAssoc.LEFT)]) EXPRESSION.setParseAction(unionQ) QUERY = OneOrMore(EXPRESSION) + StringEnd() QUERY.setParseAction(unionQ)
nums, Combine, oneOf, opAssoc, operatorPrecedence, Suppress, alphanums, alphas, Word, Forward, Regex, ) dot = Literal(".") underscore = "_" eg = CaselessLiteral("eg") si = CaselessLiteral("di") ai = CaselessLiteral("ai") lparen = Literal("(") rparen = Literal(")") lt = Literal("<") le = Literal("<=") gt = Literal(">") ge = Literal(">=") eq = Literal("=") alphanums_extended = alphanums + "-_" # deficion de numero estilo JSON
def create_parser():
    """
    Build and return a parser for expressions.

    Parser builds a tuple-based representation of the formula that should be
    easy to evaluate. Each component is a tuple where:
      - the first element indicates the type,
      - the second element gives the set of columns referenced,
      - the rest gives the required arguments for the formula-chunk.

    Parser throws ParseException if something goes wrong.
    """
    # ParseException is imported here as well: the parse actions below raise
    # it, and it was previously missing from this local import.
    from pyparsing import (
        Literal, Word, Optional, CaselessLiteral, Group, StringStart,
        StringEnd, Suppress, CharsNotIn, Forward, ParseException, nums,
        delimitedList, operatorPrecedence, opAssoc)

    # Recognised ".foo" column modifiers and the tag stored for each.
    column_modifiers = (
        (".max", 'max'),
        (".percent", 'per'),
        (".final", 'fin'),
    )

    def column_parse(toks):
        """
        Parse a column name and strip off any ".foo" modifier.
        """
        col = toks[0][0]
        for suffix, tag in column_modifiers:
            if col.endswith(suffix):
                col = col[:-len(suffix)]
                return ("col", {col}, col, tag)
        return ("col", {col}, col, 'val')

    def actionflag_parse(toks):
        """
        Parse the [[activitytotal]] special case
        """
        flag = toks[0][0]
        if flag == 'activitytotal':
            # dependant activity True is a flag meaning "everything":
            # fixed later.
            return ("flag", {True}, flag)

        raise ParseException("Unknown flag ([[...]]).")

    def real_parse(toks):
        # The pieces of a number arrive as separate tokens; join them before
        # converting.
        return ("num", set(), float(''.join(toks)))

    def func_parse(toks):
        # toks[0] is [function name, arg, arg, ...]; gather the columns that
        # the arguments reference.
        cols = set()
        cols.update(*(t[1] for t in toks[0][1:]))
        return ("func", cols) + tuple(toks[0])

    def expr_parse(s, loc, toks):
        ts = toks[0]
        if len(ts) == 2:
            # unary operator
            return ("sign", ts[1][1], ts[0]) + tuple(ts[1:])
        elif len(ts) > 1 and len(ts) % 2 == 1:
            # one or more ops at the same level
            # build list of referenced columns (operands sit at even indexes):
            cols = set()
            cols.update(*(t[1] for t in ts[0::2]))
            return ("expr", cols, ts[0]) + tuple(ts[1:])
        else:
            raise ParseException("Unknown expression parsed.")

    sign = Literal("+") | Literal("-")
    real = (Word(nums) + "." + Optional(Word(nums)) +  # whole/decimal part
            # scientific notation part
            Optional(CaselessLiteral("E") + Optional(sign) + Word(nums))
            )
    integer = Word(nums)
    # all numbers treated as floats to avoid integer arithmetic rounding
    number = (real | integer).setParseAction(real_parse)

    # Allow anything except ']' in column names. Let the limitations on sane
    # column names be enforced somewhere else.
    actionflag = Group(
        Suppress('[[') + CharsNotIn('[]') + Suppress(']]')
    ).setParseAction(actionflag_parse)
    column = Group(
        Suppress('[') + CharsNotIn('[]') + Suppress(']')
    ).setParseAction(column_parse)

    expr = Forward()
    function_name = (CaselessLiteral("SUM") | CaselessLiteral("AVG") |
                     CaselessLiteral("MAX") | CaselessLiteral("MIN") |
                     CaselessLiteral("BEST"))
    function = Group(
        function_name + Suppress('(') + delimitedList(expr) + Suppress(')')
    ).setParseAction(func_parse)
    operand = number | column | function | actionflag

    signop = Literal("+") | Literal("-")
    multop = Literal("*") | Literal("/")
    plusop = Literal("+") | Literal("-")

    expr << operatorPrecedence(
        operand,
        [(signop, 1, opAssoc.RIGHT, expr_parse),
         (multop, 2, opAssoc.LEFT, expr_parse),
         (plusop, 2, opAssoc.LEFT, expr_parse),
         ])

    formula = StringStart() + expr + StringEnd()
    return formula
def BNF(self):
    """
    Return the expression grammar, building and caching it on first use.

    mult_op :: '*' | '/'
    add_op  :: '+' | '-'
    cmp_op  :: '<=' | '>=' | '==' | '!=' | '<' | '>'
    atom    :: ['-'] (number | GET_SENSOR_VAL:name | ident '(' expr ')')
               | '(' expr ')'
    term    :: atom [ mult_op atom ]*
    add_exp :: term [ add_op term ]*
    cmp_exp :: add_exp [ cmp_op add_exp ]*
    expr    :: cmp_exp chained by NOT, then AND, then OR
    """
    # A grammar built by an earlier call is reused as-is.
    if self.bnf:
        return self.bnf

    def chain(operand, operator_token):
        # operand [ operator operand ]*, pushing each matched pair.
        repeated = (operator_token + operand).setParseAction(self.pushFirst)
        return operand + ZeroOrMore(repeated)

    # Operands
    decimal_point = Literal(".")
    fnumber = Combine(
        Word("+-" + nums, nums)
        + Optional(decimal_point + Optional(Word(nums)))
    )
    ident = Word(alphas, alphas + nums + "_$")
    sensor_prefix = CaselessLiteral('GET_SENSOR_VAL:').suppress()
    sensor_ident = sensor_prefix + Word(alphas + nums + "_$")
    sensor_ident.setParseAction(self.get_sensor_value)

    # Logical operators
    and_ = CaselessLiteral('AND')
    or_ = CaselessLiteral('OR')
    not_ = CaselessLiteral('NOT')

    # Comparison operators; two-character forms are tried before '<' / '>'
    neq = CaselessLiteral('!=')
    lt = Literal('<')
    eqlt = CaselessLiteral('<=')
    gt = Literal('>')
    eqgt = CaselessLiteral('>=')
    eq = CaselessLiteral('==')
    cmp_op = eqlt | eqgt | eq | neq | lt | gt

    # Arithmetic operators
    add_op = Literal("+") | Literal("-")
    mult_op = Literal("*") | Literal("/")

    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()

    expr = Forward()

    # Atom: an optionally negated value / sensor read / call, or a
    # parenthesised sub-expression.
    value = fnumber | sensor_ident | ident + lpar + expr + rpar
    value.setParseAction(self.pushFirst)
    signed_value = Optional("-") + value
    bracketed = lpar + expr.suppress() + rpar
    atom = signed_value | bracketed
    atom.setParseAction(self.pushUMinus)

    # Each level chains the one below it with its own operators.
    term = chain(atom, mult_op)
    add_exp = chain(term, add_op)
    cmp_exp = chain(add_exp, cmp_op)
    cmp_not_exp = chain(cmp_exp, not_)
    cmp_not_and_exp = chain(cmp_not_exp, and_)
    cmp_not_and_or_exp = chain(cmp_not_and_exp, or_)

    expr << cmp_not_and_or_exp
    self.bnf = expr
    return self.bnf
def pyparse_gml():
    """A pyparsing tokenizer for GML graph format.

    This is not intended to be called directly.

    The grammar is built on the first call and stored in the module-level
    ``graph`` name; later calls return that stored grammar.

    See Also
    --------
    write_gml, read_gml, parse_gml

    Notes
    -----
    This doesn't implement the complete GML specification for
    nested attributes for graphs, edges, and nodes.

    Raises
    ------
    ImportError
        If pyparsing cannot be imported.
    """
    global graph

    try:
        from pyparsing import \
            Literal, CaselessLiteral, Word, Forward,\
            ZeroOrMore, Group, Dict, Optional, Combine,\
            ParseException, restOfLine, White, alphas, alphanums, nums,\
            OneOrMore, quotedString, removeQuotes, dblQuotedString
    except ImportError:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ImportError(
            "Import Error: not able to import pyparsing: http://pyparsing.wikispaces.com/")

    if not graph:
        lbrack = Literal("[").suppress()
        rbrack = Literal("]").suppress()
        pound = ("#")
        comment = pound + Optional(restOfLine)
        point = Literal(".")
        e = CaselessLiteral("E")
        integer = Word(nums).setParseAction(lambda s, l, t: [int(t[0])])
        real = Combine(
            Word("+-" + nums, nums) +
            Optional(point + Optional(Word(nums))) +
            Optional(e + Word("+-" + nums, nums))
        ).setParseAction(lambda s, l, t: [float(t[0])])
        key = Word(alphas, alphanums + '_')
        # "^" picks the longest matching alternative
        value_atom = integer ^ real ^ Word(alphanums) ^ \
            quotedString.setParseAction(removeQuotes)

        value = Forward()   # to be defined later with << operator
        keyvalue = Group(key + value)
        # a value is either an atom or a bracketed list of key/value pairs
        value << (value_atom | Group(lbrack + ZeroOrMore(keyvalue) + rbrack))
        node = Group(
            Literal("node") + lbrack + Group(OneOrMore(keyvalue)) + rbrack)
        edge = Group(
            Literal("edge") + lbrack + Group(OneOrMore(keyvalue)) + rbrack)

        creator = Group(Literal("Creator") + Optional(restOfLine))
        version = Group(Literal("Version") + Optional(restOfLine))
        graphkey = Literal("graph").suppress()

        graph = Optional(creator) + Optional(version) + \
            graphkey + lbrack + ZeroOrMore((node | edge | keyvalue)) + rbrack
        graph.ignore(comment)

    return graph
def __init__(self):
    """
    Build the expression grammar (stored in ``self.pattern``) together with
    the operator table (``self.opn``) and the function table
    (``self.functions``).
    """
    # ---- number and identifier tokens ----
    point = Literal('.')
    e = CaselessLiteral('E')
    plusorminus = Literal('+') | Literal('-')
    number = Word(nums)
    integer = Combine(Optional(plusorminus) + number)
    floatnumber = Combine(
        integer
        + Optional(point + Optional(number))
        + Optional(e + integer)
    )
    # identifiers start with '$'
    ident = Word('$', alphanums + '_')

    # ---- operators and brackets ----
    addop = Literal("+") | Literal("-")
    multop = Literal("*") | Literal("/")
    expop = Literal("^")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()

    expr = Forward()

    def defineFunction(name, parameterCount=None):
        # name '(' args ')'. With no parameterCount any number of arguments
        # (including none) is accepted; otherwise exactly that many
        # comma-separated expressions are required.
        call = CaselessKeyword(name).setParseAction(self.pushEnd) + lpar
        if parameterCount is None:
            call += Optional(expr + ZeroOrMore(Literal(',') + expr))
        elif parameterCount > 0:
            call += expr
            for _ in range(parameterCount - 1):
                call += Literal(',') + expr
        call += rpar
        return call.setParseAction(self.pushFirst)

    # Function calls, in the order in which they are tried.
    function_calls = [
        defineFunction('max'),
        defineFunction('min'),
        defineFunction('cases'),
        defineFunction('cases1', parameterCount=5),
        defineFunction('cases2', parameterCount=8),
        defineFunction('cases3', parameterCount=11),
        defineFunction('cases333', parameterCount=11),
        defineFunction('round3down', parameterCount=1),
    ]

    # An atom is a function call, a bare operand or a bracketed expression.
    atom = function_calls[0]
    for call in function_calls[1:]:
        atom = atom | call
    operand = (e | floatnumber | integer | ident).setParseAction(
        self.pushFirst)
    atom = atom | operand | (lpar + expr.suppress() + rpar)

    # ---- precedence levels: exponent, then product, then sum ----
    factor = Forward()
    factor << atom + ZeroOrMore(
        (expop + factor).setParseAction(self.pushFirst))
    term = factor + ZeroOrMore(
        (multop + factor).setParseAction(self.pushFirst))
    expr << term + ZeroOrMore(
        (addop + term).setParseAction(self.pushFirst))

    self.pattern = expr + StringEnd()

    # map operator symbols to corresponding arithmetic operations
    wrap = self.handleNone
    self.opn = {
        "+": wrap(lambda a, b: a + b),
        "-": wrap(lambda a, b: a - b),
        "*": wrap(lambda a, b: a * b, none_survives=True),
        "/": wrap(lambda a, b: a / b, none_survives=True),
        "^": wrap(lambda a, b: a**b, none_survives=True),
    }

    # map function names used in expressions to their implementations
    self.functions = {
        'max': self.max,
        'min': self.min,
        'cases': self.cases,
        'cases1': self.cases1,
        'cases2': self.cases2,
        'cases3': self.cases3,
        'cases333': self.cases333,
        'round3down': self.round3down,
    }
def __init__(self):
    """
    Build the arithmetic grammar (``self.bnf``) and the lookup tables for
    operators (``self.opn``) and functions (``self.fn``).

    Please use any of the following symbols:
    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    """
    point = Literal(".")
    e = CaselessLiteral("E")
    fnumber = Combine(
        Word("+-" + nums, nums) +
        Optional(point + Optional(Word(nums))) +
        Optional(e + Word("+-" + nums, nums)))
    ident = Word(alphas, alphas + nums + "_$")
    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("*")
    div = Literal("/")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = plus | minus
    multop = mult | div
    expop = Literal("^")
    pi = CaselessLiteral("PI")
    expr = Forward()
    # An atom is an optionally signed constant / number / function call, or
    # an optionally signed parenthesised group.
    atom = ((Optional(oneOf("- +")) +
             (pi | e | fnumber | ident + lpar + expr + rpar
              ).setParseAction(self.pushFirst))
            | Optional(oneOf("- +")) + Group(lpar + expr + rpar)
            ).setParseAction(self.pushUMinus)

    # by defining exponentiation as "atom [ ^ factor ]..." instead of
    # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
    # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = Forward()
    factor << atom + ZeroOrMore(
        (expop + factor).setParseAction(self.pushFirst))
    term = factor + ZeroOrMore(
        (multop + factor).setParseAction(self.pushFirst))
    expr << term + ZeroOrMore(
        (addop + term).setParseAction(self.pushFirst))
    self.bnf = expr

    # this will map operator symbols to their corresponding arithmetic
    # operations
    epsilon = 1e-12
    self.opn = {
        "+": operator.add,
        "-": operator.sub,
        "*": operator.mul,
        "/": operator.truediv,
        "^": operator.pow,
    }
    self.fn = {
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "abs": abs,
        "trunc": lambda a: int(a),
        "round": round,
        # Sign of a, treating magnitudes up to epsilon as zero. Written
        # without cmp(), which is not available on Python 3.
        "sgn": lambda a: ((a > 0) - (a < 0)) if abs(a) > epsilon else 0,
    }
# Integers: convert the matched text to int.
cvt_int = lambda toks: int(toks[0])
integer.setParseAction(cvt_int)

# Booleans and None are matched as case-insensitive keywords and replaced by
# the corresponding Python object.
boolean_true = Keyword('True', caseless=True)
boolean_true.setParseAction(lambda x: True)
boolean_false = Keyword('False', caseless=True)
boolean_false.setParseAction(lambda x: False)
boolean = boolean_true | boolean_false

none = Keyword('None', caseless=True)
# Wrapped in a list so that the None value itself becomes the token.
cvt_none = lambda toks: [None]
none.setParseAction(cvt_none)

# Reals: either "digits." with optional fraction and optional exponent, or
# digits with an optional point/fraction and a mandatory exponent.
e = CaselessLiteral("e")
real = (Combine(Optional(oneOf('+ -')) + Word(nums) + '.' +
                Optional(Word(nums)) +
                Optional(e + Optional(oneOf('+ -')) + Word(nums))) |
        Combine(Optional(oneOf('+ -')) + Word(nums) + Optional('.') +
                Optional(Word(nums)) +
                e + Optional(oneOf('+ -')) + Word(nums))).setName('real')
cvt_real = lambda toks: float(toks[0])
real.setParseAction(cvt_real)

# Imaginary literal: a real followed by "j". By the time this action runs,
# toks[0] has already been converted to float by cvt_real, so it must be used
# as the imaginary part; complex(toks[0]) would put it in the real part and
# discard the "j".
cmplx = real + CaselessLiteral('j')
cvt_cmplx = lambda toks: complex(0.0, toks[0])
cmplx.setParseAction(cvt_cmplx)

# start[:stop[:step]] style index made of integers
array_index = integer + Optional(colon + integer + Optional(colon + integer))