def getSymbols(_exportAll=False, _colrefLiteral=None, _addGeoReferences=False):
    """returns an STC-S grammar with terminal values.

    The terminals for numbers and times are defined here and then handed
    to _getSTCSGrammar, which builds the rest of the grammar.

    If _colrefLiteral is given, it is accepted as an alternative wherever
    a number or a time literal is expected.  _exportAll and
    _addGeoReferences are passed on to _getSTCSGrammar.

    The result is the symbol dictionary returned by _getSTCSGrammar,
    updated with whatever _makeSymDict selects from this function's local
    names.  Because of that, the names of the locals below are part of
    the result; do not rename them casually.
    """
    with utils.pyparsingWhitechars("\n\t\r "):
        # decimal number without exponent: 12, 12., 12.5, .5, each with an
        # optional sign
        _exactNumericRE = r"[+-]?\d+(\.(\d+)?)?|[+-]?\.\d+"
        exactNumericLiteral = Regex(_exactNumericRE)
        # same, plus an optional exponent; the token is converted to a float
        numberLiteral = Regex(
            r"(?i)(%s)(E[+-]?\d+)?" % _exactNumericRE).addParseAction(
                lambda s, p, toks: float(toks[0]))
        # "JD <number>" and "MJD <number>"; the keyword is dropped, the
        # number is run through the respective converter from times
        jdLiteral = (
            Suppress(Literal("JD")) + exactNumericLiteral).addParseAction(
                lambda s, p, toks: times.jdnToDateTime(float(toks[0])))
        mjdLiteral = (
            Suppress(Literal("MJD")) + exactNumericLiteral).addParseAction(
                lambda s, p, toks: times.mjdToDateTime(float(toks[0])))
        # ISO-like date with optional dashes/colons and an optional time part
        isoTimeLiteral = Regex(
            r"\d\d\d\d-?\d\d-?\d\d(T\d\d:?\d\d:?\d\d(\.\d*)?Z?)?"
            ).addParseAction(lambda s, p, toks: times.parseISODT(toks[0]))
        timeLiteral = (isoTimeLiteral | jdLiteral | mjdLiteral)

        if _colrefLiteral:
            # ^ is pyparsing's "longest match wins" alternation
            numberLiteral = _colrefLiteral ^ numberLiteral
            timeLiteral = _colrefLiteral ^ timeLiteral

        res = _getSTCSGrammar(numberLiteral, timeLiteral, _exportAll,
            _addGeoReferences=_addGeoReferences)
        # add the terminals defined above to the exported symbols
        res.update(_makeSymDict(locals(), _exportAll))
        return res
def getComplexGrammar(baseLiteral, pmBuilder, errorLiteral=None, nodeClass=NumericNode):
    """returns the root element of a grammar parsing numeric vizier-like
    expressions.

    This is used for both dates and floats; use baseLiteral to match the
    operand terminal.  The trouble with dates is that the +/- operator has
    a simple float as the second operand, and that's why you can pass in
    an errorLiteral and a pmBuilder.

    Arguments:

    * baseLiteral -- pyparsing element matching a single operand.
    * pmBuilder -- parse action attached to "value +/- error" expressions.
    * errorLiteral -- element matching the error operand of +/-; defaults
      to baseLiteral.
    * nodeClass -- node class handed to the node factories for all other
      expression types.

    The returned element only matches if the expression extends to the
    end of the input string.
    """
    if errorLiteral is None:
        errorLiteral = baseLiteral

    with utils.pyparsingWhitechars(" \t"):
        # two-character operators must be tried before their one-character
        # prefixes
        preOp = Literal("=") | Literal(">=") | Literal(">") | Literal(
            "<=") | Literal("<")
        rangeOp = Literal("..")
        # accept both the ASCII spelling and the plus-minus sign (U+00B1);
        # a unicode literal avoids decoding a byte string at run time
        pmOp = Literal("+/-") | Literal(u"\xb1")
        orOp = Literal("|")
        andOp = Literal("&")
        notOp = Literal("!")
        commaOp = Literal(",")

        preopExpr = Optional(preOp) + baseLiteral
        rangeExpr = baseLiteral + Suppress(rangeOp) + baseLiteral
        valList = baseLiteral + OneOrMore(Suppress(commaOp) + baseLiteral)
        pmExpr = baseLiteral + Suppress(pmOp) + errorLiteral
        # preopExpr must come last: it also matches a bare operand and
        # would otherwise shadow the longer forms
        simpleExpr = rangeExpr | pmExpr | valList | preopExpr

        expr = Forward()

        notExpr = Optional(notOp) + simpleExpr
        andExpr = notExpr + ZeroOrMore(Suppress(andOp) + notExpr)
        orExpr = andExpr + ZeroOrMore(Suppress(orOp) + expr)
        expr << orExpr
        exprInString = expr + StringEnd()

        rangeExpr.setName("rangeEx")
        rangeOp.setName("rangeOp")
        notExpr.setName("notEx")
        andExpr.setName("andEx")
        andOp.setName("&")
        orExpr.setName("orEx")
        expr.setName("expr")
        simpleExpr.setName("simpleEx")

        preopExpr.addParseAction(_simpleExprFactory(nodeClass))
        rangeExpr.addParseAction(_getNodeFactory("..", nodeClass))
        pmExpr.addParseAction(pmBuilder)
        valList.addParseAction(_getNodeFactory(",", nodeClass))
        notExpr.addParseAction(_makeNotNodeFactory(nodeClass))
        andExpr.addParseAction(_getBinopFactory("AND", nodeClass))
        orExpr.addParseAction(_getBinopFactory("OR", nodeClass))
        return exprInString
def _getSQLScriptGrammar():
    """returns a pyparsing ParserElement that splits SQL scripts into
    individual commands.

    Statements are terminated by a semicolon (plus any following newlines)
    or by the end of the input.  Comments are dropped; string literals,
    double-quoted identifiers and dollar-quoted strings are passed through
    as they are, so semicolons inside them do not split statements.

    Parsing yields a list with one string per statement; statements
    consisting of whitespace only are removed.
    """
    def _joinAtoms(s, p, toks):
        return " ".join(toks)

    def _dropBlankStatements(s, p, toks):
        return [t for t in toks.asList() if str(t).strip()]

    with utils.pyparsingWhitechars(" \t"):
        atom = Forward()
        atom.setName("Atom")

        dashComment = Literal("--") + SkipTo("\n", include=True)
        blockComment = Literal("/*") + SkipTo("*/", include=True)
        comment = dashComment | blockComment

        newline = Literal("\n")

        sqString = QuotedString(
            quoteChar="'",
            escChar="\\",
            multiline=True,
            unquoteResults=False)
        delimitedId = QuotedString(
            quoteChar='"',
            escChar="\\",
            unquoteResults=False)
        dollarQuoted = Regex(r"(?s)\$(\w*)\$.*?\$\1\$")
        dollarQuoted.setName("dollarQuoted")

        # A delimited identifier is not really a string literal, but for
        # splitting purposes it behaves like one, so it is grouped here.
        strLiteral = sqString | dollarQuoted | delimitedId
        strLiteral.setName("strLiteral")

        # anything that cannot start a literal or end a statement
        plainText = Regex("[^;'\"$]+")
        plainText.setName("other")

        # a dollar sign that is not followed by another one
        loneDollar = Literal("$") + ~Literal("$")

        statementEnd = (
            Literal(';') + ZeroOrMore(newline)
            | StringEnd())

        atom << (Suppress(comment) | plainText | strLiteral | loneDollar)

        statement = OneOrMore(atom) + Suppress(statementEnd)
        statement.setName("statement")
        statement.setParseAction(_joinAtoms)

        script = OneOrMore(statement) + StringEnd()
        script.setName("script")
        script.setParseAction(_dropBlankStatements)

        if False:
            # development aid: change the condition to trace the parser
            for traced in (atom, comment, plainText, strLiteral, statement,
                    statementEnd, dollarQuoted, loneDollar):
                traced.setDebug(True)

        return script
def getXMLGrammar(manipulator):
    """returns a dictionary of pyparsing symbols for a simple XML grammar.

    The grammar keeps all whitespace (leaveWhitespace is called on every
    symbol), so the input can be reproduced from the parse result.  The
    symbol matching a complete input is "document".

    manipulator must have the methods _feedElement and _openElement; they
    are installed as parse actions on complete elements and on tag
    openers, respectively.

    The returned dictionary is this function's local namespace, so the
    names of the locals below are the keys callers see.
    """
    with utils.pyparsingWhitechars("\r"):
        name = Word(alphas + "_:", alphanums + ".:_-")
        opener = Literal("<")
        closer = Literal(">")
        value = (
            QuotedString(quoteChar="'", multiline=True, unquoteResults=False)
            | QuotedString(quoteChar='"', multiline=True,
                unquoteResults=False))
        attribute = (name + Literal("=") + value)
        tagOpener = (opener + name
            + ZeroOrMore(White() + attribute)
            + Optional(White()))

        openingTag = (tagOpener + closer)
        closingTag = (opener + Literal("/") + name
            + Optional(White()) + closer)
        emptyTag = (tagOpener + Optional(White()) + Literal("/>"))

        # include is a boolean flag; the original passed the string "True",
        # which only worked because non-empty strings are truthy
        processingInstruction = (opener + Literal("?")
            + SkipTo("?>", include=True))
        comment = (opener + Literal("!--")
            + SkipTo("-->", include=True))
        cdataSection = (opener + Literal("![CDATA[")
            + SkipTo("]]>", include=True))

        nonTagStuff = CharsNotIn("<", min=1)

        docItem = Forward()
        element = ((openingTag + ZeroOrMore(docItem) + closingTag)
            | emptyTag)
        docItem << (element
            | processingInstruction
            | comment
            | cdataSection
            | nonTagStuff)

        document = (ZeroOrMore(Optional(White()) + docItem)
            + Optional(White()) + StringEnd())
        document.parseWithTabs()

        element.addParseAction(manipulator._feedElement)
        tagOpener.addParseAction(manipulator._openElement)
        attribute.addParseAction(lambda s, p, t: [Attribute(t)])
        openingTag.addParseAction(_nodify)
        closingTag.addParseAction(_nodify)
        emptyTag.addParseAction(_nodify)

        # manipulator is not a parser element and must not end up in the
        # returned symbol dictionary
        del manipulator
        # Iterate over a snapshot: the loop variable is itself a local, and
        # the live locals() dict can get re-synced (e.g., when a trace
        # function is active), which would change it during iteration.
        for el in list(locals().values()):
            # this *really* shouldn't be necessary
            el.leaveWhitespace()
        del el

        return locals()
def _getFieldsGrammar():
    """returns a grammar for a sequence of ``identifier(formatCode)`` items.

    formatCode is either a number followed by ``s`` or one of the single
    letters b, B, h, H, i, I, q, Q, f, d.  Every item is turned into a
    dictionary with the keys "identifier" and "formatCode"; the grammar
    requires that the items extend to the end of the input.
    """
    with utils.pyparsingWhitechars(" \n\t\r"):
        # the last character of the pattern is cut off -- presumably a
        # trailing "$" anchor; verify against utils.identifierPattern
        identifier = pyparsing.Regex(
            utils.identifierPattern.pattern[:-1]).setName("identifier")
        # raw string so that \d reaches the regex engine without relying
        # on Python passing unknown string escapes through
        formatCode = pyparsing.Regex(
            r"\d+s|[bBhHiIqQfd]").setName("fieldSpec")
        field = (
            identifier("identifier")
            + pyparsing.Suppress(pyparsing.Literal("("))
            + formatCode("formatCode")
            + pyparsing.Suppress(pyparsing.Literal(")"))
            ).setParseAction(lambda s, p, t: dict(t))
        return pyparsing.OneOrMore(field) + pyparsing.StringEnd()
def getUploadGrammar():
    """returns a grammar for upload specifications.

    An upload specification is a semicolon-separated list of
    ``tableName,uri`` items.  Each item is turned into a (name, uri)
    tuple; the grammar requires the list to extend to the end of the
    input.
    """
    from gavo.imp.pyparsing import (Word, ZeroOrMore, Suppress, StringEnd,
        alphas, alphanums, CharsNotIn)

    def _makePair(s, p, t):
        return (t["name"], t["uri"])

    with utils.pyparsingWhitechars(" \t"):
        # Should we allow more tableNames?
        tableName = Word(alphas + "_", alphanums + "_")
        # What should we allow/forbid in terms of URIs?
        uri = CharsNotIn(" ;,")

        uploadSpec = tableName("name") + "," + uri("uri")
        uploadSpec.addParseAction(_makePair)

        moreSpecs = ZeroOrMore(Suppress(";") + uploadSpec)
        uploads = uploadSpec + moreSpecs + StringEnd()
        return uploads
def getSimpleSTCSParser():
    """returns a function parsing simple STC-S geometry strings.

    The returned function takes a string s and returns

    * None if s is None or consists of whitespace only,
    * s itself if it already is a pgsphere.PgSAdapter,
    * otherwise the first token the grammar produces for s; an empty
      cooSys or one equal to "unknownframe" (case-insensitively) is
      replaced by "UNKNOWN".

    It raises common.STCSParseError if s does not match the grammar.

    The grammar accepts position, circle, box, and polygon statements as
    well as UNION, INTERSECTION, and NOT expressions over them.
    """
    from gavo.imp.pyparsing import (
        Regex, CaselessKeyword, OneOrMore, Forward, Suppress,
        Optional, ParseException, ParseSyntaxException)

    with utils.pyparsingWhitechars(" \t\n\r"):
        frameRE = _makeRE(TAP_SYSTEMS)
        refposRE = _makeRE(TAP_REFPOS)
        flavorRE = _makeRE(TAP_FLAVORS)
        # optional frame, reference position, and flavor, in this order
        systemRE = (r"(?i)\s*"
            r"(?P<frame>%s)?\s*"
            r"(?P<refpos>%s)?\s*"
            r"(?P<flavor>%s)?\s*")%(
                frameRE, refposRE, flavorRE)
        coordsRE = r"(?P<coords>(%s\s*)+)"%utils.floatRE

        # raw strings keep \s intact without relying on Python passing
        # unknown string escapes through
        simpleStatement = Regex(r"(?i)\s*"
            r"(?P<shape>position|circle|box|polygon)"
            +systemRE
            +coordsRE)
        simpleStatement.setName("STC-S geometry")
        simpleStatement.addParseAction(
            lambda s,p,t: _makePgSphereInstance(t))
        # not referenced by the grammar below; kept as in the original
        system = Regex(systemRE)
        system.setName("STC-S system spec")
        region = Forward()
        notExpr = (CaselessKeyword("NOT")
            + Suppress('(')
            + region
            + Suppress(')'))
        notExpr.addParseAction(
            lambda s,p,t: GeomExpr("UNKNOWN", "NOT", (t[1],)))
        opExpr = (
            (
                CaselessKeyword("UNION")
                | CaselessKeyword("INTERSECTION"))("op")
            + Optional(Regex(frameRE))("frame")
            + Optional(Regex(refposRE))
            + Optional(Regex(flavorRE))
            + Suppress("(")
            + region
            + OneOrMore(region)
            + Suppress(")"))
        opExpr.addParseAction(
            lambda s,p,t: GeomExpr(str(t["frame"]), t[0].upper(), t[2:]))
        region << (simpleStatement | opExpr | notExpr)

    def parse(s):
        if s is None or not s.strip():
            # special service: Null values
            return None
        if isinstance(s, pgsphere.PgSAdapter):
            return s

        try:
            res = utils.pyparseString(region, s, parseAll=True)[0]
            if not res.cooSys or res.cooSys.lower()=='unknownframe':
                # Sigh.
                res.cooSys = "UNKNOWN"
            return res
        except (ParseException, ParseSyntaxException) as msg:
            raise common.STCSParseError("Invalid STCS (%s)"%str(msg))

    # hand the parser to the caller; without this, all of the above
    # would be built and then thrown away
    return parse
def getColrefSymbols():
    """returns an STC-S grammar with column references as values.

    A column reference is a simple identifier (letters, digits and
    underscores, not starting with a digit) enclosed in double quotes;
    the quoting cuts down on ambiguities.  Identifiers that would need
    quoting in the SQL sense are not supported.

    The result is what getSymbols returns when column references are
    accepted in place of numbers and times and geometry references are
    enabled.
    """
    with utils.pyparsingWhitechars("\n\t\r "):
        atomicColRef = Regex('"[A-Za-z_][A-Za-z_0-9]*"')
        # [1:-1] strips the enclosing double quotes
        atomicColRef.addParseAction(
            lambda s, p, toks: common.ColRef(toks[0][1:-1]))
        return getSymbols(
            _colrefLiteral=atomicColRef,
            _addGeoReferences=True)
def _getModelGrammar():
    """returns a grammar for meta model specifications.

    A model is a comma-separated list of assertions.  Each assertion is
    a meta key (letters and dots), optionally followed by a modifier in
    parentheses; the modifier is "!", "1", or empty.

    Each assertion is built by looking up the tuple of modifier tokens in
    _assertionCodes and calling the result with the key.  The complete
    list of assertions is wrapped into a MetaValidator.
    """
    from gavo.imp.pyparsing import (Literal, Optional, StringEnd, Suppress,
        Word, ZeroOrMore, alphas)

    def _buildAssertion(s, p, toks):
        # without a modifier, the lookup key is the empty tuple
        modKey = tuple(toks.get("mod", ()))
        metaName = str(toks["key"])
        return _assertionCodes[modKey](metaName)

    def _buildValidator(s, p, toks):
        return MetaValidator(toks)

    with utils.pyparsingWhitechars(" \t"):
        metaKey = Word(alphas + ".")
        modChar = Literal('!') | '1'
        modifier = Suppress('(') + Optional(modChar) + Suppress(')')

        assertion = metaKey("key") + Optional(modifier)("mod")
        assertion.addParseAction(_buildAssertion)

        furtherAssertions = ZeroOrMore(Suppress(',') + assertion)
        model = assertion + furtherAssertions + StringEnd()
        model.addParseAction(_buildValidator)
        return model
def _getColDefGrammar(self):
    """returns a grammar for column definitions.

    The input is a whitespace-separated sequence of ``key: range``
    clauses.  A range is one of ``n``, ``n-``, ``n-m``, or ``-m``, where
    whitespace around the dash is ignored.  Each clause is turned into
    a (key, range) tuple, with range being the matched text as a single
    string.
    """
    def _makePair(s, p, t):
        # t is [key, ":", range]
        return (t[0], t[2])

    with utils.pyparsingWhitechars("\n\t\r "):
        digits = pyparsing.Word(pyparsing.nums)
        # need to manually swallow whitespace after literals
        optWhite = pyparsing.Suppress(
            pyparsing.Optional(pyparsing.White()))
        hyphen = optWhite + pyparsing.Literal("-") + optWhite

        openStart = hyphen + optWhite + digits
        withStart = digits + pyparsing.Optional(
            hyphen + pyparsing.Optional(digits))
        colRange = pyparsing.Combine(openStart | withStart)
        colRange.setName("Column range")

        # the last character of the pattern is cut off -- presumably a
        # trailing "$" anchor; verify against utils.identifierPattern
        colKey = pyparsing.Regex(utils.identifierPattern.pattern[:-1])
        colKey.setName("Column key")

        clause = colKey + pyparsing.Literal(":") + optWhite + colRange
        clause.addParseAction(_makePair)

        return pyparsing.ZeroOrMore(clause) + pyparsing.StringEnd()
def _getMacroGrammar(self, debug=False):
    """returns a grammar matching one backslash construct of macro text.

    The returned element matches, in this order of preference,

    * ``\\\\`` (two backslashes), replaced by a single backslash,
    * a backslash followed by a newline, replaced by a blank,
    * ``\\+``, replaced by the empty string,
    * a macro call: a backslash, a name of at least two identifier
      characters, and any number of arguments in curly braces.  Macro
      calls are handed to self._execMacro.

    Within arguments, macro calls may be nested, and ``\\{`` and ``\\}``
    stand for literal braces.

    debug is accepted but not used here.
    """
    def _replaceWith(text):
        # parse action ignoring its arguments and returning text
        return lambda *args: text

    def _concatenate(s, pos, toks):
        return "".join(toks)

    with utils.pyparsingWhitechars(" \t"):
        macro = Forward()

        escapedOpen = Literal("\\{").addParseAction(_replaceWith("{"))
        escapedClose = Literal("\\}").addParseAction(_replaceWith("}"))
        quoteEscape = escapedOpen | escapedClose
        charRun = Regex(r"[^}\\]+")

        argElement = macro | quoteEscape | charRun
        argument = Suppress("{") + ZeroOrMore(argElement) + Suppress("}")
        argument.addParseAction(_concatenate)

        # no whitespace may be skipped in front of the name or the
        # argument list
        arguments = ZeroOrMore(argument)
        arguments.setWhitespaceChars("")
        macroName = Regex("[A-Za-z_][A-Za-z_0-9]+")
        macroName.setWhitespaceChars("")

        macro << (Suppress("\\") + macroName + arguments)
        macro.addParseAction(self._execMacro)

        literalBackslash = Literal("\\\\")
        literalBackslash.addParseAction(_replaceWith("\\"))

        suppressedLF = Literal("\\\n")
        suppressedLF.addParseAction(_replaceWith(" "))

        glue = Literal("\\+")
        glue.addParseAction(_replaceWith(""))

        return literalBackslash | suppressedLF | glue | macro
def impl(cls):
    """builds the object notation grammar, stores its symbols in
    cls.symbols, and returns the obj production.

    After the grammar is built, every module-level name starting with
    ``_pa_`` is installed as the parse action of the local symbol named
    by the rest of that name (so ``_pa_obj`` becomes the parse action
    of ``obj``).

    cls.symbols is this function's local namespace; besides the grammar
    symbols, it therefore also contains the names imported here and the
    loop variables.  The local names are looked up by name, so they must
    not be changed without adapting the ``_pa_`` functions.
    """
    from gavo.imp.pyparsing import (Word, Literal, alphas, alphanums,
        QuotedString, Forward, ZeroOrMore, Group, Optional)

    with utils.pyparsingWhitechars("\t\n\r "):
        # identifiers; only qualified ones may contain colons
        qualifiedIdentifier = Word(alphas + "_:", alphanums + "-._:")
        plainIdentifier = Word(alphas + "_", alphanums + "-._")
        externalIdentifier = Word(alphas + "_", alphanums + "._/#-")
        # literals: bare words, or double-quoted strings in which a quote
        # is written as two quotes
        plainLiteral = Word(alphanums + "_-.")
        quotedLiteral = QuotedString(quoteChar='"', escQuote='""')
        # @identifier
        reference = Literal('@') + externalIdentifier

        complexImmediate = Forward()
        simpleImmediate = plainLiteral | quotedLiteral
        value = reference | complexImmediate | simpleImmediate

        # name: value
        attributeDef = (plainIdentifier
            + Literal(":")
            + value)
        # (type) in front of objects and, optionally, of collections
        typeAnnotation = (Literal('(')
            + qualifiedIdentifier
            + Literal(')'))
        # { attributeDef* }
        objectBody = (Literal('{')
            + Group(ZeroOrMore(attributeDef))
            + Literal('}'))
        obj = typeAnnotation + objectBody

        # [ (value | objectBody)* ]
        sequenceBody = (Literal('[')
            + Group(ZeroOrMore(value | objectBody))
            + Literal(']'))
        collection = Optional(typeAnnotation) + sequenceBody

        complexImmediate << (obj | collection)

    # attach the module-level _pa_<symbol> functions to their symbols
    for n, func in globals().iteritems():
        if n.startswith("_pa_"):
            locals()[n[4:]].setParseAction(func)

    cls.symbols = locals()
    return obj
def _getShPatGrammar():
    """returns a grammar to translate posix shell patterns to posix
    regular expressions.

    In contrast to fnmatch.translate, this handles escaping correctly.

    The tokens resulting from a parse are regular expression fragments:
    ``*`` becomes ``.*``, ``?`` becomes ``.``, a backslash escape becomes
    the regex-escaped second character, bracketed sets are handed to
    _mungeEnumSequence, an opening bracket not starting a set becomes
    an escaped bracket, and all other text is regex-escaped.
    """
    from gavo.imp.pyparsing import (Literal, Regex, CharsNotIn, ZeroOrMore,
        QuotedString)

    def _emit(fragment):
        # parse action replacing whatever was matched with fragment
        def action(s, p, t):
            return fragment
        return action

    def _escapeEscaped(s, p, t):
        # t[0] is the backslash plus the escaped character
        return re.escape(t[0][1])

    def _escapeLiteral(s, p, t):
        return re.escape("".join(t))

    # no whitespace skipping at all: blanks are pattern characters
    with utils.pyparsingWhitechars(""):
        enumChars = QuotedString(
            quoteChar="[",
            endQuoteChar="]",
            escChar="\\")
        enumChars.addParseAction(_mungeEnumSequence)

        noEnum = Literal("[")
        noEnum.addParseAction(_emit("\\["))

        star = Literal("*")
        star.addParseAction(_emit(".*"))

        questionmark = Literal("?")
        questionmark.addParseAction(_emit("."))

        escSeq = Regex(r"\\(.)")
        escSeq.addParseAction(_escapeEscaped)

        normalStuff = CharsNotIn(r"*?[\\")
        normalStuff.addParseAction(_escapeLiteral)

        return ZeroOrMore(
            escSeq
            | enumChars
            | noEnum
            | star
            | questionmark
            | normalStuff)
def getStringGrammar():
    """returns a grammar for parsing vizier-like string expressions.

    An expression is one of

    * an enumeration (``=,a,b``, ``!=,a,b``, or ``=|a|b``),
    * a comparison operator followed by an operand,
    * a pattern operator (``~``, ``=``, ``!~``, ``!``) followed by a shell
      style pattern with ``*``, ``?``, and ``[...]``,
    * a naked operand, which is treated as an ``==`` comparison.

    The returned element requires the expression to extend to the end of
    the input.
    """
    # XXX TODO: should we cut at =| (which is currently parsed as = |)?
    def _makeNakedNode(s, p, toks):
        return makeDefaultExpr(s, p, ["".join(toks)])

    with utils.pyparsingWhitechars(" \t"):
        simpleOperator = (Literal("==") | Literal("!=") | Literal(">=")
            | Literal(">") | Literal("<=") | Literal("<") | Literal("=~")
            | Literal("=,"))
        simpleOperand = Regex(r"[^\s].*|")
        # XXX probably a bug in pyparsing: explicit blanks shouldn't be
        # necessary here
        blanks = Word(" \t")
        simpleExpr = simpleOperator + Optional(blanks) + simpleOperand

        commaOperand = Regex("[^,]+")
        barOperand = Regex("[^|]+")
        moreCommaOperands = ZeroOrMore(Suppress(",") + commaOperand)
        commaEnum = Literal("=,") + commaOperand + moreCommaOperands
        moreExcludedOperands = ZeroOrMore(Suppress(",") + commaOperand)
        exclusionEnum = (Literal("!=,") + commaOperand
            + moreExcludedOperands)
        barEnum = (Literal("=|") + barOperand
            + ZeroOrMore(Suppress("|") + barOperand))
        enumExpr = exclusionEnum | commaEnum | barEnum

        patLiterals = CharsNotIn("[*?")
        wildStar = Literal("*")
        wildQmark = Literal("?")
        setElems = CharsNotIn("]")
        setSpec = Suppress("[") + setElems + Suppress("]")
        pattern = OneOrMore(setSpec | wildStar | wildQmark | patLiterals)

        patternOperator = (Literal("~") | Literal("=") | Literal("!~")
            | Literal("!"))
        patternExpr = patternOperator + Optional(blanks) + pattern

        nakedExpr = Regex("[^=!~|><]") + Optional(simpleOperand)

        stringExpr = enumExpr | simpleExpr | patternExpr | nakedExpr

        doc = stringExpr + StringEnd()

        for symbol, label in (
                (stringExpr, "StringExpr"),
                (enumExpr, "EnumExpr"),
                (simpleOperand, "Operand"),
                (simpleOperator, "Operator"),
                (nakedExpr, "SingleOperand")):
            symbol.setName(label)

        # set to True to have pyparsing trace the symbols below
        debug = False
        for symbol in (stringExpr, enumExpr, patLiterals, simpleOperand,
                simpleOperator, nakedExpr):
            symbol.setDebug(debug)

        for symbol in (simpleExpr, patternExpr, enumExpr):
            symbol.addParseAction(_makeOpNode)

        makeDefaultExpr = _getNodeFactory("==", StringNode)
        nakedExpr.addParseAction(_makeNakedNode)

        for symbol in (wildStar, wildQmark):
            symbol.addParseAction(_makeOpNode)
        setElems.addParseAction(_getNodeFactory("[", StringNode))

        return doc
def _getSTCSGrammar(numberLiteral, timeLiteral, _exportAll=False, _addGeoReferences=False):
    """returns a dictionary of symbols for a grammar parsing STC-S into
    a concrete syntax tree.

    numberLiteral and timeLiteral are pyparsing symbols for numbers and
    datetimes, respectively.

    _addGeoReferences lets you write references to vectors, i.e., simple
    identifiers in square brackets, where coordinates are expected (like
    Circle [center] 20.).

    The dictionary is what _makeSymDict selects from this function's
    local names (given _exportAll), so the names of the locals below are
    part of the result.  The symbol matching a complete STC-S string is
    stcsPhrase.
    """
    with utils.pyparsingWhitechars("\n\t\r "):

        number = numberLiteral
        # keep the argument's name out of the exported locals
        del numberLiteral

# units
        # the - operator (rather than +) makes pyparsing stop backtracking
        # once the "unit" keyword has been seen
        _unitOpener = Suppress(CaselessKeyword("unit"))
        _spaceUnitWord = Regex(_reFromKeys(spatialUnits))
        _timeUnitWord = Regex(_reFromKeys(temporalUnits))
        spaceUnit = _unitOpener - OneOrMore(_spaceUnitWord).addParseAction(
            _stringifyBlank)("unit")
        timeUnit = _unitOpener - _timeUnitWord("unit")
        spectralUnit = _unitOpener - Regex(_reFromKeys(spectralUnits))("unit")
        redshiftUnit = _unitOpener - (
            (_spaceUnitWord + "/" + _timeUnitWord).addParseAction(_stringify)
            | CaselessKeyword("nil"))("unit")
        velocityUnit = _unitOpener - (OneOrMore(
            (_spaceUnitWord + "/" + _timeUnitWord).addParseAction(
                _stringify)).addParseAction(_stringifyBlank))("unit")

# basic productions common to most STC-S subphrases
        astroYear = Regex("[BJ][0-9]+([.][0-9]*)?")
        fillfactor = (Suppress(CaselessKeyword("fillfactor"))
            + number("fillfactor"))
        # frames that do not take an equinox
        noEqFrame = (CaselessKeyword("J2000")
            | CaselessKeyword("B1950")
            | CaselessKeyword("ICRS")
            | CaselessKeyword("GALACTIC")
            | CaselessKeyword("GALACTIC_I")
            | CaselessKeyword("GALACTIC_II")
            | CaselessKeyword("SUPER_GALACTIC")
            | CaselessKeyword("GEO_C")
            | CaselessKeyword("GEO_D")
            | CaselessKeyword("HPR")
            | CaselessKeyword("HGS")
            | CaselessKeyword("HGC")
            | CaselessKeyword("HPC")
            | CaselessKeyword("UNKNOWNFrame"))("frame")
        # frames that may be followed by an equinox
        eqFrameName = (CaselessKeyword("FK5")
            | CaselessKeyword("FK4")
            | CaselessKeyword("ECLIPTIC"))("frame")
        eqFrame = eqFrameName + Optional(astroYear("equinox"))
        frame = eqFrame | noEqFrame
        plEphemeris = CaselessKeyword("JPL-DE200") | CaselessKeyword(
            "JPL-DE405")
        refpos = ((Regex(_reFromKeys(common.stcRefPositions)))("refpos")
            + Optional(plEphemeris("plEphemeris")))
        flavor = (Regex(_reFromKeys(stcsFlavors)))("flavor")

# properties of coordinates
        error = Suppress(CaselessKeyword("Error")) + OneOrMore(number)
        resolution = Suppress(
            CaselessKeyword("Resolution")) + OneOrMore(number)
        size = Suppress(CaselessKeyword("Size")) + OneOrMore(number)
        pixSize = Suppress(CaselessKeyword("PixSize")) + OneOrMore(number)
        # all optional, but only accepted in this order
        cooProps = (Optional(error("error"))
            + Optional(resolution("resolution"))
            + Optional(size("size"))
            + Optional(pixSize("pixSize")))

# properties of most spatial specs
        _coos = ZeroOrMore(number)("coos")
        _pos = Optional(ZeroOrMore(number)("pos"))
        if _addGeoReferences:
            # include references to vectors, for getColrefSymbols
            # a reference is [identifier]; [1:-1] strips the brackets
            complexColRef = Regex(
                '[[][A-Za-z_][A-Za-z_0-9]*[]]').addParseAction(
                    lambda s, p, toks: common.GeometryColRef(toks[0][1:-1]))
            _coos = complexColRef("coos") | _coos
            _pos = complexColRef("pos") | _pos
        positionSpec = Suppress(CaselessKeyword("Position")) + _pos
        epochSpec = Suppress(CaselessKeyword("Epoch")) - astroYear
        _spatialProps = Optional(spaceUnit) + cooProps
        velocitySpec = (CaselessKeyword("Velocity")("type")
            + OneOrMore(number)("pos"))
        velocityInterval = (Optional(
                CaselessKeyword("VelocityInterval")("type")
                + Optional(fillfactor)
                + _coos)
            + Optional(velocitySpec)
            + Optional(velocityUnit)
            + cooProps).addParseAction(makeTree)
        _spatialTail = (_spatialProps
            + Optional(velocityInterval)("velocity"))
        _regionTail = Optional(positionSpec) + _spatialTail
        _commonSpaceItems = (
            frame
            + Optional(refpos)
            + Optional(flavor)
            + Optional(epochSpec("epoch").addParseAction(_stringify)))
        _commonRegionItems = Optional(fillfactor) + _commonSpaceItems

# times and time intervals
        timescale = (Regex("|".join(common.stcTimeScales)))("timescale")
        timephrase = Suppress(CaselessKeyword("Time")) + timeLiteral
        _commonTimeItems = Optional(timeUnit) + cooProps
        _intervalOpener = (Optional(fillfactor)
            + Optional(timescale("timescale"))
            + Optional(refpos))
        _intervalCloser = Optional(timephrase("pos")) + _commonTimeItems

        timeInterval = (CaselessKeyword("TimeInterval")("type")
            + _intervalOpener
            + ZeroOrMore(timeLiteral)("coos")
            + _intervalCloser)
        # the second argument to setResultsName is listAllMatches
        startTime = (CaselessKeyword("StartTime")("type")
            + _intervalOpener
            + timeLiteral.setResultsName("coos", True)
            + _intervalCloser)
        stopTime = (CaselessKeyword("StopTime")("type")
            + _intervalOpener
            + timeLiteral.setResultsName("coos", True)
            + _intervalCloser)
        time = (CaselessKeyword("Time")("type")
            + Optional(timescale("timescale"))
            + Optional(refpos)
            + Optional(timeLiteral.setResultsName("pos", True))
            + _commonTimeItems)
        timeSubPhrase = (timeInterval
            | startTime
            | stopTime
            | time).addParseAction(makeTree)

# atomic "geometries"; I do not bother to specify their actual
# arguments since, without knowing the frame, they may be basically
# anything.  Also, I want to allow geometry column references.
        _atomicGeometryKey = (CaselessKeyword("AllSky").setName("sub-geometry")
            | CaselessKeyword("Circle")
            | CaselessKeyword("Ellipse")
            | CaselessKeyword("Box")
            | CaselessKeyword("Polygon")
            | CaselessKeyword("Convex")
            | CaselessKeyword("PositionInterval"))
        atomicGeometry = (_atomicGeometryKey("type")
            + _commonRegionItems
            + _coos
            + _regionTail)

# compound "geometries"
        # operands are either atomic geometries (without frame information)
        # or nested compound expressions; each is turned into a dictionary
        _compoundGeoExpression = Forward()
        _compoundGeoOperand = (
            (_atomicGeometryKey("subtype") + _coos)
            | _compoundGeoExpression).addParseAction(lambda s, p, t: dict(t))

        # Not takes exactly one operand
        _compoundGeoOperatorUnary = CaselessKeyword("Not")
        _compoundGeoOperandsUnary = (Suppress('(')
            + _compoundGeoOperand + Suppress(')'))
        _compoundGeoExprUnary = (_compoundGeoOperatorUnary("subtype")
            + _compoundGeoOperandsUnary("children"))

        # Difference takes exactly two operands
        _compoundGeoOperatorBinary = CaselessKeyword("Difference")
        _compoundGeoOperandsBinary = (Suppress('(')
            + _compoundGeoOperand + _compoundGeoOperand + Suppress(')'))
        _compoundGeoExprBinary = (_compoundGeoOperatorBinary("subtype")
            + _compoundGeoOperandsBinary("children"))

        # Union and Intersection take two or more operands
        _compoundGeoOperatorNary = (CaselessKeyword("Union")
            | CaselessKeyword("Intersection"))
        _compoundGeoOperandsNary = (Suppress('(')
            + _compoundGeoOperand + _compoundGeoOperand
            + ZeroOrMore(_compoundGeoOperand) + Suppress(')'))
        _compoundGeoExprNary = (_compoundGeoOperatorNary("subtype")
            + _compoundGeoOperandsNary("children"))

        _compoundGeoExpression << (_compoundGeoExprUnary
            | _compoundGeoExprBinary
            | _compoundGeoExprNary)
        # top-level compound geometries carry frame information and the
        # usual region tail in addition to their operands
        compoundGeoPhrase = (
            _compoundGeoOperatorUnary("type")
                + _commonRegionItems
                + _compoundGeoOperandsUnary("children")
                + _regionTail
            | _compoundGeoOperatorBinary("type")
                + _commonRegionItems
                + _compoundGeoOperandsBinary("children")
                + _regionTail
            | _compoundGeoOperatorNary("type")
                + _commonRegionItems
                - _compoundGeoOperandsNary("children")
                + _regionTail)

# space subphrase
        positionInterval = (CaselessKeyword("PositionInterval")("type")
            + _commonRegionItems
            + _coos
            + _regionTail)
        position = (CaselessKeyword("Position")("type")
            + _commonSpaceItems
            + _pos
            + _spatialTail)
        spaceSubPhrase = (positionInterval
            | position
            | atomicGeometry
            | compoundGeoPhrase).addParseAction(makeTree)

# spectral subphrase
        spectralSpec = (Suppress(CaselessKeyword("Spectral"))
            + number)("pos")
        _spectralTail = Optional(spectralUnit) + cooProps
        spectralInterval = (CaselessKeyword("SpectralInterval")("type")
            + Optional(fillfactor)
            + Optional(refpos)
            + _coos
            + Optional(spectralSpec)
            + _spectralTail)
        spectral = (CaselessKeyword("Spectral")("type")
            + Optional(refpos)
            + _pos
            + _spectralTail)
        spectralSubPhrase = (spectralInterval | spectral).addParseAction(
            makeTree)

# redshift subphrase
        # note that, unlike the keywords, these two are case-sensitive
        redshiftType = Regex("VELOCITY|REDSHIFT")("redshiftType")
        redshiftSpec = (Suppress(CaselessKeyword("Redshift")) + number)("pos")
        dopplerdef = Regex("OPTICAL|RADIO|RELATIVISTIC")("dopplerdef")
        _redshiftTail = Optional(redshiftUnit) + cooProps
        redshiftInterval = (CaselessKeyword("RedshiftInterval")("type")
            + Optional(fillfactor)
            + Optional(refpos)
            + Optional(redshiftType)
            + Optional(dopplerdef)
            + _coos
            + Optional(redshiftSpec)
            + _redshiftTail)
        redshift = (CaselessKeyword("Redshift")("type")
            + Optional(refpos)
            + Optional(redshiftType)
            + Optional(dopplerdef)
            + _pos
            + _redshiftTail)
        redshiftSubPhrase = (redshiftInterval | redshift).addParseAction(
            makeTree)

# system subphrase (extension, see docs)
        # ids match Name from XML spec; we're not doing char refs and similar here
        xmlName = Word(alphas + "_:",
            alphanums + '.-_:').addParseAction(_stringify)
        systemDefinition = (Suppress(CaselessKeyword("System"))
            + xmlName("libSystem"))

# top level
        # all subphrases are optional but must come in this order
        stcsPhrase = ( #noflake: stcsPhrase is returned through locals()
            Optional(timeSubPhrase)("time")
            + Optional(spaceSubPhrase) ("space")
            + Optional(spectralSubPhrase)("spectral")
            + Optional(redshiftSubPhrase)("redshift")
            + Optional(systemDefinition)) + StringEnd()

        return _makeSymDict(locals(), _exportAll)
def impl(cls):
    """builds the unit expression grammar, stores its symbols in
    cls.symbols, and returns the root production (the local ``input``).

    An input is an optional scale factor (a power of ten or a float),
    optional whitespace, and a unit expression built from atomic units,
    quoted units, function applications, powers (``**``), products
    (``.``), and at most one division (``/``) per expression.

    cls.symbols is this function's local namespace; the local names are
    therefore visible to callers and are kept as they are (including
    ``input``, although it shadows the builtin within this function).
    """
    from gavo.imp.pyparsing import (Word, Literal, Regex,
        Optional, ZeroOrMore, alphas,
        Suppress, Forward, White)

    # no whitespace skipping: blanks are significant in unit strings
    with utils.pyparsingWhitechars(''):
        unit_atom = Word(alphas).addParseAction(UnitNode.fromToks)
        unit_atom.setName("atomic unit")
        quoted_unit_atom = ("'" + Word(alphas) + "'").addParseAction(
            QuotedUnitNode.fromToks)
        quoted_unit_atom.setName("quoted atomic unit")

        OPEN_P = Literal('(')
        CLOSE_P = Literal(')')
        SIGN = Literal('+') | Literal('-')
        FUNCTION_NAME = Word(alphas)
        UNSIGNED_INTEGER = Word("01234567890")
        SIGNED_INTEGER = SIGN + UNSIGNED_INTEGER
        FLOAT = Regex(r"[+-]?([0-9]+(\.[0-9]*)?)")
        # Either 0.ddd or a number without leading zero, each with an
        # optional exponent.  The dot after the leading zero must be
        # escaped; unescaped, it matched any character, so strings like
        # "0x5" or "015" were accepted as scale factors.
        VOFLOAT = Regex(r"0\.[0-9]+([eE][+-]?[0-9]+)?"
            r"|[1-9][0-9]*(\.[0-9]+)?([eE][+-]?[0-9]+)?")

        integer = SIGNED_INTEGER | UNSIGNED_INTEGER
        power_operator = Literal('**')
        multiplication_operator = Literal(".")
        division_operator = Literal("/")
        # NOTE(review): addParseAction modifies UNSIGNED_INTEGER itself
        # rather than a copy, so the "." appears to be appended wherever
        # UNSIGNED_INTEGER is used (i.e., also within integer), not just
        # in the denominator -- confirm this is intended before changing it.
        numeric_power = (
            integer
            | OPEN_P + integer + CLOSE_P
            | OPEN_P + FLOAT + CLOSE_P
            | OPEN_P + integer + '/'
                + UNSIGNED_INTEGER.addParseAction(lambda s, p, t: t[0] + ".")
                + CLOSE_P)
        numeric_power.setParseAction(evalAll)

        pow_10 = Literal("10") + power_operator + numeric_power
        scale_factor = (pow_10 | VOFLOAT).setParseAction(evalAll)

        any_unit_atom = unit_atom | quoted_unit_atom
        factor = (any_unit_atom
            + Optional(Suppress(power_operator) + numeric_power)
            ).addParseAction(Factor.fromToks)

        complete_expression = Forward()
        function_application = (FUNCTION_NAME
            + Suppress(OPEN_P) + complete_expression + Suppress(CLOSE_P))
        function_application.addParseAction(FunctionApplication.fromToks)

        # ^ picks the longer match, which tells a function application
        # from a plain unit starting with the same letters
        unit_expression = (
            Suppress(OPEN_P) + complete_expression + Suppress(CLOSE_P)
            | (factor ^ function_application))

        product_of_units = (
            unit_expression
            + ZeroOrMore(multiplication_operator + unit_expression)
            ).setParseAction(_buildTerm)

        complete_expression << (
            product_of_units
            + Optional(division_operator + unit_expression))
        complete_expression.setParseAction(Term.fromToks)

        input = (Optional(scale_factor)
            + Optional(Suppress(White()))
            + complete_expression
            ).setParseAction(Expression.fromToks)

        cls.symbols = locals()
        return input