def dispatchReader(rdr, hash):
    """Run the dispatch macro selected by the next character of rdr.

    rdr -- read/unread-able object
    hash -- ignored

    One character is read from rdr and looked up in dispatchMacros. The
    function found there is called with (rdr, character) and its result is
    returned.

    May raise ReaderException: on EOF, or when the character has no entry
    in dispatchMacros."""
    dispatchChar = read1(rdr)
    if dispatchChar == "":
        raise ReaderException("EOF while reading character", rdr)
    if dispatchChar in dispatchMacros:
        macroFn = dispatchMacros[dispatchChar]
        return macroFn(rdr, dispatchChar)
    raise ReaderException("No dispatch macro for: (" + dispatchChar + ")", rdr)
def derefNotImplemented(rdr, _):
    """Always raise ReaderException: the deref syntax @foo is unsupported.

    rdr -- read/unread-able object (handed to the exception)
    _ -- ignored

    Without this handler @foo would pass through silently as a symbol, so
    it is rejected explicitly here."""
    message = "Deref syntax @foo not currently implemented."
    raise ReaderException(message, rdr)
def readDelimitedList(delim, rdr, isRecursive):
    """Collect objects from rdr until the closing delimiter is reached.

    delim -- the terminating delimiter
    rdr -- read/unread-able object
    isRecursive -- passed through unchanged to read()

    Return a Python list of the objects read.
    May raise ReaderException, in particular when rdr runs out of
    characters before delim is seen."""
    # Remembered up front so an EOF error can report where the list began.
    startLine = rdr.lineCol()[0]
    items = []
    while True:
        ch = read1(rdr)
        while ch in whiteSpace:
            ch = read1(rdr)

        if ch == "":
            raise ReaderException(
                "EOF while reading starting at line {0}".format(startLine))
        if ch == delim:
            return items

        macrofn = getMacro(ch)
        if macrofn is None:
            # Not a macro character: push it back and read a whole object.
            rdr.back()
            items.append(read(rdr, True, None, isRecursive))
            continue

        result = macrofn(rdr, ch)
        # A macro returning None or the reader itself contributes no element.
        if result is None or result is rdr:
            continue
        items.append(result)
def evalReaderNotImplemented(rdr, _):
    """Always raise ReaderException: the eval syntax #= is unsupported.

    rdr -- read/unread-able object (handed to the exception)
    _ -- ignored

    Gives a more informative message than the generic `no dispatch macro'
    error would."""
    message = "Eval syntax #= not currently implemented."
    raise ReaderException(message, rdr)
def unmatchedClosingDelimiterReader(rdr, un):
    """Always raise ReaderException for a stray closing delimiter.

    rdr -- read/unread-able object (its lineCol() goes into the message)
    un -- the stray delimiter

    Called when un has no matching opening delimiter in rdr."""
    position = rdr.lineCol()
    message = "Unmatched Delimiter {0} at {1}".format(un, position)
    raise ReaderException(message)
def stringReader(rdr, doublequote):
    """Read the body of a double-quoted literal string from rdr.

    rdr -- a read/unread-able object, positioned just after the opening quote
    doublequote -- ignored

    Characters are collected until an unescaped double quote is read.
    A backslash introduces an escape, which is one of:
      * a key of chrLiterals (replaced by the mapped value)
      * u followed by exactly four hexadecimal digits
      * one to three octal digits, with a value no greater than 255

    May raise ReaderException (EOF, malformed or unsupported escape).
    Return a str or unicode object."""
    buf = []
    while True:
        ch = read1(rdr)
        if ch == "":
            # rdr is passed so the error carries reader position like the
            # equivalent EOF errors raised elsewhere in this module.
            raise ReaderException("EOF while reading string", rdr)
        if ch == '"':
            # Only a *raw* quote terminates; an escaped one is decoded below
            # and appended like any other character.
            return "".join(buf)
        if ch == "\\":
            esc = read1(rdr)
            if esc == "":
                raise ReaderException("EOF while reading string", rdr)
            elif esc in chrLiterals:
                ch = chrLiterals[esc]
            elif esc == "u":
                first = read1(rdr)
                if first not in hexChars:
                    raise ReaderException(
                        "Hexidecimal digit expected after"
                        " \\u in literal string, got:"
                        " ({0})".format(first or "EOF"), rdr)
                ch = readUnicodeChar(rdr, first, 16, 4, True)
            elif esc in octalChars:
                ch = readUnicodeChar(rdr, esc, 8, 3, False)
                if ord(ch) > 255:
                    raise ReaderException(
                        "Octal escape sequence in literal"
                        " string must be in range [0, 377]"
                        ", got: ({0})".format(ord(ch)), rdr)
            else:
                raise ReaderException(
                    "Unsupported escape character in"
                    " literal string: \\{0}".format(esc), rdr)
        buf.append(ch)
def readUnicodeChar(rdr, initch, base, length, exact):
    """Read the digits of a Unicode codepoint and return that character.

    rdr -- read/unread-able object
    initch -- the first character of the codepoint string (already read)
    base -- expected radix of the codepoint
    length -- maximum number of characters in the codepoint
    exact -- if True, codepoint string must contain length characters
             if False, it must contain [1, length], inclusive

    Reading stops early at EOF, white space or a macro character; that
    character is pushed back onto rdr. When exact is False a non-digit also
    stops the read and is pushed back.

    May raise ReaderException (bad digit, wrong digit count, or a codepoint
    that unichr() rejects).
    Return a unicode string of length one."""
    try:
        int(initch, base)
    except ValueError:
        raise ReaderException(
            "Expected base {0} digit, got:"
            " ({1})".format(base, initch or "EOF"), rdr)
    digits = [initch]

    # Counting collected digits (instead of a loop index) keeps the length
    # check below well defined even when length == 1 and nothing more is read.
    while len(digits) < length:
        ch = read1(rdr)
        if ch == "" or ch in whiteSpace or isMacro(ch):
            rdr.back()
            break
        try:
            int(ch, base)
        except ValueError:
            if exact:
                raise ReaderException(
                    "Expected base {0} digit, got:"
                    " ({1})".format(base, ch or "EOF"), rdr)
            rdr.back()
            break
        digits.append(ch)

    if exact and len(digits) != length:
        raise ReaderException(
            "Invalid character length: ({0}), should be:"
            " ({1})".format(len(digits), length), rdr)

    text = "".join(digits)
    try:
        return unichr(int(text, base))
    except (ValueError, OverflowError):
        # e.g. eight hex digits naming a value beyond the Unicode range
        raise ReaderException(
            "Codepoint out of range: ({0})".format(text), rdr)
def interpretToken(s):
    """Return the value denoted by the token string s.

    Acts as a pre-filter to matchSymbol(): if s is a key of
    INTERPRET_TOKENS the mapped value is returned (membership is tested
    explicitly, so a mapped value of None is returned as such). Otherwise
    the result of matchSymbol(s) is returned.

    Raise ReaderException if matchSymbol() returns None."""
    if s in INTERPRET_TOKENS:
        return INTERPRET_TOKENS[s]
    symbol = matchSymbol(s)
    if symbol is not None:
        return symbol
    raise ReaderException("Unknown symbol {0}".format(s))
def unquoteReader(rdr, tilde):
    """Read the object following an unquote character and wrap it.

    rdr -- read/unread-able object
    tilde -- ignored

    Return one of:
      * (unquote-splicing next-object-read)  when the next character is @
      * (unquote next-object-read)           otherwise

    May raise ReaderException (EOF right after the unquote character)."""
    nextch = read1(rdr)
    if nextch == "":
        raise ReaderException("EOF reading unquote", rdr)

    splicing = nextch == "@"
    if not splicing:
        # The character belongs to the object itself; give it back.
        rdr.back()
    form = read(rdr, True, None, True)
    if splicing:
        return RT.list(_UNQUOTE_SPLICING_, form)
    return RT.list(_UNQUOTE_, form)
def readNumber(rdr, initch):
    """Read the rest of a number token from rdr and return its value.

    rdr -- a read/unread-able object
    initch -- the first character of the number (already read)

    Characters are gathered up to EOF, white space or a macro character;
    the terminating read is pushed back. The text is handed to
    matchNumber().

    May raise ReaderException (matchNumber() raised, or returned None)."""
    chars = [initch]
    ch = read1(rdr)
    while not (ch == "" or ch in whiteSpace or isMacro(ch)):
        chars.append(ch)
        ch = read1(rdr)
    rdr.back()

    text = "".join(chars)
    try:
        number = matchNumber(text)
    except Exception as e:
        raise ReaderException(e.args[0], rdr)
    if number is None:
        raise ReaderException("Invalid number: " + text, rdr)
    return number
def metaReader(rdr, caret):
    """Read two objects from rdr; return the second carrying the first as meta.

    rdr -- read/unread-able object
    caret -- ignored

    The first object is normalised to a map:
      * str or Symbol -> RT.map(TAG_KEY, object)
      * Keyword       -> RT.map(object, T)
      * IPersistentMap is used as is

    May raise ReaderException (bad metadata type, or the second object has
    no withMeta attribute)."""
    # Line at which the metadata form starts; recorded but not used below.
    startLine = rdr.lineCol()[0]

    meta = read(rdr, True, None, True)
    if isinstance(meta, (str, Symbol)):
        meta = RT.map(TAG_KEY, meta)
    elif isinstance(meta, Keyword):
        meta = RT.map(meta, T)
    elif not isinstance(meta, IPersistentMap):
        raise ReaderException("Metadata must be Symbol,Keyword,String or Map",
                              rdr)

    target = read(rdr, True, None, True)
    if hasattr(target, "withMeta"):
        return target.withMeta(meta)
    # rdr is deliberately not attached to this exception: it would point
    # to the *end* of the object just read.
    raise ReaderException("Cannot attach meta to a object without"
                          " .withMeta")
def read(rdr, eofIsError, eofValue, isRecursive):
    """Read and return one object from rdr.

    rdr -- a read/unread-able object
    eofIsError -- if True, raise ReaderException when rdr is out of
                  characters; if False, return eofValue instead
    eofValue -- value returned at EOF when eofIsError is False
    isRecursive -- not currently used

    Steps, in order:
      1. consume white space
      2. check for EOF
      3. a leading digit starts a number
      4. a macro character dispatches to its macro function; if that
         function returns rdr itself, reading starts over
      5. + or - followed by a digit starts a signed number
      6. anything else is read as a token and interpreted"""
    while True:
        ch = read1(rdr)
        while ch in whiteSpace:
            ch = read1(rdr)

        if ch == "":
            if not eofIsError:
                return eofValue
            raise ReaderException("EOF while reading", rdr)

        if ch.isdigit():
            return readNumber(rdr, ch)

        macroFn = getMacro(ch)
        if macroFn is not None:
            result = macroFn(rdr, ch)
            if result is rdr:
                # The macro produced no object; read again.
                continue
            return result

        if ch in ("+", "-"):
            # Peek one character to tell a signed number from a symbol.
            lookahead = read1(rdr)
            signedNumber = lookahead.isdigit()
            rdr.back()
            if signedNumber:
                return readNumber(rdr, ch)

        return interpretToken(readToken(rdr, ch))
def argReader(rdr, perc):
    """Read and register an anonymous function argument (%, %1, %&, etc.).

    rdr -- read/unread-able object
    perc -- ignored

    When ARG_ENV.deref() is None the % is read as an ordinary token.
    Otherwise a bare % registers argument 1, %& registers argument -1 and
    %<integer> registers that integer.

    May raise IllegalStateException, or ReaderException.
    """
    if ARG_ENV.deref() is None:
        token = readToken(rdr, '%')
        return interpretToken(token)

    # Peek at what follows the % without consuming it.
    peeked = read1(rdr)
    rdr.back()
    if peeked == "" or peeked in whiteSpace or isTerminatingMacro(peeked):
        return registerArg(1)

    arg = read(rdr, True, None, True)
    if isinstance(arg, Symbol) and arg == _AMP_:
        return registerArg(-1)
    if isinstance(arg, int):
        return registerArg(arg)
    raise ReaderException("arg literal must be %, %& or %integer", rdr)
def characterReader(rdr, backslash):
    """Read a single clojure-py formatted character from rdr.

    rdr -- read/unread-able object
    backslash -- ignored

    The token following the backslash is interpreted as, in order:
      * a single character, returned as is
      * a key of namedChars, returning the mapped value
      * u plus hex digits, decoded by stringCodepointToUnicodeChar();
        surrogates (D800-DFFF) are rejected
      * o plus at most three octal digits, value no greater than 255

    May raise ReaderException.
    Return a unicode string of length one."""
    ch = rdr.read()
    if ch == "":
        raise ReaderException("EOF while reading character", rdr)
    token = readToken(rdr, ch)

    if len(token) == 1:
        return token
    if token in namedChars:
        return namedChars[token]

    if token.startswith("u"):
        try:
            ch = stringCodepointToUnicodeChar(token, 1, 4, 16)
        except UnicodeError as e:
            raise ReaderException(e.args[0], rdr)
        # Lone surrogates are not valid character constants.
        if 0xD800 <= ord(ch) <= 0xDFFF:
            raise ReaderException(
                "Invalid character constant in literal"
                " string: \\{0}".format(token), rdr)
        return ch

    if token.startswith("o"):
        if len(token) > 4:
            raise ReaderException(
                "Invalid octal escape sequence length in"
                " literal string. Three digits max:"
                " \\{0}".format(token), rdr)
        try:
            ch = stringCodepointToUnicodeChar(token, 1, len(token) - 1, 8)
        except UnicodeError as e:
            raise ReaderException(e.args[0], rdr)
        codepoint = ord(ch)
        if codepoint > 255:
            # Report the value in octal so it matches the \o prefix and the
            # octal range quoted in the message.
            raise ReaderException(
                "Octal escape sequence in literal string"
                " must be in range [0, 377], got:"
                " (\\o{0:o})".format(codepoint), rdr)
        return ch

    raise ReaderException("Unsupported character: \\" + token, rdr)
def readNamedUnicodeChar(rdr):
    r"""Read \N{foo} syntax, starting at the {.

    rdr -- a read/unread-able object

    Characters belonging to unicodeNameChars are collected up to the
    closing }. The collected name is stripped of surrounding white space
    and looked up with unicodedata.lookup().

    May raise ReaderException (missing {, EOF, a double quote before },
    an illegal character, an empty name, or an unknown name).
    Return the unicode character named by foo."""
    # The docstring above is a raw string: in a non-raw literal the \N{...}
    # sequence is itself parsed as a named-character escape on Python 3.
    ch = read1(rdr)
    if ch != "{":
        raise ReaderException(
            "Expected { in named unicode escape sequence,"
            " got: ({0})".format(ch or "EOF"), rdr)

    buf = []
    while True:
        ch = read1(rdr)
        if ch == "":
            raise ReaderException(
                "EOF while reading named unicode escape"
                " sequence", rdr)
        if ch in unicodeNameChars:
            buf.append(ch)
        elif ch == '"':
            # The enclosing string ended before the name was closed.
            raise ReaderException(
                "Missing } while reading named unicode"
                " escape sequence", rdr)
        elif ch == '}':
            break
        else:
            raise ReaderException(
                "Illegal character in named unicode"
                " escape sequence: ({0})".format(ch), rdr)

    name = "".join(buf).strip()
    if not name:
        raise ReaderException(
            "Expected name between {} in named unicode "
            "escape sequence", rdr)
    try:
        return unicodedata.lookup(name)
    except KeyError:
        raise ReaderException(
            "Unknown unicode character name in escape"
            " sequence: ({0})".format(name), rdr)
def syntaxQuote(self, form):
    """Return the syntax-quote expansion of form.

    form -- the object that was read after the syntax-quote character

    Summary of the cases handled below:
      * form is in the compiler's builtins -> RT.list(_QUOTE_, form)
      * Symbol -> RT.list(_QUOTE_, sym), where sym is form itself, a
        generated gensym (name ending in #), or a namespace-qualified
        symbol built from the current compiler's namespace
      * unquote form -> its second element, returned immediately
      * unquote-splicing form -> IllegalStateException
      * map / vector / set / seq / list -> an expression built with
        _APPLY_, _HASHMAP_, _VECTOR_, _SEQ_, _CONCAT_ and _LIST_ around
        self.sqExpandList() of the elements
      * int, float, str, Keyword -> form itself
      * anything else -> RT.list(_QUOTE_, form)

    If form has non-None meta and that meta still has entries once
    LINE_KEY is removed, the result is wrapped in
    RT.list(_WITH_META_, ret, <syntax-quoted meta>).

    May raise ReaderException or IllegalStateException."""
    # compiler uses this module, so import it lazily
    from clojure.lang.compiler import builtins as compilerbuiltins

    if form in compilerbuiltins:
        ret = RT.list(_QUOTE_, form)
    elif isinstance(form, Symbol):
        sym = form
        if sym.ns is None and sym.name.endswith("#"):
            # Auto-gensym literal (foo#): reuse the symbol already recorded
            # for it in GENSYM_ENV, or create and record a new one.
            gmap = GENSYM_ENV.deref()
            if gmap == None:
                raise ReaderException("Gensym literal not in syntax-quote, before", self.rdr)
            gs = gmap[sym]
            if gs is None:
                gs = Symbol(None, "{0}__{1}__auto__".format(sym.name[:-1], RT.nextID()))
                GENSYM_ENV.set(gmap.assoc(sym, gs))
            sym = gs
        # NOTE(review): the `ret = sym` assignments in the next three
        # branches appear to be overwritten by `ret = RT.list(_QUOTE_, sym)`
        # at the end of this Symbol branch -- confirm that is intended.
        elif sym.ns is None and sym.name.endswith("."):
            ret = sym
        elif sym.ns is None and sym.name.startswith("."):
            ret = sym
        elif sym.ns is not None:
            ret = sym
        else:
            # Unqualified symbol: qualify it with the namespace of the item
            # found for it, or with the current namespace if none is found.
            comp = currentCompiler.deref()
            if comp is None:
                raise IllegalStateException("No Compiler found in syntax quote!")
            ns = comp.getNS()
            if ns is None:
                raise IllegalStateException("No ns in reader")
            item = namespace.findItem(ns, sym)
            if item is None:
                sym = Symbol(ns.__name__, sym.name)
            else:
                sym = Symbol(item.ns.__name__, sym.name)
        ret = RT.list(_QUOTE_, sym)
    else:
        if isUnquote(form):
            # Unquoted form: hand back the object after the unquote marker.
            return form.next().first()
        elif isUnquoteSplicing(form):
            raise IllegalStateException("splice not in list")
        elif isinstance(form, IPersistentCollection):
            if isinstance(form, IPersistentMap):
                keyvals = self.flattenMap(form)
                ret = RT.list(_APPLY_, _HASHMAP_, RT.list(RT.cons(_CONCAT_, self.sqExpandList(keyvals.seq()))))
            elif isinstance(form, (IPersistentVector, IPersistentSet)):
                # Vectors and sets share this branch; both expand via _VECTOR_.
                ret = RT.list(_APPLY_, _VECTOR_, RT.list(_SEQ_, RT.cons(_CONCAT_, self.sqExpandList(form.seq()))))
            elif isinstance(form, (ISeq, IPersistentList)):
                seq = form.seq()
                if seq is None:
                    # form.seq() gave None: expand to a bare (_LIST_) call.
                    ret = RT.cons(_LIST_, None)
                else:
                    ret = RT.list(_SEQ_, RT.cons(_CONCAT_, self.sqExpandList(seq)))
            else:
                raise IllegalStateException("Unknown collection type")
        elif isinstance(form, (int, float, str, Keyword)):
            ret = form
        else:
            ret = RT.list(_QUOTE_, form)

    # Objects without a meta attribute are treated as having no meta.
    if getattr(form, "meta", lambda: None)() is not None:
        # newMeta is only used to decide whether any meta remains once
        # LINE_KEY is dropped; the full form.meta() is what gets quoted.
        newMeta = form.meta().without(LINE_KEY)
        if len(newMeta) > 0:
            return RT.list(_WITH_META_, ret, self.syntaxQuote(form.meta()))#FIXME: _WITH_META_ undefined
    return ret
def regexReader(rdr, doublequote):
    """Read a possibly multi-line regex pattern and compile it.

    rdr -- read/unread-able object, positioned just after the opening quote
    doublequote -- ignored

    Escapes decoded while reading (each introduced by a backslash):
      * any key of regexCharLiterals
      * u, U and x followed by exactly 4, 8 and 2 hex digits respectively
      * a digit, starting an octal sequence of up to three digits
      * N followed by a {name} Unicode character name
    Any other escaped character is kept together with its backslash.

    May raise ReaderException (EOF, bad escape, or re.compile() failure).
    Return the object produced by re.compile()."""
    # escape letter -> (number of hex digits, error message template)
    hexEscapes = {
        "u": (4, "Hexidecimal digit expected after"
                 " \\u in regex pattern, got:"
                 " ({0})"),
        "U": (8, "Hexidecimal digit expected after"
                 " \\U in regex pattern, got:"
                 " ({0})"),
        "x": (2, "Hexidecimal digit expected after"
                 " \\x in regex pattern, got:"
                 " ({0})"),
    }
    pieces = []
    while True:
        ch = read1(rdr)
        if ch == '"':
            break
        if ch == "":
            raise ReaderException("EOF while reading regex pattern", rdr)
        if ch == "\\":
            ch = read1(rdr)
            if ch == "":
                raise ReaderException("EOF while reading regex pattern", rdr)
            if ch in regexCharLiterals:
                # \, ', ", a, b, f, n, r, t, v
                ch = regexCharLiterals[ch]
            elif ch in hexEscapes:
                # \uXXXX, \UXXXXXXXX, \xXX
                width, complaint = hexEscapes[ch]
                ch = read1(rdr)
                if ch not in hexChars:
                    raise ReaderException(complaint.format(ch or "EOF"), rdr)
                ch = readUnicodeChar(rdr, ch, 16, width, True)
            elif ch.isdigit():
                # \O, \OO, or \OOO (not required to be exactly three digits)
                ch = readUnicodeChar(rdr, ch, 8, 3, False)
            elif ch == "N":
                # \N{named unicode character}
                ch = readNamedUnicodeChar(rdr)
            else:
                # Unrecognised escape: keep the backslash; the character
                # itself is appended below.
                pieces.append("\\")
        pieces.append(ch)

    try:
        return re.compile(u"".join(pieces))
    except re.error as e:
        raise ReaderException("invalid regex pattern: {0}".format(e.args[0]),
                              rdr)
def rawRegexReader(rdr, r):
    r"""Read a raw regex pattern, ignoring most escape sequences, and compile it.

    rdr -- a read/unread-able object, positioned just before the opening quote
    r -- ignored

    Only two escape sequences are decoded, and only when the introducing
    backslash is not itself escaped, i.e. it is preceded by an even number
    (possibly zero) of consecutive backslashes. Paired backslashes lose the
    ability to escape the next character but are *both* kept in the pattern.

      * \uXXXX
          \u03bb      => the single character U+03BB
          \\u03bb     => \ \ u 0 3 b b
          \\\u03bb    => \ \ followed by U+03BB
      * \UXXXXXXXX    same as above

    Everything else results in two characters in the pattern:

        \n => \ n
        \r => \ r
        \t => \ t
        \" => \ "
        \xff => \ x f f
        \377 => \ 3 7 7
        \N{foo} => \ N { f o o }

    May raise ReaderException (EOF, missing opening quote, bad hex digit,
    or re.compile() failure).
    Return the object produced by re.compile().
    """
    ch = read1(rdr)
    if ch == "":
        raise ReaderException("EOF expecting regex pattern", rdr)
    if ch != '"':
        raise ReaderException("Expected regex pattern after #r", rdr)

    # escape letter -> required number of hex digits
    unicodeEscapeLengths = {"u": 4, "U": 8}
    pat = []
    ch = read1(rdr)
    while ch != '"':
        if ch == "":
            raise ReaderException("EOF while reading regex pattern", rdr)
        if ch != "\\":
            pat.append(ch)
        else:
            # A backslash always consumes the character after it, so a
            # backslash pair is swallowed as a unit here. Consequently any
            # backslash reaching this point is unescaped, whatever other
            # escapes or plain text came earlier in the pattern.
            ch = read1(rdr)
            if ch == "":
                raise ReaderException("EOF while reading regex pattern", rdr)
            if ch in unicodeEscapeLengths:
                # \uXXXX or \UXXXXXXXX
                escape = ch
                ch = read1(rdr)
                if ch not in hexChars:
                    raise ReaderException(
                        "Hexidecimal digit expected"
                        " after \\{0} in regex pattern,"
                        " got: ({1})".format(escape, ch or "EOF"), rdr)
                pat.append(readUnicodeChar(rdr, ch, 16,
                                           unicodeEscapeLengths[escape], True))
            else:
                # Not a recognised escape: keep the backslash and the
                # character verbatim (this includes \" and \\).
                pat.append("\\")
                pat.append(ch)
        ch = read1(rdr)

    try:
        return re.compile(u"".join(pat))
    except re.error as e:
        raise ReaderException("invalid regex pattern: {0}".format(e.args[0]),
                              rdr)