Ejemplo n.º 1
0
def dispatchReader(rdr, hash):
    """Hand the next character of rdr to its dispatch-macro handler.

    rdr -- read/unread-able object
    hash -- ignored

    One character is consumed from rdr and looked up in dispatchMacros. The
    handler found there is called with (rdr, character) and its result is
    returned. Raise ReaderException at EOF or if no handler is registered."""
    key = read1(rdr)
    if key == "":
        raise ReaderException("EOF while reading character", rdr)
    if key in dispatchMacros:
        handler = dispatchMacros[key]
        return handler(rdr, key)
    raise ReaderException("No dispatch macro for: ("+ key + ")", rdr)
Ejemplo n.º 2
0
def derefNotImplemented(rdr, _):
    """Always raise ReaderException: the @foo deref syntax is unsupported.

    rdr -- read/unread-able object (attached to the exception)
    _ -- ignored

    Without this handler @foo would slip through silently as a symbol."""
    message = "Deref syntax @foo not currently implemented."
    raise ReaderException(message, rdr)
Ejemplo n.º 3
0
def readDelimitedList(delim, rdr, isRecursive):
    """Collect objects from rdr up to the closing delimiter.

    delim -- the terminating delimiter
    rdr -- read/unread-able object
    isRecursive -- forwarded unchanged to read()

    Return a Python list of the objects read. Raise ReaderException if rdr
    runs out of characters before delim is seen."""
    startLine = rdr.lineCol()[0]
    items = []

    while True:
        # Skip any white space in front of the next element.
        ch = read1(rdr)
        while ch in whiteSpace:
            ch = read1(rdr)

        if ch == "":
            raise ReaderException(
                "EOF while reading starting at line {0}".format(startLine))
        if ch == delim:
            return items

        macro = getMacro(ch)
        if macro is None:
            # Not a macro character: push it back and read a whole object.
            rdr.back()
            items.append(read(rdr, True, None, isRecursive))
            continue

        value = macro(rdr, ch)
        # A macro returning None or rdr itself contributes no element.
        if value is None or value is rdr:
            continue
        items.append(value)
Ejemplo n.º 4
0
def evalReaderNotImplemented(rdr, _):
    """Always raise ReaderException: the #= eval syntax is unsupported.

    rdr -- read/unread-able object (attached to the exception)
    _ -- ignored

    Raising here gives a more informative message than the generic
    `no dispatch macro' error would."""
    message = "Eval syntax #= not currently implemented."
    raise ReaderException(message, rdr)
Ejemplo n.º 5
0
def unmatchedClosingDelimiterReader(rdr, un):
    """Always raise ReaderException for a stray closing delimiter.

    rdr -- read/unread-able object (its lineCol() goes into the message)
    un -- the stray delimiter

    Intended as the handler for a closing delimiter that has no matching
    opening delimiter in rdr."""
    message = "Unmatched Delimiter {0} at {1}".format(un, rdr.lineCol())
    raise ReaderException(message)
Ejemplo n.º 6
0
def stringReader(rdr, doublequote):
    """Read the body of a double-quoted string literal from rdr.

    rdr -- a read/unread-able object
    doublequote -- ignored

    Characters are collected until an unescaped double quote is reached.
    A backslash introduces an escape: a key of chrLiterals, a lowercase u
    followed by exactly four hex digits, or one to three octal digits
    (value at most 377 octal).

    May raise ReaderException. Return a str or unicode object."""
    chars = []
    while True:
        ch = read1(rdr)
        if ch == "":
            raise ReaderException("EOF while reading string")
        if ch == '"':
            # Unescaped closing quote: the literal is complete.
            return "".join(chars)
        if ch == '\\':
            esc = read1(rdr)
            if esc == "":
                raise ReaderException("EOF while reading string")
            if esc in chrLiterals:
                ch = chrLiterals[esc]
            elif esc == "u":
                first = read1(rdr)
                if first not in hexChars:
                    raise ReaderException(
                        "Hexidecimal digit expected after"
                        " \\u in literal string, got:"
                        " ({0})".format(first), rdr)
                ch = readUnicodeChar(rdr, first, 16, 4, True)
            elif esc in octalChars:
                ch = readUnicodeChar(rdr, esc, 8, 3, False)
                value = ord(ch)
                if value > 255:
                    raise ReaderException(
                        "Octal escape sequence in literal"
                        " string must be in range [0, 377]"
                        ", got: ({0})".format(value), rdr)
            else:
                raise ReaderException(
                    "Unsupported escape character in"
                    " literal string: \\{0}".format(esc), rdr)
        chars.append(ch)
Ejemplo n.º 7
0
def readUnicodeChar(rdr, initch, base, length, exact):
    """Read a string that specifies a Unicode codepoint.

    rdr -- read/unread-able object
    initch -- the first character of the codepoint string (already read)
    base -- expected radix of the codepoint
    length -- maximum number of characters in the codepoint
    exact -- if True, codepoint string must contain length characters
             if False, it must contain [1, length], inclusive

    Reading stops early at EOF, white space or a macro character; that
    character is pushed back onto rdr. When exact is False a non-digit also
    stops the read and is pushed back; when exact is True it is an error.

    May raise ReaderException. Return a unicode string of length one."""
    try:
        int(initch, base)
    except ValueError:
        raise ReaderException(
            "Expected base {0} digit, got:"
            " ({1})".format(base, initch or "EOF"), rdr)
    digits = [initch]

    # initch is digit number one, so at most length - 1 more are read.
    for _ in range(2, length + 1):
        ch = read1(rdr)
        if ch == "" or ch in whiteSpace or isMacro(ch):
            rdr.back()
            break
        try:
            int(ch, base)
        except ValueError:
            if exact:
                raise ReaderException(
                    "Expected base {0} digit, got:"
                    " ({1})".format(base, ch or "EOF"), rdr)
            rdr.back()
            break
        digits.append(ch)

    # Count the digits actually collected instead of relying on the loop
    # variable, which is unbound when length == 1 (the loop never runs).
    if exact and len(digits) != length:
        raise ReaderException(
            "Invalid character length: ({0}), should be:"
            " ({1})".format(len(digits), length), rdr)
    return unichr(int("".join(digits), base))
Ejemplo n.º 8
0
def interpretToken(s):
    """Return the value that the token string s stands for.

    Acts as a pre-filter in front of matchSymbol(): if s is a key of
    INTERPRET_TOKENS the mapped value is returned, otherwise s is handed to
    matchSymbol() and its result is returned.

    Raise ReaderException if matchSymbol() does not recognise s."""
    if s in INTERPRET_TOKENS:
        return INTERPRET_TOKENS[s]
    symbol = matchSymbol(s)
    if symbol is not None:
        return symbol
    raise ReaderException("Unknown symbol {0}".format(s))
Ejemplo n.º 9
0
def unquoteReader(rdr, tilde):
    """Read the form that follows an unquote marker.

    rdr -- read/unread-able object
    tilde -- ignored

    Return one of:
    * (unquote-splicing next-object-read)   when the next character is @
    * (unquote next-object-read)            otherwise

    Raise ReaderException at EOF."""
    nxt = read1(rdr)
    if nxt == "":
        raise ReaderException("EOF reading unquote", rdr)
    splicing = nxt == "@"
    if not splicing:
        # The character belongs to the form itself; give it back.
        rdr.back()
    form = read(rdr, True, None, True)
    head = _UNQUOTE_SPLICING_ if splicing else _UNQUOTE_
    return RT.list(head, form)
Ejemplo n.º 10
0
def readNumber(rdr, initch):
    """Read the rest of a numeric token from rdr and return its value.

    rdr -- a read/unread-able object
    initch -- the first character of the number (already consumed)

    Characters are gathered until EOF, white space or a macro character;
    that terminator is pushed back. The text is then parsed by
    matchNumber().

    May raise ReaderException."""
    chars = [initch]
    ch = read1(rdr)
    while not (ch == "" or ch in whiteSpace or isMacro(ch)):
        chars.append(ch)
        ch = read1(rdr)
    rdr.back()

    text = "".join(chars)
    try:
        number = matchNumber(text)
    except Exception as e:
        raise ReaderException(e.args[0], rdr)
    if number is not None:
        return number
    raise ReaderException("Invalid number: " + text, rdr)
Ejemplo n.º 11
0
def metaReader(rdr, caret):
    """Read a metadata form and a target form; attach the first to the second.

    rdr -- read/unread-able object
    caret -- ignored

    A str or Symbol becomes a map {TAG_KEY: value}, a Keyword becomes
    {keyword: T}, and an IPersistentMap is used as is. Anything else is an
    error. The target must provide .withMeta.

    May raise ReaderException."""
    # Captured before anything is read; currently not used further.
    line = rdr.lineCol()[0]

    meta = read(rdr, True, None, True)
    if isinstance(meta, (str, Symbol)):
        meta = RT.map(TAG_KEY, meta)
    elif isinstance(meta, Keyword):
        meta = RT.map(meta, T)
    elif not isinstance(meta, IPersistentMap):
        raise ReaderException("Metadata must be Symbol,Keyword,String or Map",
                              rdr)

    target = read(rdr, True, None, True)
    if hasattr(target, "withMeta"):
        return target.withMeta(meta)
    # rdr is deliberately not attached to this exception: it would point
    # to the *end* of the object just read.
    raise ReaderException("Cannot attach meta to a object without"
                          " .withMeta")
Ejemplo n.º 12
0
def read(rdr, eofIsError, eofValue, isRecursive):
    """Read and return one object from rdr.

    rdr -- a read/unread-able object
    eofIsError -- if true, raise ReaderException when rdr is exhausted
                  if false, return eofValue instead
    eofValue -- value returned at EOF when eofIsError is false
    isRecursive -- not currently used

    Steps, in order:
    1. skip white space
    2. handle EOF
    3. a leading digit starts a number
    4. a macro character is dispatched to its handler; a handler that
       returns rdr itself produced nothing, so reading starts over
    5. + or - directly followed by a digit starts a signed number
    6. anything else is read as a token and interpreted"""
    while True:
        ch = read1(rdr)
        while ch in whiteSpace:
            ch = read1(rdr)

        if ch == "":
            if not eofIsError:
                return eofValue
            raise ReaderException("EOF while reading", rdr)

        if ch.isdigit():
            return readNumber(rdr, ch)

        macro = getMacro(ch)
        if macro is not None:
            result = macro(rdr, ch)
            if result is not rdr:
                return result
            continue

        if ch in ("+", "-"):
            # Peek one character ahead, then always give it back.
            lookahead = read1(rdr)
            followedByDigit = lookahead.isdigit()
            rdr.back()
            if followedByDigit:
                return readNumber(rdr, ch)

        return interpretToken(readToken(rdr, ch))
Ejemplo n.º 13
0
def argReader(rdr, perc):
    """Read and register an anonymous function argument (%, %1, %&, etc.).

    rdr -- read/unread-able object
    perc -- ignored

    When ARG_ENV holds None the % is read as an ordinary token. Otherwise a
    bare % registers argument 1, %& registers argument -1, and %<integer>
    registers that integer.

    May raise IllegalStateException, or ReaderException.
    """
    if ARG_ENV.deref() is None:
        return interpretToken(readToken(rdr, '%'))

    # Peek at the next character without consuming it.
    peek = read1(rdr)
    rdr.back()
    if peek == "" or peek in whiteSpace or isTerminatingMacro(peek):
        return registerArg(1)

    arg = read(rdr, True, None, True)
    if isinstance(arg, Symbol) and arg == _AMP_:
        return registerArg(-1)
    if isinstance(arg, int):
        return registerArg(arg)
    raise ReaderException("arg literal must be %, %& or %integer", rdr)
Ejemplo n.º 14
0
def characterReader(rdr, backslash):
    """Read a single clojure-py formatted character from rdr.

    rdr -- read/unread-able object
    backslash -- ignored

    The token after the backslash is interpreted as, in order: a single
    literal character, a key of namedChars, u plus four hex digits (the
    surrogate range D800-DFFF is rejected), or o plus up to three octal
    digits (value at most 377 octal).

    May raise ReaderException. Return a unicode string of length one."""
    first = rdr.read()
    if first == "":
        raise ReaderException("EOF while reading character", rdr)
    token = readToken(rdr, first)  # .decode("utf-8")

    if len(token) == 1:
        return token
    if token in namedChars:
        return namedChars[token]

    if token.startswith("u"):
        try:
            char = stringCodepointToUnicodeChar(token, 1, 4, 16)
        except UnicodeError as e:
            raise ReaderException(e.args[0], rdr)
        codepoint = ord(char)
        # Lone surrogates are not valid character constants.
        if 0xD800 <= codepoint <= 0xDFFF:
            raise ReaderException(
                "Invalid character constant in literal"
                " string: \\{0}".format(token), rdr)
        return char

    if token.startswith("o"):
        if len(token) > 4:
            raise ReaderException(
                "Invalid octal escape sequence length in"
                " literal string. Three digits max:"
                " \\{0}".format(token), rdr)
        try:
            char = stringCodepointToUnicodeChar(token, 1, len(token) - 1, 8)
        except UnicodeError as e:
            raise ReaderException(e.args[0], rdr)
        codepoint = ord(char)
        if codepoint > 255:
            raise ReaderException(
                "Octal escape sequence in literal string"
                " must be in range [0, 377], got:"
                " (\\o{0})".format(codepoint), rdr)
        return char

    raise ReaderException("Unsupported character: \\" + token, rdr)
Ejemplo n.º 15
0
def readNamedUnicodeChar(rdr):
    r"""Read \N{foo} syntax, starting at the {.

    rdr -- a read/unread-able object

    The name between the braces may only contain characters found in
    unicodeNameChars; surrounding white space is stripped before the name is
    looked up with unicodedata.lookup().

    May raise ReaderException (EOF, missing or misplaced braces, illegal
    character, empty name, unknown name). Return the unicode character
    named by foo."""
    # NOTE: the docstring must stay a raw string. In a non-raw (unicode)
    # literal the \N{...} above is itself parsed as a named-character escape
    # and the module fails to compile.
    buf = []
    ch = read1(rdr)
    if ch != "{":
        raise ReaderException(
            "Expected { in named unicode escape sequence,"
            " got: ({0})".format(ch or "EOF"), rdr)
    while True:
        ch = read1(rdr)
        if ch == "":
            raise ReaderException(
                "EOF while reading named unicode escape"
                " sequence", rdr)
        elif ch in unicodeNameChars:
            buf.append(ch)
            continue
        elif ch == '"':
            # Reaching a double quote means the closing brace was forgotten.
            raise ReaderException(
                "Missing } while reading named unicode"
                " escape sequence", rdr)
        elif ch == '}':
            break
        else:
            raise ReaderException(
                "Illegal character in named unicode"
                " escape sequence: ({0})".format(ch), rdr)
    name = "".join(buf).strip()
    if len(name) == 0:
        raise ReaderException(
            "Expected name between {} in named unicode "
            "escape sequence", rdr)
    try:
        return unicodedata.lookup(name)
    except KeyError:
        raise ReaderException(
            "Unknown unicode character name in escape"
            " sequence: ({0})".format(name), rdr)
Ejemplo n.º 16
0
    def syntaxQuote(self, form):
        """Return the syntax-quote expansion of form.

        form -- the object that was read after the syntax-quote marker

        * Compiler builtins are wrapped as (quote form).
        * Symbols are wrapped as (quote sym) after one of:
            - foo# (no namespace): replaced by a generated symbol that is
              remembered in GENSYM_ENV so repeated uses map to the same one
            - namespaced symbols, and un-namespaced names that start or end
              with a dot: left unchanged
            - anything else: qualified with the namespace reported by
              namespace.findItem(), or the compiler's current namespace if
              nothing is found
        * An unquote form yields its argument; an unquote-splicing form
          outside a collection is an error.
        * Maps, vectors/sets and seqs/lists are expanded element-wise through
          self.sqExpandList().
        * int, float, str and Keyword are returned as is; every other object
          is wrapped as (quote form).

        If form carries metadata other than LINE_KEY the result is wrapped in
        a with-meta call.

        May raise ReaderException or IllegalStateException."""
        # compiler uses this module, so import it lazily
        from clojure.lang.compiler import builtins as compilerbuiltins

        if form in compilerbuiltins:
            ret = RT.list(_QUOTE_, form)
        elif isinstance(form, Symbol):
            sym = form
            if sym.ns is None and sym.name.endswith("#"):
                gmap = GENSYM_ENV.deref()
                if gmap is None:
                    raise ReaderException(
                        "Gensym literal not in syntax-quote, before",
                        self.rdr)
                gs = gmap[sym]
                if gs is None:
                    gs = Symbol(None, "{0}__{1}__auto__".format(
                        sym.name[:-1], RT.nextID()))
                    GENSYM_ENV.set(gmap.assoc(sym, gs))
                sym = gs
            elif (sym.ns is None
                  and not sym.name.endswith(".")
                  and not sym.name.startswith(".")):
                # Plain unqualified symbol: resolve it to a namespace.
                comp = currentCompiler.deref()
                if comp is None:
                    raise IllegalStateException(
                        "No Compiler found in syntax quote!")
                ns = comp.getNS()
                if ns is None:
                    raise IllegalStateException("No ns in reader")

                item = namespace.findItem(ns, sym)
                if item is None:
                    sym = Symbol(ns.__name__, sym.name)
                else:
                    sym = Symbol(item.ns.__name__, sym.name)
            # Remaining cases (already namespaced, "Foo." and ".foo") keep
            # sym untouched. Every symbol ends up quoted.
            ret = RT.list(_QUOTE_, sym)
        else:
            if isUnquote(form):
                return form.next().first()
            elif isUnquoteSplicing(form):
                raise IllegalStateException("splice not in list")
            elif isinstance(form, IPersistentCollection):
                if isinstance(form, IPersistentMap):
                    keyvals = self.flattenMap(form)
                    ret = RT.list(
                        _APPLY_, _HASHMAP_,
                        RT.list(RT.cons(_CONCAT_,
                                        self.sqExpandList(keyvals.seq()))))
                elif isinstance(form, (IPersistentVector, IPersistentSet)):
                    ret = RT.list(
                        _APPLY_, _VECTOR_,
                        RT.list(_SEQ_,
                                RT.cons(_CONCAT_,
                                        self.sqExpandList(form.seq()))))
                elif isinstance(form, (ISeq, IPersistentList)):
                    seq = form.seq()
                    if seq is None:
                        ret = RT.cons(_LIST_, None)
                    else:
                        ret = RT.list(
                            _SEQ_,
                            RT.cons(_CONCAT_, self.sqExpandList(seq)))
                else:
                    raise IllegalStateException("Unknown collection type")
            elif isinstance(form, (int, float, str, Keyword)):
                ret = form
            else:
                ret = RT.list(_QUOTE_, form)
        # Objects without a meta() method are treated as having no metadata.
        if getattr(form, "meta", lambda: None)() is not None:
            newMeta = form.meta().without(LINE_KEY)
            if len(newMeta) > 0:
                # FIXME: _WITH_META_ undefined
                return RT.list(_WITH_META_, ret,
                               self.syntaxQuote(form.meta()))
        return ret
Ejemplo n.º 17
0
def regexReader(rdr, doublequote):
    """Read a possibly multi-line Python re pattern and compile it.

    rdr -- read/unread-able object
    doublequote -- ignored

    Characters are read up to the closing double quote. Backslash escapes
    found in regexCharLiterals, 4- and 8-digit unicode escapes, 2-digit hex
    escapes, octal escapes and named unicode escapes are translated to the
    character they denote. Any other backslash sequence is copied into the
    pattern unchanged.

    May raise ReaderException. Return the object produced by re.compile()."""
    pieces = []
    while True:
        ch = read1(rdr)
        if ch == '"':
            break
        if ch == "":
            raise ReaderException("EOF while reading regex pattern", rdr)
        if ch == "\\":
            ch = read1(rdr)
            if ch == "":
                raise ReaderException("EOF while reading regex pattern", rdr)
            # \, ', ", a, b, f, n, r, t, v
            elif ch in regexCharLiterals:
                ch = regexCharLiterals[ch]
            # \uXXXX
            elif ch == "u":
                ch = read1(rdr)
                if ch not in hexChars:
                    raise ReaderException("Hexidecimal digit expected after"
                                          " \\u in regex pattern, got:"
                                          " ({0})".format(ch or "EOF"), rdr)
                ch = readUnicodeChar(rdr, ch, 16, 4, True)
            # \UXXXXXXXX
            elif ch == "U":
                ch = read1(rdr)
                if ch not in hexChars:
                    raise ReaderException("Hexidecimal digit expected after"
                                          " \\U in regex pattern, got:"
                                          " ({0})".format(ch or "EOF"), rdr)
                ch = readUnicodeChar(rdr, ch, 16, 8, True)
            # \xXX
            elif ch == "x":
                ch = read1(rdr)
                if ch not in hexChars:
                    raise ReaderException("Hexidecimal digit expected after"
                                          " \\x in regex pattern, got:"
                                          " ({0})".format(ch or "EOF"), rdr)
                ch = readUnicodeChar(rdr, ch, 16, 2, True)
            # \O, \OO, or \OOO (length is a maximum here, not exact)
            elif ch.isdigit():
                ch = readUnicodeChar(rdr, ch, 8, 3, False)
            # \N{named unicode character}
            elif ch == "N":
                ch = readNamedUnicodeChar(rdr)
            # Unrecognised escape: ch now holds the character after the
            # backslash, so put the backslash itself back into the pattern.
            else:
                pieces.append("\\")
        pieces.append(ch)

    source = u"".join(pieces)
    try:
        return re.compile(source)
    except re.error as e:
        raise ReaderException("invalid regex pattern: {0}".format(e.args[0]),
                              rdr)
Ejemplo n.º 18
0
def rawRegexReader(rdr, r):
    r"""Read a regex pattern string ignoring most escape sequences.

    rdr -- a read/unread-able object
    r -- ignored

    The opening double quote is expected as the next character of rdr.

    The following two are the only valid escape sequences. But only if they
    are not preceded by an even number of backslashes. When \ are in pairs
    they've lost their ability to escape the next character. Both backslashes
    *still* get put into the string.

      * \uXXXX
        \u03bb => λ
        \\u03bb => \ \ u 0 3 b b
        \\\u03bb => \ \ λ
      * \UXXXXXXXX
        same as above

    Everything else will result in two characters in the string:
    \n => \ n
    \r => \ r
    \t => \ t
    \" => \ "
    \xff => \ x f f
    \377 => \ 3 7 7
    \N{foo} \ N { f o o }

    May raise ReaderException. Return the object produced by re.compile().
    """
    pat = []
    ch = read1(rdr)
    if ch == "":
        raise ReaderException("EOF expecting regex pattern", rdr)
    if ch != '"':
        raise ReaderException("Expected regex pattern after #r", rdr)
    ch = read1(rdr)
    while ch != '"':
        if ch == "":
            raise ReaderException("EOF while reading regex pattern", rdr)
        if ch == "\\":
            # A backslash is always consumed together with the character
            # after it. Runs of backslashes are therefore eaten in pairs, and
            # whenever this branch is entered the backslash is preceded by an
            # even number of backslashes -- i.e. it is a live escape. No
            # running counter is needed (a counter that is not reset after
            # sequences such as \n wrongly disables a later \uXXXX).
            ch = read1(rdr)
            if ch == "":
                raise ReaderException("EOF while reading regex pattern", rdr)
            # \uXXXX
            elif ch == "u":
                ch = read1(rdr)
                if ch not in hexChars:
                    raise ReaderException("Hexidecimal digit expected"
                                          " after \\u in regex pattern,"
                                          " got: ({0})".format(ch or "EOF"),
                                          rdr)
                pat.append(readUnicodeChar(rdr, ch, 16, 4, True))
            # \UXXXXXXXX
            elif ch == "U":
                ch = read1(rdr)
                if ch not in hexChars:
                    raise ReaderException("Hexidecimal digit expected"
                                          " after \\U in regex pattern,"
                                          " got: ({0})".format(ch or "EOF"),
                                          rdr)
                pat.append(readUnicodeChar(rdr, ch, 16, 8, True))
            else:
                # Any other pair (including \\ and \") is copied verbatim.
                pat.append("\\")
                pat.append(ch)
        else:
            pat.append(ch)
        ch = read1(rdr)
    try:
        return re.compile(u"".join(pat))
    except re.error as e:
        raise ReaderException("invalid regex pattern: {0}".format(e.args[0]),
                              rdr)