    def tokeneater(self, toktype, toktext, srowcol, erowcol, line):
        """
        A callback function used by C{tokenize.tokenize} to handle
        each token in the module.  C{tokeneater} collects tokens into
        the C{self.cur_line} list until a complete logical line has
        been formed; and then calls L{handle_line} to process that line.
        """
        (srow, scol) = srowcol
        (erow, ecol) = erowcol
        if toktype == token.ERRORTOKEN:
            raise tokenize.TokenError(toktype)

        # Did we skip any whitespace?  If so, add a pseudotoken
        # for it, with toktype=None.  (Note -- this skipped string
        # might also contain continuation slashes; but I won't bother
        # to colorize them.)
        startpos = self.line_offsets[srow] + scol
        if startpos > self.pos:
            skipped = self.text[self.pos:startpos]
            self.cur_line.append((None, skipped))

        # Update our position.
        self.pos = startpos + len(toktext)

        # Update our current line.
        self.cur_line.append((toktype, toktext))

        # When we reach the end of a line, process it.
        if toktype == token.NEWLINE or toktype == token.ENDMARKER:
            self.handle_line(self.cur_line)
            self.cur_line = []
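
The callback signature above matches the old Python 2 `tokenize.tokenize(readline, tokeneater)` interface. As a minimal sketch, the same token-eater pattern can be driven through the Python 3 generator API; the `LineCollector` class below is hypothetical and simply records each completed logical line instead of calling a real `handle_line`.

import io
import token
import tokenize

class LineCollector:
    """Hypothetical driver that groups tokens into logical lines."""

    def __init__(self, text):
        self.text = text
        self.cur_line = []
        self.logical_lines = []

    def tokenize(self):
        readline = io.StringIO(self.text).readline
        for tok in tokenize.generate_tokens(readline):
            self.tokeneater(tok.type, tok.string, tok.start, tok.end, tok.line)
        return self.logical_lines

    def tokeneater(self, toktype, toktext, srowcol, erowcol, line):
        # Collect tokens until a logical line is complete, then record it.
        self.cur_line.append((toktype, toktext))
        if toktype in (token.NEWLINE, token.ENDMARKER):
            self.logical_lines.append(self.cur_line)
            self.cur_line = []

print(LineCollector("x = 1\ny = 2\n").tokenize())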
Example 2
import io
import tokenize


def minify_iteration(input_code):

    # setup vars
    output_code = ""
    last_token = None
    last_erow = -1
    last_ecol = 0

    # setup generators
    linegen = io.StringIO(input_code).readline
    tokgen = tokenize.generate_tokens(linegen)

    # read source code in tokens
    for token, text, (srow, scol), (erow, ecol), line in tokgen:

        # check for parsing errors
        if token == tokenize.ERRORTOKEN:
            raise tokenize.TokenError("Failed to parse python code")

        # choose when to keep a token
        keep_token = ((token != tokenize.COMMENT) and (token != tokenize.NL)
                      and not ((last_token == tokenize.NEWLINE)
                               and (token == tokenize.STRING)))

        # keep token if flag is set
        if keep_token:

            # set indent flag
            set_indents = ((token != tokenize.NEWLINE)
                           and (token != tokenize.STRING)
                           and (last_token != tokenize.STRING)
                           and (token != tokenize.OP)
                           and (last_token != tokenize.OP))

            # restore indentation
            if srow > last_erow:
                last_ecol = 0
            if (scol > last_ecol) and set_indents:
                indents = scol - last_ecol
                output_code += " " * indents

            # convert tabs to spaces
            if token == tokenize.INDENT:
                text = text.replace("\t", " ")

            # convert windows line breaks to proper line breaks
            if token == tokenize.NEWLINE:
                text = "\n"

            # write code to buffer
            output_code += text

        # update vars
        last_erow = erow
        last_ecol = ecol

        if (token != tokenize.DEDENT) and (token != tokenize.INDENT):
            last_token = token

    # return the minified source code
    return output_code
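
As a quick usage sketch (the sample source string below is made up), running minify_iteration over a small snippet shows comments being dropped and spacing around operators collapsed, while indentation is preserved:

sample_source = (
    "def greet(name):\n"
    "    # say hello\n"
    "    message = 'Hello, ' + name\n"
    "    return message\n"
)

# Prints the snippet without the comment line and with spacing collapsed
# around operators; leading indentation is kept.
print(minify_iteration(sample_source))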
Example 3
    def tokeneater(self, toktype, toktext, srowcol, erowcol, line):
        """
        A callback function used by C{tokenize.tokenize} to handle
        each token in the module.  C{tokeneater} collects tokens into
        the C{self.cur_line} list until a complete logical line has
        been formed; and then calls L{handle_line} to process that line.
        """
        srow, scol = srowcol
        erow, ecol = erowcol
        # If we encounter any errors, then just give up.
        if toktype == token.ERRORTOKEN:
            raise tokenize.TokenError(toktype)

        if hasattr(tokenize, 'ENCODING') and toktype == tokenize.ENCODING:
            if self.coding is None:
                self.coding = toktext
            return

        token_startpos = self.token_line_offsets[srow] + scol
        if six.binary_type is str:
            input_startpos = token_startpos
            input_toktext = toktext
        else:
            input_scol = len(line[:scol].encode(self.coding))
            input_startpos = self.input_line_offsets[srow] + input_scol
            input_toktext = toktext.encode(self.coding)

        # Did we skip any whitespace?  If so, add a pseudotoken
        # for it, with toktype=None.  (Note -- this skipped string
        # might also contain continuation slashes; but I won't bother
        # to colorize them.)
        if input_startpos > self.input_pos:
            skipped = self.text[self.input_pos:input_startpos]
            if six.binary_type is not str:
                skipped = skipped.decode(self.coding)
            self.cur_line.append((None, skipped))

        # Update our position.
        self.token_pos = token_startpos + len(toktext)
        self.input_pos = input_startpos + len(input_toktext)

        # Update our current line.
        self.cur_line.append((toktype, toktext))

        # When we reach the end of a line, process it.
        if toktype == token.NEWLINE or toktype == token.ENDMARKER:
            self.handle_line(self.cur_line)
            self.cur_line = []
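
This variant keeps two offset tables, one into the decoded text (`token_line_offsets`) and one into the raw byte input (`input_line_offsets`); neither is shown above. A plausible way to build such tables, assuming rows are 1-based as tokenize reports them, is sketched below (the helper name is made up):

def build_line_offsets(lines):
    """Map a 1-based row number to the offset of that row's first character."""
    offsets = [0, 0]  # index 0 is unused because tokenize rows start at 1
    total = 0
    for line in lines:
        total += len(line)
        offsets.append(total)
    return offsets

text = u"x = 'caf\u00e9'\ny = 2\n"
token_line_offsets = build_line_offsets(text.splitlines(True))
input_line_offsets = build_line_offsets(
    line.encode('utf-8') for line in text.splitlines(True))
print(token_line_offsets)   # offsets into the decoded text
print(input_line_offsets)   # offsets into the encoded byte stream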
Example 4
def parse_docstring(docstring, keywords):
    """Parse a docstring, looking for design-by-contract expressions.

    Returns a list of tuples: the list is the same length as keywords, and
    matches each keyword.  The tuple is (keyword, [decls], [exprs]), namely
    the keyword, a list of string declarations, and a list of tuples (string,
    lineno).

    Examples::
    >>> from pprint import pprint
    >>> pprint( parse_docstring(parse_docstring.__doc__, ['post', 'pre']) )
    [('post', [], [('[ x [ 0 ] for x in __return__ ] == keywords', 22)]),
     ('pre',
      [],
      [('docstring is None or isstring ( docstring )', 18),
       ('forall ( keywords , isstring )', 19)])]

    pre::
        docstring is None or isstring(docstring)
        forall(keywords, isstring)

    post[]::
        [x[0] for x in __return__] == keywords
    """
    result = [(x, [], []) for x in keywords]

    if docstring is None:
        return result

    # step 1: scan through docstring looking for keyword
    input = StringIO(docstring)

    offset = 0
    assert input.tell() == 0

    line = input.readline()
    lineno = 0  # zero-origined because tokenizer keeps 1-origined
    while line != '':
        a = _re_start.split(line)

        if len(a) > _RE_KEYWORD and a[_RE_KEYWORD] in keywords:
            # step 2: found a keyword, now rewind and scan looking
            # for either an inline expression or a series of sub-
            # indented expressions
            input.seek(offset)

            # ttw005... get lineno info and add to exception's lineno
            #           if a TokenError occurs...
            try:
                l = _read_block(input, lineno)
                lineno = l[3]
                # returns (keyword, decls, exprs, lineno)
            except tokenize.TokenError as ex:
                # reformat to include new line info
                raise tokenize.TokenError(
                    ex.args[0], (lineno + ex.args[1][0],) + ex.args[1][1:])
            # ...ttw005

            # Find the right result index based on keyword
            r = result[keywords.index(l[0])]
            r[1].extend(l[1])
            r[2].extend(l[2])
        else:
            lineno += 1
        if offset == input.tell(): break
        offset = input.tell()
        line = input.readline()
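
The helpers `_re_start`, `_RE_KEYWORD`, and `_read_block` are defined elsewhere in the module and are not shown here. The sketch below reconstructs only the keyword-detection step under an assumed regular expression; it illustrates how the `split` call above could find `pre::`/`post[]::` headers, and is not the original definition.

import re

_RE_KEYWORD = 1
# Assumed pattern: a keyword, an optional [...] qualifier, then '::'.
_re_start = re.compile(r'^\s*(\w+)(?:\[[^\]]*\])?\s*::')

for docline in ['pre::', 'post[]::', '    forall(keywords, isstring)']:
    parts = _re_start.split(docline)
    if len(parts) > _RE_KEYWORD and parts[_RE_KEYWORD] in ('pre', 'post'):
        print('found contract keyword:', parts[_RE_KEYWORD])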
Example 5
    def generate_tokens (self):
        """Creates a generator object that yields Python/Mython tokens.

        Based on (largely retyped and slightly modified from) the
        tokenize module by Ka Ping Yee and others, licensed under the
        PSFL (see .../basil/thirdparty/PSF_LICENSE).
        """
        namechars = string.ascii_letters + '_'
        numchars = '0123456789'
        while 1:
            line = self.readline()
            pos, max_pos = 0, len(line)
            if self.parenlev == 0 and self.in_mysuite:
                # This should only be reached if no non-whitespace
                # characters were found following the trailing colon
                # of a mysuite statement, and we've already recognized
                # the end of line token for the multi-line quotation.
                # This must go here so the readliner is properly
                # hijacked.
                mysuite_lnum = self.lnum
                mysuite_lns = []
                while ((line != '') and
                       (self.empty_line_pattern.match(line) is not None)):
                    mysuite_lns.append(line)
                    line = self.readline()
                    pos, max_pos = 0, len(line)
                indent_ws = ''
                indent_lnum = -1
                if line != '':
                    indent_lnum = self.lnum
                    match_obj = self.ws_pattern.match(line)
                    indent_ws = match_obj.groups(1)[0]
                    while line.startswith(indent_ws):
                        mysuite_lns.append(line)
                        line = self.readline()
                        pos, max_pos = 0, len(line)
                        while ((line != '') and
                               (self.empty_line_pattern.match(line) is not None)):
                            mysuite_lns.append(line)
                            line = self.readline()
                            pos, max_pos = 0, len(line)
                else:
                    # TODO: Shouldn't this be MyFrontIndentError?
                    raise SyntaxError("Empty mysuite block, starting on "
                                      "line %d, runs to end of file." %
                                      mysuite_lnum)
                indent_ws_len = len(indent_ws)
                if indent_ws_len <= self.indents[-1]:
                    # TODO: Shouldn't this be MyFrontIndentError?
                    raise SyntaxError("Improper indentation level at "
                                      "line %d; expected %d, got %d." %
                                      (indent_lnum, self.indents[-1],
                                       indent_ws_len))
                normalized_lns = [mysuite_ln[indent_ws_len:]
                                  if len(mysuite_ln) > indent_ws_len
                                  else (mysuite_ln[-2:]
                                        if mysuite_ln.endswith('\r\n')
                                        else mysuite_ln[-1:])
                                  for mysuite_ln in mysuite_lns]
                token = "".join(normalized_lns)
                lines = "".join(mysuite_lns)
                spos = (mysuite_lnum, indent_ws_len)
                epos = (spos[0] + len(mysuite_lns), 0)
                yield self.make_token(MYSUITE, token, spos,
                                      epos, lines)
                self.in_mysuite = False
            if self.contstr:
                if not line:
                    raise tokenize.TokenError("EOF in multi-line string",
                                              self.strstart)
                endmatch = self.endprog.match(line)
                if endmatch:
                    pos = end = endmatch.end(0)
                    yield self.make_token(
                        tokenize.STRING, self.contstr + line[:end],
                        self.strstart, (self.lnum, end), self.contline + line)
                    self.contstr = ''
                    self.needcont = 0
                    self.contline = None
                elif (self.needcont and (line[-2:] != '\\\n') and
                      (line[-3:] != '\\\r\n')):
                    yield self.make_token(
                        tokenize.ERRORTOKEN, self.contstr + line,
                        self.strstart, (self.lnum, len(line)), self.contline)
                    self.contstr = ''
                    self.contline = None
                    continue
                else:
                    self.contstr += line
                    self.contline += line
                    continue
            elif self.parenlev == 0 and not self.continued:
                if not line: break
                column = 0
                while pos < max_pos:
                    if line[pos] == ' ':
                        column += 1
                    elif line[pos] == '\t':
                        column = (column // self.tabsize + 1) * self.tabsize
                    elif line[pos] == '\f':
                        column = 0
                    else:
                        break
                    pos += 1
                if pos == max_pos:
                    break
                if line[pos] in '#\r\n':
                    if line[pos] == "#":
                        if sys.version_info < (2, 6):
                            yield self.make_token(
                                tokenize.COMMENT, line[pos:],
                                (self.lnum, pos), (self.lnum, len(line)),
                                line)
                        else:
                            comment_token = line[pos:].rstrip('\r\n')
                            nl_pos = pos + len(comment_token)
                            yield self.make_token(
                                tokenize.COMMENT, comment_token,
                                (self.lnum, pos),
                                (self.lnum, pos + len(comment_token)),
                                line)
                            yield self.make_token(
                                tokenize.NL, line[nl_pos:],
                                (self.lnum, nl_pos), (self.lnum, len(line)),
                                line)
                    else:
                        yield self.make_token(
                            tokenize.NL, line[pos:], (self.lnum, pos),
                            (self.lnum, len(line)), line)
                    continue
                if column > self.indents[-1]:
                    self.indents.append(column)
                    yield self.make_token(
                        tokenize.INDENT, line[:pos], (self.lnum, 0),
                        (self.lnum, pos), line)
                while column < self.indents[-1]:
                    if column not in self.indents:
                        raise IndentationError(
                            "unindent does not match any outer indentation "
                            "level", ("<tokenize>", self.lnum, pos, line))
                    self.indents.pop()
                    yield self.make_token(
                        tokenize.DEDENT, '', (self.lnum, pos),
                        (self.lnum, pos), line)
            else:
                if not line:
                    if __debug__:
                        pprint.pprint(self.__dict__)
                    raise tokenize.TokenError("EOF in multi-line statement",
                                              (self.lnum, 0))
                self.continued = 0
            while pos < max_pos:
                pseudomatch = pseudoprog.match(line, pos)
                if pseudomatch:
                    start, end = pseudomatch.span(1)
                    spos, epos, pos = (self.lnum, start), (self.lnum, end), end
                    token, initial = line[start:end], line[start]
                    if ((initial in numchars) or
                        (initial == '.' and token != '.')):
                        yield self.make_token(tokenize.NUMBER, token, spos,
                                              epos, line)
                    elif initial in '\r\n':
                        yield self.make_token(
                            tokenize.NL if self.parenlev > 0 else
                            tokenize.NEWLINE, token, spos, epos, line)
                    elif initial == '#':
                        yield self.make_token(tokenize.COMMENT, token, spos,
                                              epos, line)
                    elif token in tokenize.triple_quoted:
                        self.endprog = tokenize.endprogs[token]
                        endmatch = self.endprog.match(line, pos)
                        if endmatch:
                            pos = endmatch.end(0)
                            token = line[start:pos]
                            yield self.make_token(tokenize.STRING, token, spos,
                                                  (self.lnum, pos), line)
                        else:
                            self.strstart = (self.lnum, start)
                            self.contstr = line[start:]
                            self.contline = line
                            break
                    elif ((initial in tokenize.single_quoted) or
                          (token[:2] in tokenize.single_quoted) or
                          (token[:3] in tokenize.single_quoted)):
                        if token[-1] == '\n':
                            self.strstart = (self.lnum, start)
                            self.endprog = (tokenize.endprogs[initial] or
                                            tokenize.endprogs[token[1]] or
                                            tokenize.endprogs[token[2]])
                            self.contstr = line[start:]
                            self.needcont = 1
                            self.contline = line
                            break
                        else:
                            yield self.make_token(tokenize.STRING, token, spos,
                                                  epos, line)
                    elif ((hasattr(initial, 'isidentifier') and
                             initial.isidentifier()) or
                            (initial in namechars)):
                        yield self.make_token(tokenize.NAME, token, spos, epos,
                                              line)
                    elif initial == '\\':
                        self.continued = 1
                    elif token == '!':
                        yield self.make_token(BANG, token, spos, epos, line)
                    else:
                        if initial in '([{':
                            self.parenlev += 1
                        elif initial in '}])':
                            self.parenlev -= 1
                        yield self.make_token(tokenize.OP, token, spos, epos,
                                              line)
                        if (token == ':' and self.in_mysuite and
                            self.parenlev == 0):
                            cand_token = line[pos:].strip()
                            if cand_token:
                                token_with_ws_len = len(
                                    line[pos:].rstrip('\r\n'))
                                while line[pos].isspace():
                                    pos += 1
                                yield self.make_token(
                                    MYSUITE, cand_token, (self.lnum, pos),
                                    (self.lnum, epos[1] + token_with_ws_len),
                                    line)
                                pos += len(cand_token)
                                self.in_mysuite = False
                        elif self.in_myexpr:
                            open_delim = self.open_delim
                            close_delim = CLOSERS[open_delim]
                            myexpr_depth = 1
                            spos = epos
                            myexpr_lns = [line]
                            while 1:
                                while pos < max_pos:
                                    if line[pos] == open_delim:
                                        myexpr_depth += 1
                                    elif line[pos] == close_delim:
                                        myexpr_depth -= 1
                                    pos += 1
                                    if myexpr_depth <= 0:
                                        if close_delim in '}])':
                                            self.parenlev -= 1
                                        break
                                if myexpr_depth > 0:
                                    line = self.readline()
                                    myexpr_lns.append(line)
                                    pos, max_pos = 0, len(line)
                                else:
                                    assert myexpr_depth == 0
                                    break
                            epos = self.lnum, pos
                            if len(myexpr_lns) == 1:
                                token = line[spos[1]:pos]
                            else:
                                token = "".join((myexpr_lns[0][spos[1]:],
                                                 "".join(myexpr_lns[1:-2]),
                                                 myexpr_lns[-1][:pos]))
                            lines = "".join(myexpr_lns)
                            yield self.make_token(MYEXPR, token, spos, epos,
                                                  lines)
                            self.in_myexpr = False
                            del self.open_delim
                else:
                    yield self.make_token(
                        tokenize.ERRORTOKEN,
                        line[pos] if pos < max_pos else '', (self.lnum, pos),
                        (self.lnum, pos + 1), line)
                    pos += 1
        for indent in self.indents[1:]:
            yield self.make_token(tokenize.DEDENT, '', (self.lnum, 0),
                                  (self.lnum, 0), '')
        yield self.make_token(tokenize.ENDMARKER, '', (self.lnum, 0),
                              (self.lnum, 0), '')
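
For input without the mysuite/myexpr extensions, the generator above is meant to produce the same stream the standard tokenize module would. A small sanity check against the standard library, using a made-up source string with a backslash continuation, could look like this:

import io
import tokenize

sample = "if x:\n    y = 'a' + \\\n        'b'\n"
for tok in tokenize.generate_tokens(io.StringIO(sample).readline):
    print(tokenize.tok_name[tok.type], repr(tok.string), tok.start, tok.end)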
Example 6
    def __init__(self, message, location, start):
        tokenize.TokenError.__init__(self, message, location, start)
        self.location = location
        self.start = start
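
Because `tokenize.TokenError` is a plain `Exception` subclass, it accepts whatever arguments the caller passes; the examples above use the `(message, (row, col))` convention. A minimal, self-contained sketch of raising and catching it that way:

import tokenize

try:
    raise tokenize.TokenError("EOF in multi-line string", (3, 0))
except tokenize.TokenError as err:
    message, (row, col) = err.args
    print("tokenize failed at line %d, column %d: %s" % (row, col, message))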