def main():
    py_input = """exec admin 'show info'
print 'hello'
exec sql 'select * from namespace1'\n"""
    print py_input
    py_stream = cStringIO.StringIO(py_input)
    print tokenize.untokenize(tarantool_translate(py_stream.readline))
    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`. `f` is an open file or a string.
        The source code in f is tokenized to both 5- and 2-tuples.
        Both sequences are converted back to source code via
        tokenize.untokenize(), and the latter tokenized again to 2-tuples.
        The test fails if the 3 pair tokenizations do not match.

        When untokenize bugs are fixed, untokenize with 5-tuples should
        reproduce code that does not contain a backslash continuation
        following spaces.  A proper test should test this.
        """
        # Get source code and original tokenizations
        if isinstance(f, str):
            code = f.encode('utf-8')
        else:
            code = f.read()
            f.close()
        readline = iter(code.splitlines(keepends=True)).__next__
        tokens5 = list(tokenize(readline))
        tokens2 = [tok[:2] for tok in tokens5]
        # Reproduce tokens2 from pairs
        bytes_from2 = untokenize(tokens2)
        readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
        tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
        self.assertEqual(tokens2_from2, tokens2)
        # Reproduce tokens2 from 5-tuples
        bytes_from5 = untokenize(tokens5)
        readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
        tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
        self.assertEqual(tokens2_from5, tokens2)
Example #3
def get_context(source, position):
    lines, lineno = get_block(source, position)

    tokens = TokenGenerator(lines)
    ctype, ctx, match, fctx = 'expr', '', '', ''
    while True:
        tid, value = tokens.next()
        if not tid: break

        if tid == NAME and value == 'import':
            ctype, fctx = 'import', None
            ctx, match = parse_import(tokens)

        elif tid == NAME and value == 'from':
            fctx = None
            ctype, ctx, match = parse_from(tokens)

        elif tid == NAME or value in BRACKETS.keys():
            ctype = 'expr'
            tokens.hold(tid, value)
            ctx, match, fctx = parse_expr(tokens)
            ctx = untokenize(prep_tokens(ctx)).strip().rstrip('.')
            fctx = untokenize(prep_tokens(fctx)).strip().rstrip('.')

        else:
            ctype, ctx, match, fctx = 'expr', '', '', ''

    return ctype, lineno, ctx, match, fctx
Example #4
    def __init__(self, tokens, filename='<unknown>', line_offset=0):
        """Create an executor for a token stream

        Arguments:
            tokens (List[TokenInfo]): The tokens to execute.
            filename (Optional[str]): The filename where the tokens originated
                                      (default: ``'<unknown>'``).
                                      Used in error handling, but never opened.
            line_offset (Optional[str]): An offset of tokens within the input
                                         file (default: zero).

        Raises:
            RuleExecutionError: Raised if the token stream is invalid or if
                                it could not be compiled.
        """

        self.input_tokens = tokens
        self.input_lines = tokenize.untokenize(self.input_tokens).split('\n')
        self.filename = filename
        self.line_offset = line_offset

        self._validate_paren_levels(tokens)
        self.eval_tokens = self._gen_eval_tokens(tokens)
        self.eval_str = tokenize.untokenize(self.eval_tokens)
        self.codeobj = self._compile(self.eval_str)
Example #5
 def dealwith(self, readline, **kwargs):
     """
         Replace the contents of spec file with the translated version
         readline should be a callable object
         , which provides the same interface as the readline() method of built-in file objects
     """
     data = []
     try:
         # We pass in the data variable as an argument so that we
         # get partial output even in the case of an exception.
         self.tokeniser.translate(readline, data, **kwargs)
     except Exception as e:
         # Comment out partial output so that it doesn't result in
         # a syntax error when received by the interpreter.
         lines = []
         for line in untokenize(data).split('\n'):
             lines.append("# %s" % line)
         
         # Create exception to put into code to announce error
         exception = 'raise Exception("""--- internal spec codec error --- %s""")' % e
         
         # Need to make sure the exception doesn't add a new line and put out line numbers
         if len(lines) == 1:
             data = "%s%s" % (exception, lines[0])
         else:
             lines.append(exception)
             first_line = lines.pop()
             lines[0] = "%s %s" % (first_line, lines[0])
             data = '\n'.join(lines)
     else:
         # At this point, data is a list of tokens
         data = untokenize(data)
     
     return data
Example #6
def main():
    """Executed when script is run as-is."""
    # magic_files = {}
    for filename in locate_files(ROOT_DIR):
        print("Processing %s" % filename)
        with open(filename, "rt") as f:
            tokens = list(tokenize.generate_tokens(f.readline))
            text1 = tokenize.untokenize(tokens)
            ntokens = normalize_tokens(tokens)
            text2 = tokenize.untokenize(ntokens)
            assert text1 == text2
def remove_comments(src):
    """
    This reads tokens using tokenize.generate_tokens and recombines them
    using tokenize.untokenize, and skipping comment/docstring tokens in between
    """
    f = cStringIO.StringIO(src)
    class SkipException(Exception): pass
    processed_tokens = []
    last_token = None
    # go through all the tokens and try to skip comments and docstrings
    for tok in tokenize.generate_tokens(f.readline):
        t_type, t_string, t_srow_scol, t_erow_ecol, t_line = tok

        try:
            if t_type == tokenize.COMMENT:
                raise SkipException()

            elif t_type == tokenize.STRING:

                if last_token is None or last_token[0] in [tokenize.INDENT]:
                    # FIXME: this may remove valid strings too?
                    #raise SkipException()
                    pass

        except SkipException:
            pass
        else:
            processed_tokens.append(tok)

        last_token = tok

    return tokenize.untokenize(processed_tokens)
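For reference, a minimal self-contained Python 3 sketch of the same comment-stripping round trip (not the project's code; it only assumes the standard library):

import io
import tokenize

def strip_comments_py3(src):
    # Tokenize, drop COMMENT tokens, and recombine with untokenize.
    toks = [t for t in tokenize.generate_tokens(io.StringIO(src).readline)
            if t.type != tokenize.COMMENT]
    return tokenize.untokenize(toks)

print(strip_comments_py3("x = 1  # drop me\n"))
# prints "x = 1" followed by padding spaces; the comment text is gone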
 def __init__(self, *args, **kwargs):
     utf_8.StreamReader.__init__(self, *args, **kwargs)
     try:
         data = tokenize.untokenize(tarantool_translate(self.stream.readline))
         self.stream = cStringIO.StringIO(data)
     except Exception:
         self.stream.seek(0)
def transform_source_code(text):
    '''Input text is assumed to contain some French equivalent words to
       normal Python keywords and a few builtin functions.
       These are transformed into normal Python keywords and functions.
    '''
    # continue, def, global, lambda, nonlocal remain unchanged by choice

    dictionary = {'Faux': 'False', 'Aucun': 'None', 'Vrai': 'True',
                   'et': 'and', 'comme': 'as', 'affirme': 'assert',
                   'sortir': 'break', 'classe': 'class', 'élimine': 'del',
                   'ousi': 'elif', 'autrement': 'else', 'exception': 'except',
                   'finalement': 'finally', 'pour': 'for', 'de': 'from',
                   'si': 'if', 'importe': 'import', 'dans': 'in', 'est': 'is',
                   'non': 'not', 'ou': 'or', 'passe': 'pass',
                   'soulever': 'raise', 'retourne': 'return', 'essayer': 'try',
                   'pendant': 'while', 'avec': 'with', 'céder': 'yield',
                   'imprime': 'print', 'intervalle': 'range'}

    toks = tokenize.generate_tokens(StringIO(text).readline)
    result = []
    for toktype, tokvalue, _, _, _ in toks:
        if toktype == tokenize.NAME and tokvalue in dictionary:
            result.append((toktype, dictionary[tokvalue]))
        else:
            result.append((toktype, tokvalue))
    return tokenize.untokenize(result)
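A usage sketch for transform_source_code (assuming the module imports tokenize and StringIO from io):

print(transform_source_code("si Vrai:\n    imprime('bonjour')\n"))
# roughly: if True :\n    print ('bonjour')\n  (untokenize re-spaces the tokens)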
Example #10
def commandline():
    """zhpy3, the python language in Traditional Chinese

    usage: twpy file.twpy
    """
    if len(sys.argv) != 2:
        print(commandline.__doc__)
        sys.exit(1)

    file_path = sys.argv[1]

    if not os.path.exists(file_path):
        print("twpy: file '%s' does not exists" % file_path)
        sys.exit(1)

    #sys.meta_path = [ImportHook()]

    sys.path[0] = os.path.dirname(os.path.join(os.getcwd(), file_path))

    source = tokenize.untokenize(
            list(translate_code(open(file_path).readline, translations)))

    #translate_module(__builtins__)

    code = compile(source, file_path, "exec")

    runpy._run_module_code(code, mod_name="__main__")
def fixLazyJson (in_text):
    tokengen = tokenize.generate_tokens(StringIO(in_text).readline)

    result = []
    for tokid, tokval, _, _, _ in tokengen:
        # fix unquoted strings
        if (tokid == token.NAME):
            if tokval not in ['true', 'false', 'null', '-Infinity', 'Infinity', 'NaN']:
                tokid = token.STRING
                tokval = u'"%s"' % tokval

        # fix single-quoted strings
        elif (tokid == token.STRING):
            if tokval.startswith ("'"):
                tokval = u'"%s"' % tokval[1:-1].replace ('"', '\\"')

        # remove invalid commas
        elif (tokid == token.OP) and ((tokval == '}') or (tokval == ']')):
            if (len(result) > 0) and (result[-1][1] == ','):
                result.pop()

        # fix single-quoted strings (duplicate of the branch above; never reached)
        elif (tokid == token.STRING):
            if tokval.startswith ("'"):
                tokval = u'"%s"' % tokval[1:-1].replace ('"', '\\"')

        result.append((tokid, tokval))

    return tokenize.untokenize(result)
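A usage sketch for fixLazyJson, assuming Python 3 with import json, token, tokenize and from io import StringIO:

lazy = "{foo: 'bar', numbers: [1, 2,], flag: true}\n"
print(json.loads(fixLazyJson(lazy)))
# -> {'foo': 'bar', 'numbers': [1, 2], 'flag': True}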
Example #12
def globals_from_file(filename):
    _file = open(filename)
    data  = tokenize.untokenize(translate(_file.readline))
    compiled = compile(data, filename, "exec")
    globals_ = {}
    exec(compiled, globals_)
    return globals_
Example #13
def decistmt(s):
    """Substitute Decimals for floats in a string of statements.

    >>> from decimal import Decimal
    >>> s = 'print +21.3e-5*-.1234/81.7'
    >>> decistmt(s)
    "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"

    The format of the exponent is inherited from the platform C library.
    Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
    we're only showing 12 digits, and the 13th isn't close to 5, the
    rest of the output should be platform-independent.

    >>> exec(s) #doctest: +ELLIPSIS
    -3.21716034272e-0...7

    Output from calculations with Decimal should be identical across all
    platforms.

    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """

    result = []
    g = generate_tokens(StringIO(s).readline)  # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and "." in tokval:  # replace NUMBER tokens
            result.extend([(NAME, "Decimal"), (OP, "("), (STRING, repr(tokval)), (OP, ")")])
        else:
            result.append((toknum, tokval))
    return untokenize(result)
Example #14
    def _get_trait_definition(self):
        """ Retrieve the Trait attribute definition
        """

        # Get the class source and tokenize it.
        source = inspect.getsource(self.parent)
        string_io = StringIO.StringIO(source)
        tokens = tokenize.generate_tokens(string_io.readline)

        # find the trait definition start
        trait_found = False
        name_found = False
        while not trait_found:
            item = tokens.next()
            if name_found and item[:2] == (token.OP, '='):
                trait_found = True
                continue
            if item[:2] == (token.NAME, self.object_name):
                name_found = True

        # Retrieve the trait definition.
        definition_tokens = []
        for type, name, start, stop, line in tokens:
            if type == token.NEWLINE:
                break
            item = (type, name, (0, start[1]), (0, stop[1]), line)
            definition_tokens.append(item)

        return tokenize.untokenize(definition_tokens).strip()
Example #15
def Untokenize(offset_tokens):
  """Return the string representation of an iterable of OffsetTokens."""
  # Make a copy. Don't modify the original.
  offset_tokens = collections.deque(offset_tokens)

  # Strip leading NL tokens.
  while offset_tokens[0].type == tokenize.NL:
    offset_tokens.popleft()

  # Strip leading vertical whitespace.
  first_token = offset_tokens.popleft()
  # Take care not to modify the existing token. Create a new one in its place.
  first_token = OffsetToken(first_token.type, first_token.string,
                            (0, first_token.offset[1]))
  offset_tokens.appendleft(first_token)

  # Convert OffsetTokens to tokenize tokens.
  tokenize_tokens = []
  row = 1
  col = 0
  for t in offset_tokens:
    offset_row, offset_col = t.offset
    if offset_row == 0:
      col += offset_col
    else:
      row += offset_row
      col = offset_col
    tokenize_tokens.append((t.type, t.string, (row, col), (row, col), None))

  # tokenize can't handle whitespace before line continuations.
  # So add a space.
  return tokenize.untokenize(tokenize_tokens).replace('\\\n', ' \\\n')
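OffsetToken is not shown in this excerpt; a minimal definition consistent with the attribute access above (an assumption, not necessarily the project's actual class) would be:

import collections

# type: a tokenize token type; string: the token text;
# offset: (row_delta, col) relative to the previous token.
OffsetToken = collections.namedtuple('OffsetToken', ['type', 'string', 'offset'])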
Example #16
def main():
    import tempfile
    if sys.argv[1] == '-p':
        file = sys.argv[2]
        print_script = True
        tree = maketree(Tokens(file), preamble=True)
    else:
        file = sys.argv[1]
        print_script = False
        tree = maketree(Tokens(file))

    try:
        code = tokenize.untokenize(flatten(tree)).decode()
    except:
        pprint(tree, indent=4)
        raise
    if print_script:
        print(code)
        sys.exit()

    del sys.argv[0]

    tf = tempfile.NamedTemporaryFile('w')
    tf.write(code)
    tf.flush()
    ns = {'__name__': '__main__'}
    exec(PREAMBLE, ns)
    try:
        exec(compile(code, tf.name, 'exec'), ns)
    except Exception as e:
        # pprint(tree, indent=4)
        print(code)
        raise
Example #17
File: expr.py Project: Axik/pandas
def _preparse(source, f=compose(_replace_locals, _replace_booleans,
                                _rewrite_assign)):
    """Compose a collection of tokenization functions

    Parameters
    ----------
    source : str
        A Python source code string
    f : callable
        This takes a tuple of (toknum, tokval) as its argument and returns a
        tuple with the same structure but possibly different elements. Defaults
        to the composition of ``_rewrite_assign``, ``_replace_booleans``, and
        ``_replace_locals``.

    Returns
    -------
    s : str
        Valid Python source code

    Notes
    -----
    The `f` parameter can be any callable that takes *and* returns input of the
    form ``(toknum, tokval)``, where ``toknum`` is one of the constants from
    the ``tokenize`` module and ``tokval`` is a string.
    """
    assert callable(f), 'f must be callable'
    return tokenize.untokenize(lmap(f, tokenize_string(source)))
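For context, any f accepted by _preparse is just a callable from one (toknum, tokval) pair to another; a hypothetical example of such a mapper (not part of pandas):

import tokenize

def upper_names(tok):
    # Hypothetical mapper: upper-case NAME tokens, pass everything else through.
    toknum, tokval = tok
    if toknum == tokenize.NAME:
        return toknum, tokval.upper()
    return toknum, tokval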
def decistmt(s):
    """Substitute Decimals for floats in a string of statements.

    >>> from decimal import Decimal
    >>> s = 'print +21.3e-5*-.1234/81.7'
    >>> decistmt(s)
    "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"

    >>> exec(s)
    -3.21716034272e-007
    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """
    result = []
    # tokenize the string
    g = tokenize.generate_tokens(StringIO(s).readline)
    for toknum, tokval, _, _, _ in g:
        # replace NUMBER tokens
        if toknum == tokenize.NUMBER and '.' in tokval:
            result.extend([
                (tokenize.NAME, 'Decimal'),
                (tokenize.OP, '('),
                (tokenize.STRING, repr(tokval)),
                (tokenize.OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return tokenize.untokenize(result)
Example #19
def test_DeleteStatement_valid(input, expected_type, expected_expr):
    smt = parser.ExpressionStatement.try_parse(tok(input))

    str_expr = tokenize.untokenize(smt.expr).strip()

    assert smt.type == expected_type
    assert str_expr == expected_expr
Example #20
def feedInput(code, test) :
	# Initial variable declaration
	temp = 0
	i = 0
	limit = len(test)
	# Tokenize the code
	g = tokenize.generate_tokens(io.BytesIO("\n".join(code)).readline)
	result = []
	# Traverse for each token
	for toknum, tokval, _, _, _ in g:
		# True if an input statement wasn't found 3 tokens prior
		if(temp==0) :
			# True if there are test cases still to be fed in and the token found happens to be 'input'
			if(i<limit and tokval=="input") :
				# replace token with value
				result.append((toknum, test[i]))
				i += 1
				temp = 3
			else :
				result.append((toknum, tokval))
		else :
			# Input was found
			temp -= 1
	# Return the untokenized form of the code as a list of lines
	return tokenize.untokenize(result).split("\n")
Example #21
def fix_lazy_json(in_text):
    """
    This function modifies JS-contained JSON to be valid.

    Posted in http://stackoverflow.com/questions/4033633/handling-lazy-json-\
            in-python-expecting-property-name by Pau Sánchez (codigomanso.com)
    """
    tokengen = tokenize.generate_tokens(io.StringIO(in_text).readline)

    valid_tokens = ['true', 'false', 'null', '-Infinity', 'Infinity', 'NaN']
    result = []
    for tokid, tokval, _, _, _ in tokengen:
        # fix unquoted strings
        if tokid == token.NAME:
            tokid, tokval = fix_unquoted((tokid, tokval), valid_tokens)

        # fix single-quoted strings
        elif tokid == token.STRING:
            tokval = fix_single_quoted(tokval)

        # remove invalid commas
        elif (tokid == token.OP) and ((tokval == '}') or (tokval == ']')):
            result = remove_invalid_commas(result)

        result.append((tokid, tokval))

    return tokenize.untokenize(result)
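The helpers fix_unquoted, fix_single_quoted and remove_invalid_commas are not shown in this excerpt; plausible minimal versions consistent with how they are called (assumptions, not the project's actual code):

import token

def fix_unquoted(token_pair, valid_tokens):
    # Quote bare NAME tokens unless they are valid JSON literals.
    tokid, tokval = token_pair
    if tokval not in valid_tokens:
        return token.STRING, '"%s"' % tokval
    return tokid, tokval

def fix_single_quoted(tokval):
    # Rewrite 'single-quoted' strings as "double-quoted" ones.
    if tokval.startswith("'"):
        tokval = '"%s"' % tokval[1:-1].replace('"', '\\"')
    return tokval

def remove_invalid_commas(result):
    # Drop a trailing comma right before a closing brace or bracket.
    if result and result[-1][1] == ',':
        result.pop()
    return result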
Example #22
def convert(readline):
    result = []
    in_repeat = False

    for ttype, tval, _, _, _ in tokenize.generate_tokens(readline):
        if ttype == token.NAME and tval == "repeat":
            result.extend([
                (token.NAME, "for"),
                (token.NAME, "_"),
                (token.NAME, "in"),
                (token.NAME, "range"),
                (token.OP, "(")
            ])
            in_repeat = True

        elif in_repeat and ttype == token.OP and tval == ":":
            result.extend([
                (token.OP, ")"),
                (token.OP, ":")
            ])
            in_repeat = False  # close the injected "range(" only once

        else:
            result.append((ttype, tval))

    return tokenize.untokenize(result)
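A usage sketch for convert (assuming import io, token, tokenize):

src = "repeat 3:\n    x = 1\n"
print(convert(io.StringIO(src).readline))
# roughly: for _ in range (3 ):\n    x =1 \n  (compat-mode untokenize re-spaces tokens)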
Example #23
    def preprocess(source):
        # Syntax tree has whitespace & comments stripped, so use the tokenizer
        # to get them instead & strip out any pydoc.

        import tokenize, token, io

        line_indents = []
        comments = []

        def _preprocess(tokens):
            import token

            lineno = 0
            indent = comment = ""
            for t in tokens:
                if t.type == token.INDENT:
                    indent = t.string
                if t.type == tokenize.COMMENT:
                    comment = "//" + t.string[1:]
                if t.type in (token.NEWLINE, tokenize.NL):
                    line_indents.append(indent)
                    comments.append(comment)
                    # indent = '' - only counts new indents?
                    comment = ""
                    lineno += 1
                if t.type == token.STRING:
                    continue
                yield (t.type, t.string)

        stream = io.StringIO(source).readline
        stream = _preprocess(tokenize.generate_tokens(stream))
        source = tokenize.untokenize(stream)
        return source, line_indents, comments
Example #24
    def __substituteVars(self, code, env):
        '''
        Expand any variables that exist in the given environment to their corresponding values
        '''

        # tokenize the given expression code
        gtoks = tokenize.generate_tokens(StringIO.StringIO(code).readline)

        # iterate over each token and replace any matching token with its corresponding value
        tokens = []
        for toknum, tokval, _, _, _ in gtoks:
            if toknum == tokenize.NAME and tokval in env:
                ntoks = tokenize.generate_tokens(StringIO.StringIO(str(env[tokval])).readline)
                tokens.extend(ntoks)
            else:
                tokens.append((toknum, tokval))

        # convert the tokens back to a string
        code = tokenize.untokenize(tokens)

        # remove all the leading and trailing spaces
        code = code.strip()

        # return the modified string
        return code
Example #25
 def visit(self):
     modified = []
     for toknum, tokval, tokbegin, tokend, tokline in self.tokens:
         # print (token.tok_name[toknum], tokval)
         if toknum != tokenize.COMMENT:
             modified.append((toknum, tokval))
         else:
             tokval_1 = tokval.strip(" \t#")
             tokbegin = tokbegin[0] + self.offset, tokbegin[1]
             tokend = tokend[0] + self.offset, tokend[1]
             handler_name = "%s_handler" % tokval_1.split()[0].lower()
             handler = getattr(self, handler_name, None)
             if handler:
                 dedents_new = len(modified)
                 new_tokens = handler(toknum, tokval_1, tokbegin, tokend, tokline)
                 self.offset += sum([1 for x in new_tokens if x[0] == tokenize.NEWLINE])
                 modified.extend(new_tokens)
                 dedents_old = len(modified) + 1
                 self.dedents_patch_loc.append((dedents_new, dedents_old))
             else:
                 modified.append((toknum, tokval))
     # for x,y in modified:
     #     print (token.tok_name[x], y)
     # print self.dedents_patch_loc
     # print modified
     for x, y in self.dedents_patch_loc:
         # print modified[x], token.tok_name[modified[y][0]]
         if modified[y][0] in [tokenize.INDENT, tokenize.DEDENT]:
             modified.insert(x, modified[y])
             del modified[y + 1]
     # print modified
     return tokenize.untokenize(modified)
Example #26
    def _get_trait_definition(self):
        """ Retrieve the Trait attribute definition
        """

        # Get the class source and tokenize it.
        source = inspect.getsource(self.parent)
        string_io = StringIO.StringIO(source)
        tokens = tokenize.generate_tokens(string_io.readline)

        # find the trait definition start
        trait_found = False
        name_found = False
        while not trait_found:
            item = next(tokens)
            if name_found and item[:2] == (token.OP, '='):
                trait_found = True
                continue
            if item[:2] == (token.NAME, self.object_name):
                name_found = True

        # Retrieve the trait definition.
        definition_tokens = _get_definition_tokens(tokens)
        definition = tokenize.untokenize(definition_tokens).strip()
        if not IS_PY3:
            definition = unicode(definition, 'utf-8')

        return definition
Example #27
def tostring(tokens):
    '''Convert a list of tokens to a string.'''

    last_pos = tokens[0].start

    while tokens[-1].type == DEDENT:
        tokens.pop()

    if tokens[-1].type != ENDMARKER:
        start = end = tokens[-1].end
        tokens.append(tknew(ENDMARKER, '', start, end, line=''))

    # tkprint(tokens)

    tokens = [tk.to_token_info() for tk in tokens]
    try:
        return tokenize.untokenize(tokens)
    except ValueError:
        for idx, tk in enumerate(tokens):
            a, b = tk.start
            c, d = last_pos
            if (a < c) or (a == c and d > b):
                fmt = idx, tokens[idx - 1], tk
                print(tokens)
                raise ValueError(
                    'tokens overlap starting at #%s:\n\t%s\n\t%s' % fmt)
            last_pos = tk.end
        else:
            raise
Example #28
    def gen_lambdas():
        def gen():
            yield src + "\n"

        g = gen()
        step = 0
        tokens = []
        for tok in tokenize.generate_tokens(getattr(g, "next", getattr(g, "__next__", None))):
            if step == 0:
                if tok[0] == tokenize.NAME and tok[1] == "lambda":
                    step = 1
                    tokens = [tok]
                    level = 0
            elif step == 1:
                if tok[0] == tokenize.NAME:
                    tokens.append(tok)
                    step = 2
                else:
                    step = 0
            elif step == 2:
                if tok[0] == tokenize.OP and tok[1] == ":":
                    tokens.append(tok)
                    step = 3
                else:
                    step = 0
            elif step == 3:
                if level == 0 and (tok[0] == tokenize.OP and tok[1] in ",)" or tok[0] == tokenize.ENDMARKER):
                    yield tokenize.untokenize(tokens).strip()
                    step = 0
                else:
                    tokens.append(tok)
                    if tok[0] == tokenize.OP:
                        if tok[1] in "[({": level += 1
                        if tok[1] in "])}": level -= 1
        assert not tokens
 def __init__(self, *args, **kwargs):
     codecs.StreamReader.__init__(self, *args, **kwargs)
     data = tokenize.untokenize(translate(self.stream.readline))
     logging.debug('START RESULT')
     logging.debug(data)
     logging.debug('END RESULT')
     self.stream = StringIO.StringIO(data)
Example #30
def nocomment(s):
    result = []
    g = tokenize.generate_tokens(io.BytesIO(s).readline)  
    for toknum, tokval, _, _, _  in g:
        if toknum != tokenize.COMMENT:
            result.append((toknum, tokval))
    return tokenize.untokenize(result)
Example #31
    def from_string(cls, input_string):
        """Parse linear expression mathematical units and return a quantity object.
        """

        if not input_string:
            return cls()

        input_string = string_preprocessor(input_string)

        if '[' in input_string:
            input_string = input_string.replace('[', '__obra__').replace(']', '__cbra__')
            reps = True
        else:
            reps = False

        gen = ptok(input_string)
        result = []
        for toknum, tokval, _, _, _ in gen:
            if toknum == NAME:
                if not tokval:
                    continue
                result.extend([
                    (NAME, 'L_'),
                    (OP, '('),
                    (STRING, '"' + tokval + '"'),
                    (OP, ')')
                ])
            else:
                result.append((toknum, tokval))

        ret = eval(untokenize(result),
                   {'__builtins__': None},
                   {'L_': cls.from_word})
        if isinstance(ret, Number):
            return ParserHelper(ret)

        if not reps:
            return ret

        return ParserHelper(ret.scale,
                            {key.replace('__obra__', '[').replace('__cbra__', ']'): value
                            for key, value in ret.items()})
Example #32
def _change_text(text):
    """Pre-processing of the input text.

    - Wrap constant parameters:

      ``a = 1`` is converted as ``a = _CONVERT_VARIABLE(EXPR="1")``

    - Wrap comments:

      ``# line of comment.`` is converted as
      ``_CONVERT_COMMENT(EXPR="# line of comment.")``

    Returns:
        list[int]: list of line numbers of end of instruction.
        str: changed text.
    """
    generator = tokenize.generate_tokens(StringIO(text).readline)
    result = []
    buff = []
    eoi = []
    started = False
    for ret in generator:
        num, val = ret[:2]
        started = started or num == token.NAME
        # _debug_parse(num, val, ret[4])
        if num == token.NEWLINE:
            eoi.append(ret[2][0])
        buff.append((num, val))
        if num in (token.NEWLINE, token.ENDMARKER):
            buff = _replace_variable(buff)
            started = False
        elif num == tokenize.COMMENT and len(buff) == 1:
            # ignore inline comment
            buff = _replace_comment(buff)
            started = False
        if not started:
            result.extend(buff)
            # _debug_parse(tokenize.COMMENT, "> > > new buffer > > >", "???")
            buff = []
    changed = tokenize.untokenize(result)
    debug_message("Pre-processed text:\n", changed)
    return eoi, changed
Example #33
def pre_parse(code):
    result = []

    try:
        g = tokenize(io.BytesIO(code.encode('utf-8')).readline)
        for token in g:
            # Alias contract definition to class definition.
            if token.type == COMMENT and "@version" in token.string:
                parse_version_pragma(token.string[1:])
            if (token.type, token.string, token.start[1]) == (NAME, "contract", 0):
                token = TokenInfo(token.type, "class", token.start, token.end, token.line)
            # Prevent semi-colon line statements.
            elif (token.type, token.string) == (OP, ";"):
                raise StructureException("Semi-colon statements not allowed.", token.start)

            result.append(token)
    except TokenError as e:
        raise StructureException(e.args[0], e.args[1]) from e

    return untokenize(result).decode('utf-8')
Example #34
def prg2py_after_preproc(data, parser_start, input_filename):
    input_stream = antlr4.InputStream(data)
    lexer = VisualFoxpro9Lexer(input_stream)
    stream = antlr4.CommonTokenStream(lexer)
    parser = VisualFoxpro9Parser(stream)
    tree = run_parser(stream, parser, parser_start)
    TreeCleanVisitor().visit(tree)
    output_tree = PythonConvertVisitor(input_filename).visit(tree)
    if not isinstance(output_tree, list):
        return output_tree
    output = add_indents(output_tree, 0)
    options = autopep8.parse_args(['--max-line-length', '100000', '-'])
    output = autopep8.fix_code(output, options)
    tokens = list(tokenize.generate_tokens(io.StringIO(output).readline))
    for i, token in enumerate(tokens):
        token = list(token)
        if token[0] == tokenize.STRING and token[1].startswith('u'):
            token[1] = token[1][1:]
        tokens[i] = tuple(token)
    return tokenize.untokenize(tokens)
	def fixLazyJson (self, in_text):
		tokengen = tokenize.generate_tokens(StringIO(in_text).readline)
		result = []
		for tokid, tokval, _, _, _ in tokengen:
			if (tokid == token.NAME):
				if tokval not in ['true', 'false', 'null', '-Infinity', 'Infinity', 'NaN']:
					tokid = token.STRING
					tokval = u'"%s"' % tokval
			elif (tokid == token.STRING):
				if tokval.startswith ("'"):
					tokval = u'"%s"' % tokval[1:-1].replace ('"', '\\"')
			elif (tokid == token.OP) and ((tokval == '}') or (tokval == ']')):
				if (len(result) > 0) and (result[-1][1] == ','):
					result.pop()			
			elif (tokid == token.STRING):
				if tokval.startswith ("'"):
					tokval = u'"%s"' % tokval[1:-1].replace ('"', '\\"')
			result.append((tokid, tokval))

		return tokenize.untokenize(result)
Example #36
def inspect_signature(obj):
    """
    Custom signature inspection primarily for cython generated callables.

    Cython puts the signatures to the first line of the docstrings, which we
    can reuse to parse the python signature from, but some gymnastics are
    required, like removing the cython typehints.

    It converts the cython signature:
        array(obj, type=None, mask=None, size=None, from_pandas=None,
              bool safe=True, MemoryPool memory_pool=None)
    To:
        <Signature (obj, type=None, mask=None, size=None, from_pandas=None,
                    safe=True, memory_pool=None)>
    """
    cython_signature = obj.__doc__.splitlines()[0]
    cython_tokens = _tokenize_signature(cython_signature)
    python_tokens = _convert_typehint(cython_tokens)
    python_signature = tokenize.untokenize(python_tokens)
    return inspect._signature_fromstr(inspect.Signature, obj, python_signature)
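_tokenize_signature and _convert_typehint are not shown here; a rough sketch of the idea (an assumption, not pyarrow's actual implementation) is to tokenize the signature line and drop any NAME token that is immediately followed by another NAME, since that pattern is a Cython type prefix such as bool in "bool safe=True":

import io
import tokenize

def _tokenize_signature_sketch(sig):
    return list(tokenize.generate_tokens(io.StringIO(sig).readline))

def _convert_typehint_sketch(toks):
    out = []
    for cur, nxt in zip(toks, toks[1:] + [None]):
        if cur.type == tokenize.NAME and nxt is not None and nxt.type == tokenize.NAME:
            continue  # cur is a type prefix; keep only the parameter name that follows
        out.append((cur.type, cur.string))
    return out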
Example #37
def 中翻英檔案(存放目錄, 待翻譯檔案, 新檔案名= None):

    if 存放目錄== "":
        pass
    elif not os.path.exists(存放目錄):
        os.mkdir(存放目錄)
    f= open(待翻譯檔案, 'r', encoding='utf-8')
    程式碼= f.read()
    f.close()
    英文化程式碼= 中翻英後處理翻譯(程式碼)
    f= open('temp12321.py', 'w', encoding='utf-8')
    f.write(英文化程式碼)
    f.close()
    
    ## Key step: break the program into tokens (tokenize),
    #
    # extracting every variable, function, class and method together with its type.
    #
    程式碼, Token表= 剖析程式碼('temp12321.py')
    
    os.remove("temp12321.py")
    Token表= 中翻英名稱翻譯(Token表)



    英文化程式碼= tn.untokenize(Token表) # done in this single line!
      # Post-processing is mostly brute-force string replacement.

    #
    # Save each translated program to its own file,
    # print(新檔案名)
    if 新檔案名== None:
        新檔案名= 'te_'+ os.path.basename(待翻譯檔案)
    if 存放目錄!= "":
        新檔案名= 存放目錄 + os.path.sep + 新檔案名
    print(新檔案名)
    翻譯後檔案= open(新檔案名,'w', encoding= 'utf-8')
    翻譯後檔案.write(英文化程式碼)
    翻譯後檔案.close()
    
    return 程式碼,英文化程式碼,Token表
Example #38
def compile_as_decimal(expr):
    '''This function takes an expression given as an argument to
    one of the verbs like arr or filter or sort or tap, and compiles
    it so that we can execute it more efficiently.
    Two little bits of syntactic sugar are applied to the expression:
    First we make all tokens that look like floats (NUMBER and
    contains '.') into Decimals, so that we avoid the normal FP accuracy &
    rounding issues.  Second we translate '?' into a (decimal) random number.

    There are two further bits of syntactic sugar to help when calling tab from Vi:
    to avoid the need to escape ! and %, you can write <> for != and ' mod ' for %.

    '''
    clean_expression = expr.replace('<>', '!=')
    clean_expression = re.sub(r'\bmod\b', '%', clean_expression)
    clean_expression = re.sub(r'(?<![<>!])=+', '==',
                              clean_expression)  # also allow a=b
    out = []
    try:
        for tn, tv, _, _, _ in tokenize.generate_tokens(
                io.StringIO(clean_expression).readline):
            if tn == tokenize.NUMBER and '.' in tv:
                out.append((tokenize.NAME, 'Decimal'))
                out.append((tokenize.OP, '('))
                out.append((tokenize.STRING, repr(tv)))
                out.append((tokenize.OP, ')'))
            elif tv == '?':
                out.append((tokenize.NAME, 'randomd'))
                out.append((tokenize.OP, '('))
                out.append((tokenize.OP, ')'))
            else:
                out.append((tn, tv))
    except tokenize.TokenError:
        return (False, '?! tokens ' + expr)

    try:
        cc = compile(tokenize.untokenize(out), "<string>", 'eval')
    except (SyntaxError, ValueError):
        return (False, '?! syntax ' + expr)
    else:
        return (True, cc)
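A usage sketch (assuming the module imports io, re, tokenize and Decimal from decimal, plus a randomd helper):

ok, code = compile_as_decimal('0.1 + 0.2')
if ok:
    print(eval(code, {'Decimal': Decimal, 'randomd': lambda: Decimal(0)}))
# -> 0.3 exactly, instead of the float result 0.30000000000000004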
Example #39
def eval_arguments(args):
    args = args.strip()
    if not args or (args == '()'):
        return ()
    tokens = list(tokenize.generate_tokens(StringIO(args).readline))

    def remap():
        for type, name, _, _, _ in tokens:
            if type == tokenize.NAME and name not in REMAPPINGS:
                yield tokenize.STRING, '"%s"' % name
            else:
                yield type, name

    untok = tokenize.untokenize(remap())
    if untok[1:-1].strip():
        untok = untok[:-1] + ',)'  # Force a tuple.
    try:
        return eval(untok, REMAPPINGS)
    except Exception as e:
        raise ValueError('Couldn\'t evaluate expression "%s" (became "%s"), '
                         'error "%s"' % (args, untok, str(e)))
Example #40
def generate_ctypes(header_file, py_file, cpp_flags):
    logging.info("Generating %s from %s", py_file, header_file)

    buffer = io.StringIO()
    ctypeslib.codegen.codegenerator.generate_code([header_file], buffer,
                                                  types=(ctypeslib.codegen.typedesc.Alias,
                                                         ctypeslib.codegen.typedesc.Structure,
                                                         ctypeslib.codegen.typedesc.Variable,
                                                         ctypeslib.codegen.typedesc.Enumeration,
                                                         ctypeslib.codegen.typedesc.Function,
                                                         ctypeslib.codegen.typedesc.Macro,
                                                         ctypeslib.codegen.typedesc.Typedef,
                                                         ctypeslib.codegen.typedesc.Union),
                                                  filter_location=True,
                                                  flags=cpp_flags)

    bytes_buffer = io.BytesIO(buffer.getvalue().encode())
    bytes = tokenize.untokenize(rewrite_ctypes_little_endian(bytes_buffer.readline))

    with open(py_file, 'wb') as outfile:
        outfile.write(bytes)
Example #41
def indent(code, indentation=4, count=1):
    """Remove indentation at the beginning and end."""
    tokens = [(x[0], x[1]) for x in _tokenize(code)]

    for _ in range(count):
        tokens_ = []
        for token in tokens:
            if token[0] == INDENT:
                token = (INDENT, indentation * " " + token[1])
            tokens_.append(token)
        tokens = tokens_
        if tokens[0][0] != INDENT:
            tokens = [(INDENT, indentation * " ")
                      ] + tokens[:-1] + [(DEDENT, "")] + [tokens[-1]]
    logger.debug(tokens)

    tokens = [(NEWLINE, "\n")] + tokens
    result = untokenize(tokens)
    result = "\n".join(result.split("\n")[1:])

    return result
Example #42
    def parse_expression(self, input_string):
        """Parse a mathematical expression including units and return a quantity object.
        """

        if not input_string:
            return self.Quantity(1)

        input_string = string_preprocessor(input_string)
        gen = ptok(input_string)
        result = []
        unknown = set()
        for toknum, tokval, _, _, _ in gen:
            if toknum in (STRING, NAME):  # replace NUMBER tokens
                # TODO: Integrate math better, Replace eval
                if tokval == 'pi':
                    result.append((toknum, str(math.pi)))
                    continue
                try:
                    tokval = self.get_name(tokval)
                except UndefinedUnitError as ex:
                    unknown.add(ex.unit_names)
                if tokval:
                    result.extend([(NAME, 'Q_'), (OP, '('), (NUMBER, '1'),
                                   (OP, ','), (NAME, 'U_'), (OP, '('),
                                   (STRING, tokval), (OP, '='), (NUMBER, '1'),
                                   (OP, ')'), (OP, ')')])
                else:
                    result.extend([(NAME, 'Q_'), (OP, '('), (NUMBER, '1'),
                                   (OP, ','), (NAME, 'U_'), (OP, '('),
                                   (OP, ')'), (OP, ')')])
            else:
                result.append((toknum, tokval))

        if unknown:
            raise UndefinedUnitError(unknown)
        return eval(untokenize(result), {'__builtins__': None}, {
            'REGISTRY': self._units,
            'Q_': self.Quantity,
            'U_': UnitsContainer
        })
Example #43
def code_analysis_py(program_contents):
    "count lines and words in python"
    f = io.BytesIO(program_contents.encode())
    g = tokenize.tokenize(f.readline)
    processed_tokens = []
    for tok in g:
        t_type = tok[0]
        if t_type not in [tokenize.COMMENT]:
            processed_tokens.append(tok)

    # remove the docstring
    i = 0
    while processed_tokens[i][0] == tokenize.NL:
        i = i + 1
    if processed_tokens[i][0] == tokenize.STRING:
        processed_tokens = processed_tokens[i + 1:]
    # remove strings

    newtok = []
    i = 0
    while (i < len(processed_tokens) - 2):
        if processed_tokens[i][0] == tokenize.INDENT:
            pass
            #print('a',processed_tokens[i],processed_tokens[i+1],processed_tokens[i+2])
            #print('b',tokenize.INDENT,tokenize.STRING,tokenize.NEWLINE)
        if processed_tokens[i][0] == tokenize.INDENT \
                and processed_tokens[i+1][0] == tokenize.STRING \
                and processed_tokens[i+2][0] == tokenize.NEWLINE:
            i += 3
        newtok.append(processed_tokens[i])
        i += 1

    newtok = newtok + processed_tokens[i:i + 2]
    #for t in newtok:
    #    print(t)
    src = "\n".join(x
                    for x in tokenize.untokenize(newtok).decode().splitlines()
                    if x.strip() and x != "\\")

    return {'lines': len(src.splitlines()), 'words': len(src.split())}
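A usage sketch (assuming import io, tokenize):

sample = "# a comment\nx = 1  # trailing\ny = x + 1\n"
print(code_analysis_py(sample))
# -> {'lines': 2, 'words': 8}; both comments are stripped before counting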
Example #44
def get_code_str_and_surrounding(frame) -> Tuple[str, Surrounding]:
    """Gets code string and surrounding information for line event.

    The reason to record both code_str and surrounding is that code_str is not
    guaranteed to be unique; for example, "a = true" may appear twice. By contrast,
    (frame_id, surrounding) is distinct, so we can detect duplicate computations
    by checking their (frame_id, surrounding).

    Both lineno and surrounding are 1-based, aka the smallest lineno is 1.
    """
    lineno = _get_lineno(frame)
    groups: List[List[tokenize.TokenInfo]] = _get_module_token_groups(frame)

    # Given a lineno, locates the logical line that contains this line.
    if len(groups) == 1:
        return (
            inspect.getsource(frame),
            Surrounding(start_lineno=lineno, end_lineno=lineno),
        )

    for group, next_group in zip(groups[:-1], groups[1:]):
        start_lineno, end_lineno = group[0].start[0], group[-1].end[0]
        if start_lineno <= lineno <= end_lineno:
            break
    else:
        # Reached the end of groups
        group = next_group

    # Removes leading NL and DEDENT as they cause untokenize to fail.
    while group[0].type in {token_NL, token.DEDENT, token.INDENT}:
        group.pop(0)
    # When untokenizing, Python adds \\\n for absent lines (because the line numbers
    # in the group don't start from 1), so remove them.
    # Note that since we've removed the leading ENCODING token, untokenize will return
    # a str instead of encoded bytes.
    return (
        tokenize.untokenize(group).lstrip("\\\n"),
        Surrounding(start_lineno=group[0].start[0],
                    end_lineno=group[-1].end[0]),
    )
def tiefighter(readline):
    source_tokens = list(tokenize.tokenize(readline))
    modified_source_tokens = source_tokens.copy()

    def inc(token, by=1, page=0):
        start = list(token.start)
        end = list(token.end)

        start[page] += by
        end[page] += by

        return token._replace(start=tuple(start), end=tuple(end))

    for index, token in enumerate(source_tokens):
        if token.exact_type == tokens.TIEFIGHTER:
            cxx = index - 1

            left = modified_source_tokens.pop(cxx)
            __op = modified_source_tokens.pop(cxx)
            right = modified_source_tokens.pop(cxx)

            stmt_start = modified_source_tokens[cxx - 1]
            stmt_end = modified_source_tokens.pop(cxx)
            new_line = modified_source_tokens.pop(cxx)

            pattern = io.BytesIO(
                f"abs({left.string}) == abs({right.string})\n".encode("utf8"))
            absolute_comp = list(tokenize.tokenize(pattern.readline))[1:-2]

            stmt_end = inc(stmt_end, absolute_comp[-1].end[1], 1)
            new_line = inc(new_line, stmt_end.end[1] - new_line.start[1], 1)
            modified_source_tokens.insert(cxx, new_line)
            modified_source_tokens.insert(cxx, stmt_end)

            for token in reversed(absolute_comp):
                token = inc(token, by=stmt_start.end[0] - 1)
                token = inc(token, by=stmt_start.end[1] + 1, page=1)
                modified_source_tokens.insert(cxx, token)

    return tokenize.untokenize(modified_source_tokens)
def decistmt(s):
    """Substitute Decimals for floats in a string of statements.

    >>> from decimal import Decimal
    >>> s = 'print(+21.3e-5*-.1234/81.7)'
    >>> decistmt(s)
    "print (+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7'))"

    The format of the exponent is inherited from the platform C library.
    Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
    we're only showing 12 digits, and the 13th isn't close to 5, the
    rest of the output should be platform-independent.

    >>> exec(s)  #doctest: +ELLIPSIS
    -3.21716034272e-0...7

    Output from calculations with Decimal should be identical across all
    platforms.

    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """
    result = []

    g = tokenize(BytesIO(s.encode('utf-8')).readline)  # tokenize the string

    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens

            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])

        else:
            result.append((toknum, tokval))

    return untokenize(result).decode('utf-8')
Example #47
def unescape(code_str):
    """Substitutes '{{' by indents and '}}' by dedents.

    Args:
      code_str: The 1-line Python snippet.

    Returns:
      Standard valid Python as a string.

    Raises:
      Error: The conversion failed.
    """
    code_file = io.StringIO(code_str)
    tokens = tokenize.generate_tokens(code_file.readline)
    try:
        unescaped_tokens = list(_unescape_tokens(tokens))
        return tokenize.untokenize(unescaped_tokens)
    except tokenize.TokenError as e:
        raise CannotTokenize(message=e.args[0], position=e.args[1])
    except IndentationError as e:
        raise CannotTokenize(message=e.args[0],
                             position=(e.args[1][1], e.args[1][2]))
Example #48
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # adding newline as python 2.7.5 workaround
    string = s + "\n"
    for token in tokenize.generate_tokens(StringIO(string).readline):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    # removing newline (see above) as python 2.7.5 workaround
    return tokenize.untokenize(tokens)[:-1]
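A usage sketch of the filter above (on Python 3, where "3L" tokenizes as the number 3 followed by the name L):

print(_filter_header("{'descr': '<i8', 'fortran_order': False, 'shape': (3L, 4L)}"))
# -> {'descr': '<i8', 'fortran_order': False, 'shape': (3 , 4 )}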
Example #49
    def untokenize_abstract(self, whole_tokens):
        # Reconstruct Python tokenizer tuples, so that Python's untokenize can be
        # invoked.
        token_tuples: List[Tuple[int, str]] = []

        for whole_token in whole_tokens:
            if whole_token in PythonTokenizer._EXACT_TOKEN_TYPES:
                token_tuples.append((tokenize.OP, whole_token))
            elif cubert_tokenizer.token_from_token_type(
                    tokenize.INDENT) in whole_token:
                # We baked the type and spelling into one token. Break them up.
                spelling = whole_token.replace(
                    cubert_tokenizer.token_from_token_type(tokenize.INDENT),
                    '')
                token_tuples.append((tokenize.INDENT, spelling))
            elif whole_token in PythonTokenizer._REVERSE_TOKEN_MAP:
                python_kind = PythonTokenizer._REVERSE_TOKEN_MAP[whole_token]
                if python_kind in (tokenize.DEDENT, tokenize.ENDMARKER,
                                   tokenize.ERRORTOKEN):
                    spelling = ''
                else:  # python_kind in (tokenize.NEWLINE, tokenize.NL)
                    spelling = '\n'
                token_tuples.append((python_kind, spelling))
            elif keyword.iskeyword(whole_token):
                token_tuples.append((tokenize.NAME, whole_token))
            elif PythonTokenizer._NUMBERS.match(whole_token):
                token_tuples.append((tokenize.NUMBER, whole_token))
            elif PythonTokenizer._SINGLE_STRINGS.match(whole_token):
                token_tuples.append((tokenize.STRING, whole_token))
            elif PythonTokenizer._TRIPLE_STRING_BEGINNINGS.match(whole_token):
                token_tuples.append((tokenize.STRING, whole_token))
            elif PythonTokenizer._COMMENTS.match(whole_token):
                token_tuples.append((tokenize.COMMENT, whole_token))
            else:
                # Everything else we map back to NAME.
                token_tuples.append((tokenize.NAME, whole_token))

        reconstructed = tokenize.untokenize(typing.cast(Any, token_tuples))
        return reconstructed
Example #50
def parse_python(path):
    """
    Look though a python file and extract the specified `LANG_FILES` constant
    value and return it.

    `LANG_FILES` must be defined at the module level, and can be a string or
    list of strings.
    """
    result = []
    in_lang = False
    in_lang_val = False
    with codecs.open(path, encoding='utf-8') as src_f:
        tokens = generate_tokens(src_f.readline)
        for token in tokens:
            t_type, t_val, (t_row, t_col) = token[:3]
            # find the start of the constant declaration
            if t_type == NAME and t_col == 0 and t_val == 'LANG_FILES':
                in_lang = True
                continue
            if in_lang:
                # we only want the value, so start recording after the = OP
                if t_type == OP and t_val == '=':
                    in_lang_val = True
                    continue
                # stop when there's a newline. continuation newlines are a
                # different type so multiline list literals work fine
                if t_type == NEWLINE:
                    break
                if in_lang_val:
                    result.append((t_type, t_val))

    if result:
        new_lang_files = eval(untokenize(result))
        if isinstance(new_lang_files, basestring):
            new_lang_files = [new_lang_files]
        # remove empties
        return [lf for lf in new_lang_files if lf]
    return []
Example #51
    def gen_lambdas():

        def gen():
            yield src + "\n"

        g = gen()
        step = 0
        tokens = []
        for tok in tokenize.generate_tokens(
                getattr(g, "next", getattr(g, "__next__", None))):
            if step == 0:
                if tok[0] == tokenize.NAME and tok[1] == "lambda":
                    step = 1
                    tokens = [tok]
                    level = 0
            elif step == 1:
                if tok[0] == tokenize.NAME:
                    tokens.append(tok)
                    step = 2
                else:
                    step = 0
            elif step == 2:
                if tok[0] == tokenize.OP and tok[1] == ":":
                    tokens.append(tok)
                    step = 3
                else:
                    step = 0
            elif step == 3:
                if level == 0 and (tok[0] == tokenize.OP and tok[1] in ",)" or
                                   tok[0] == tokenize.ENDMARKER):
                    yield tokenize.untokenize(tokens).strip()
                    step = 0
                else:
                    tokens.append(tok)
                    if tok[0] == tokenize.OP:
                        if tok[1] in "[({": level += 2
                        if tok[1] in "])}": level -= 1
        assert not tokens
Example #52
def remove_comments(src):
    """
    This reads tokens using tokenize.generate_tokens and recombines them
    using tokenize.untokenize, and skipping comment/docstring tokens in between
    """
    f = cStringIO.StringIO(src)

    class SkipException(Exception):
        pass

    processed_tokens = []
    # go through all the tokens and try to skip comments
    for tok in tokenize.generate_tokens(f.readline):
        t_type, t_string, t_srow_scol, t_erow_ecol, t_line = tok
        try:
            if t_type == tokenize.COMMENT:
                raise SkipException()
        except SkipException:
            pass
        else:
            processed_tokens.append(tok)

    return tokenize.untokenize(processed_tokens)
Example #53
def decode(byteslike, errors="replace", *, ignore_first_line):
    read_code = io.BytesIO(bytes(byteslike)).readline
    if ignore_first_line:
        read_code()  # its the encoding comment

    tokens = list(tokenize(read_code))

    new_tokens = []
    for token in _token_iter(tokens):
        if token.string in _viking_to_english:
            new_tokens.append(
                TokenInfo(
                    token.type,
                    _viking_to_english[token.string],
                    token.start,
                    token.end,
                    token.line,
                )  # its a copy with token.string replaced
            )
        else:
            new_tokens.append(token)

    return str(untokenize(new_tokens), "utf-8"), len(byteslike)
Example #54
def _decorate_variables(expression, variable_store):
    variable_started = False
    variable_found = False
    tokens = []
    for toknum, tokval, _, _, _ in generate_tokens(
            StringIO(expression).readline):
        if variable_started:
            if toknum == token.NAME:
                if tokval not in variable_store:
                    variable_not_found(
                        '$%s' % tokval,
                        variable_store.as_dict(decoration=False),
                        deco_braces=False)
                tokval = 'RF_VAR_' + tokval
                variable_found = True
            else:
                tokens.append((token.ERRORTOKEN, '$'))
            variable_started = False
        if toknum == token.ERRORTOKEN and tokval == '$':
            variable_started = True
        else:
            tokens.append((toknum, tokval))
    return untokenize(tokens).strip() if variable_found else expression
    def __init__(self, source_code_path: str):

        tokens = []
        file_name = os.path.basename(source_code_path)
        with open(source_code_path, "r") as source_code:
            source_raw = source_code.read()
            source_code_copy_str = StringIO(source_raw)
            source_code_copy_byte = BytesIO(source_raw.encode("UTF-8"))

        source_code_iter = tokenize.tokenize(source_code_copy_byte.readline)

        for token_type, token_val, *_ in source_code_iter:
            if token_type == tokenize.COMMENT:
                temp_token_val = token_val[1:]
                res = Parser.parse_and_expand_instruction(temp_token_val)
                tokens.extend(res)
            else:
                tokens.append((token_type, token_val))

        source = tokenize.untokenize(tokens)

        with open("{}_parsed.py".format(file_name), "wb") as s:
            s.write(source)
    def _get_trait_definition(self):
        """ Retrieve the Trait attribute definition
        """

        # Get the class source and tokenize it.
        source = inspect.getsource(self.parent)
        string_io = StringIO.StringIO(source)
        tokens = tokenize.generate_tokens(string_io.readline)

        # find the trait definition start
        trait_found = False
        name_found = False
        while not trait_found:
            item = next(tokens)
            if name_found and item[:2] == (token.OP, '='):
                trait_found = True
                continue
            if item[:2] == (token.NAME, self.object_name):
                name_found = True

        # Retrieve the trait definition.
        definition_tokens = _get_definition_tokens(tokens)
        return tokenize.untokenize(definition_tokens).strip()
Example #57
def transform_source(text):
    '''Replaces instances of

        repeat n:
    by

        for __VAR_i in range(n):

    where __VAR_i is a string that does not appear elsewhere
    in the code sample.
    '''

    loop_keyword = 'repeat'

    nb = text.count(loop_keyword)
    if nb == 0:
        return text

    var_names = get_unique_variable_names(text, nb)

    toks = tokenize.generate_tokens(StringIO(text).readline)
    result = []
    replacing_keyword = []
    for toktype, tokvalue, start, _, _ in toks:
        if toktype == tokenize.NAME and tokvalue == loop_keyword:
            result.extend([(tokenize.NAME, 'for'),
                           (tokenize.NAME, var_names.pop()),
                           (tokenize.NAME, 'in'), (tokenize.NAME, 'range'),
                           (tokenize.OP, '(')])
            replacing_keyword.append(start[0])
        elif replacing_keyword and tokvalue == ':':
            if start[0] != replacing_keyword.pop():
                raise SyntaxError("colon and 'repeat' must be on same line")
            result.extend([(tokenize.OP, ')'), (tokenize.OP, ':')])
        else:
            result.append((toktype, tokvalue))
    return tokenize.untokenize(result)
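A usage sketch (get_unique_variable_names is not shown; assume it returns fresh names such as '__VAR_0'):

src = "repeat 3:\n    pass\n"
print(transform_source(src))
# roughly: for __VAR_0 in range (3 ):\n    pass \n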
Example #58
def transform_source(src):
    toks = tokenize.generate_tokens(StringIO(src).readline)
    result = []
    last_name = None
    last_plus = False
    for toktype, tokvalue, _, _, _ in toks:
        if toktype == tokenize.NAME:
            if last_name is not None:  # two names in a row: not an increment
                result.append((tokenize.NAME, last_name))
                result.append((tokenize.NAME, tokvalue))
                last_name = None
            else:
                last_name = tokvalue
        elif last_name is not None:
            if toktype == tokenize.OP and tokvalue == '+':
                if last_plus:
                    result.extend([(tokenize.NAME, last_name),
                                   (tokenize.OP, '='),
                                   (tokenize.NAME, last_name),
                                   (tokenize.OP, '+'), (tokenize.NUMBER, '1')])
                    last_plus = False
                    last_name = None
                else:
                    last_plus = True
            else:
                result.append((tokenize.NAME, last_name))
                if last_plus:
                    result.append((tokenize.OP, '+'))
                    last_plus = False
                result.append((toktype, tokvalue))
                last_name = None
        else:
            result.append((toktype, tokvalue))

    if last_name:
        result.append((tokenize.NAME, last_name))
    return tokenize.untokenize(result)
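A usage sketch of this "++" rewrite (assuming import tokenize and from io import StringIO):

print(transform_source("a++\nb = a + 1\n"))
# roughly: a =a +1 \nb =a +1 \n  i.e. "a++" becomes "a = a + 1"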
Example #59
def transform_settings(app_name, filename, key, transformer):
    """Get the settings specified in the config file under `security` section.

    It looks for the comma separated list of setting names specified in the
    config file under the `security` section and the `secure_settings` option.

    Params
        app_name: Application name. The settings of this app are the ones
                  parsed.
        filename: Name of the file holding the settings.

    Raises
        ValueError if no settings are found for that app name.

    Returns
        A list of tuples where the first element of the tuple is the setting
        name and second element the setting value.
    """
    settings_path = os.path.join(get_current_path(), app_name, filename)
    if not os.path.isfile(settings_path):
        msg = "No settings found for {0!r} app".format(app_name)
        raise ValueError(msg)

    secure_settings_string = config("secure_settings", section="security")
    if secure_settings_string is not None:
        secure_settings = set(s.strip()
                              for s in secure_settings_string.split(","))

        with open(settings_path) as f:
            cipher_tokens = []
            callback = parse_settings(cipher_tokens, secure_settings, key,
                                      transformer)
            tokenize.tokenize(f.readline, callback)
            cipher_settings = tokenize.untokenize(cipher_tokens)

        with open(settings_path, "w") as f:
            f.write(cipher_settings)
Example #60
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    for token in tokenize.generate_tokens(StringIO(asstr(s)).read):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    return tokenize.untokenize(tokens)