def next(self):
    try:
        tok = self._queue.pop()
    except IndexError:
        tok = next(self.tokens)
        # "foo.1" is tokenized as NAME(foo), NUMBER(.1); rework it here
        # so that we yield NAME(foo), OP(.), NUMBER(1) instead.
        if tok.exact_type == tokenize.NUMBER and tok.string.startswith('.'):
            # Queue the NUMBER part without the leading dot ...
            self._queue.append(
                tokenize.TokenInfo(
                    type=tokenize.NUMBER,
                    string=tok.string[1:],
                    start=(tok.start[0], tok.start[1] + 1),
                    end=tok.end,
                    line=tok.line,
                ))
            # ... and return the '.' (DOT) token first.
            tok = tokenize.TokenInfo(
                type=tokenize.DOT,
                string='.',
                start=tok.start,
                end=(tok.start[0], tok.start[1] + 1),
                line=tok.line,
            )
            return tok
    return tok
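For context, the comment above describes how the standard tokenizer greedily reads .1 as a float literal. A minimal standalone sketch (independent of the class this method belongs to) showing the raw token stream for foo.1:

import io
import tokenize

# "foo.1" comes back as NAME('foo') followed by a single NUMBER('.1'),
# which is exactly what the method above compensates for.
for tok in tokenize.generate_tokens(io.StringIO('foo.1\n').readline):
    print(tokenize.tok_name[tok.exact_type], repr(tok.string))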
Example #2
    def fix_line(line):

        if len(line) < 2:
            return line

        if line[0].type != token.NAME:
            return line

        if line[0].string != "print":
            return line

        if line[1].exact_type == token.LPAR:
            return line

        if line[1].exact_type == token.RIGHTSHIFT:
            newline = line[2:]
        else:
            newline = line[1:]

        # Replace the `print` statement with `0, <arguments>` so the line stays a valid expression.
        old = line[0]
        newline.insert(0, tokenize.TokenInfo(token.NUMBER, "0", old.start, old.start, old.line))
        newline.insert(1, tokenize.TokenInfo(token.OP, ",", old.end, old.end, old.line))

        return newline
Example #3
def translate_ptl(tokens):
    i = 0
    while i < len(tokens):
        tok = tokens[i]
        if tok.type == tokenize.NAME and tok.string == 'def':
            if (tokens[i + 2][:2] == (tokenize.OP, '[')
                    and tokens[i + 3][1] in {'html', 'plain'}
                    and tokens[i + 4][:2] == (tokenize.OP, ']')):
                template_type = tokens[i + 3][1]
                template_func = 'ptl_' + template_type
                t = tokens[i + 1]
                line, col = tok.start
                # delete extra tokens from [html] or [plain]
                tokens[i + 1:i + 4] = []
                end = (t.end[0], t.end[1] + len(template_type) + 4)
                start = (t.start[0], tok.end[1] + 1)
                indent = ' ' * tok.start[1]
                # putting newline in tok.string is ugly but it works
                s = '@' + template_func + '\n' + indent + tok.string
                tokens[i] = tokenize.TokenInfo(tok.type, s, tok.start, tok.end,
                                               tok.line)
                tokens[i + 1] = tokenize.TokenInfo(t.type, t.string, start,
                                                   end, t.line)
                #print('tokens', tokens[i-1:i+3])
        elif tok.type == tokenize.NAME and tok.string == 'h':
            try:
                str_tok = tokens[i + 1]
                if str_tok.type == tokenize.STRING:
                    t = tokens[i]
                    tokens[i] = tokenize.TokenInfo(t.type, 'F', t.start, t.end,
                                                   t.line)
            except IndexError:
                pass
        i += 1
Example #4
def translate_ptl(tokens):
    i = 0
    while i < len(tokens):
        tok = tokens[i]
        if tok.type == tokenize.NAME and tok.string == 'def':
            if (tokens[i + 2][:2] == (tokenize.OP, '[')
                    and tokens[i + 3][1] in {'html', 'plain'}
                    and tokens[i + 4][:2] == (tokenize.OP, ']')):
                template_type = tokens[i + 3][1]
                t = tokens[i + 1]
                tokens[i + 1:i + 4] = []
                end = t.end
                if str(template_type) == 'plain':
                    prefix = PLAIN_PREFIX
                else:
                    prefix = HTML_PREFIX
                tokens[i + 1] = tokenize.TokenInfo(
                    t.type,
                    prefix + t.string,
                    t.start,
                    (end[0], end[1] + len(prefix)),
                    t.line,
                )
        elif tok.type == tokenize.NAME and tok.string == 'h':
            try:
                str_tok = tokens[i + 1]
                if str_tok.type == tokenize.STRING:
                    t = tokens[i]
                    tokens[i] = tokenize.TokenInfo(t.type, 'F', t.start, t.end,
                                                   t.line)
            except IndexError:
                pass
        i += 1
Example #5
def googleTranslate(tokenL):

    gTranslator = ryGoogleTranslate.Translator(from_lang='auto',
                                               to_lang='zh-tw')

    for n, t in enumerate(tokenL):

        if ((t.type == tn.STRING)
                and (('"""' in t.string) or ("'''" in t.string))):

            triQuot = '"""' if ('"""' in t.string) else "'''"

            text = t.string.strip(triQuot)
            #text= text.strip("'''")
            try:
                gTranslation = gTranslator.translate(text)
                gTranslation = google翻譯修正(gTranslation)
            except Exception:
                gTranslation = 'Google翻譯失敗,網路可能有問題!保留原文 ... ' + text

            newString = "'''"  # I prefer triple single quotes '''
            #newString += '== begin Google 翻譯 =='
            newString += gTranslation
            newString += '=== 以上由 Google 翻譯,請協助改善 ===\n'
            #newString += text  # append the original English comment if needed
            newString += "'''"

            t0 = t.type

            t1 = newString  #D[t.string]
            t2 = t.start  # may also need adjusting
            t3 = t.end  # may also need adjusting
            t4 = t.line  # may also need adjusting

            tokenL[n] = tn.TokenInfo(t0, t1, t2, t3, t4)

        if ((t.type == tn.COMMENT) and ('#' in t.string)
                and ('#!' not in t.string)):

            text = t.string.strip('#')

            gTranslation = gTranslator.translate(text)
            gTranslation = google翻譯修正(gTranslation)

            newString = "# "

            newString += gTranslation
            newString += ''  #' ..by Google'

            t0 = t.type

            t1 = newString  #D[t.string]
            t2 = t.start  # may also need adjusting
            t3 = t.end  # may also need adjusting
            t4 = t.line  # may also need adjusting

            tokenL[n] = tn.TokenInfo(t0, t1, t2, t3, t4)

    return tokenL
Example #6
 def fix_token_error(self, err: Exception) -> tokenize.TokenInfo:
     msg = err.args[0]
     if msg == "EOF in multi-line statement":
         return tokenize.TokenInfo(token.ENDMARKER, "", (0, 0), (0, 0), "")
     elif msg == "EOF in multi-line string":
         return tokenize.TokenInfo(token.ERRORTOKEN, "", (0, 0), (0, 0), "")
     elif msg == "unindent does not match any outer indentation level":
         return tokenize.TokenInfo(token.ERRORTOKEN, "", (0, 0), (0, 0), "")
     else:
         raise err
Example #7
def 外部連結翻譯(標記表, from_l='auto', to_l='zh-tw'):

    翻譯器 = ryOuterTranslate.Translator(from_lang=from_l, to_lang=to_l)

    for n, t in enumerate(標記表):
        # Only translate STRING tokens that are triple-quoted (multi-line) strings
        if ((t.type == tn.STRING)
                and (('"""' in t.string) or ("'''" in t.string))):

            # Detect which kind of triple quote is used: ''' or """
            三引號 = '"""' if ('"""' in t.string) else "'''"
            # Strip the surrounding triple quotes from the string
            待翻譯文字 = t.string.strip(三引號)
            try:
                翻譯後文字 = 翻譯器.translate(待翻譯文字)
                翻譯後文字 = 翻譯文字修正(翻譯後文字)
            except Exception:
                翻譯後文字 = 'Google翻譯失敗,網路可能有問題!保留原文 ... ' + 待翻譯文字

            新字串 = "'''"  # I prefer triple single quotes '''
            新字串 += 翻譯後文字
            新字串 += "'''"

            標記型態 = t.type

            標記字串 = 新字串  #D[t.string]
            開始位置 = t.start  # may also need adjusting
            結束位置 = t.end  # may also need adjusting
            整行文字 = t.line  # may also need adjusting

            標記表[n] = tn.TokenInfo(標記型態, 標記字串, 開始位置, 結束位置, 整行文字)
        # Check whether this is a comment; skip shebang-style #! lines
        if ((t.type == tn.COMMENT) and ('#' in t.string)
                and ('#!' not in t.string)):
            # Strip the leading # from the string
            待翻譯文字 = t.string.strip('#')

            翻譯後文字 = 翻譯器.translate(待翻譯文字)
            翻譯後文字 = 翻譯文字修正(翻譯後文字)

            新字串 = "# "

            新字串 += 翻譯後文字
            新字串 += ''  #' ..by Google'

            標記型態 = t.type

            標記字串 = 新字串  #D[t.string]
            開始位置 = t.start  # may also need adjusting
            結束位置 = t.end  # may also need adjusting
            整行文字 = t.line  # may also need adjusting

            標記表[n] = tn.TokenInfo(標記型態, 標記字串, 開始位置, 結束位置, 整行文字)

    return 標記表
Example #8
    async def post(self):
        path = configuration.PROJECT_PATH / self.request.match_info.get(
            'file_name', '')
        body = await self.request.json()
        print('INPUT BODY:', body)

        token_info = None
        for i in tokenize.tokenize(
                BytesIO(body['code_string'].encode('utf8')).readline):
            if i.end[1] >= body['cursor_position']:
                token_info = tokenize.TokenInfo(
                    type=i.type,
                    string=i.string,
                    start=(body['code_line_number'], i.start[1]),
                    end=(body['code_line_number'], i.end[1]),
                    line=i.line)
                break

        node_info, node_scope_id, scope_tree = AstParser.search_token(
            token_info, AST_PARSER[path]['ast_tree'])
        node_info = AstParser.get_token_definition(token_info, node_scope_id,
                                                   scope_tree)
        print('node_info in goto_definition:', node_info, getattr(node_info, 'lineno', None))

        if node_info:
            # TODO add file name in case cross file definition
            return aiohttp.web.json_response({
                'code_line_number':
                node_info.lineno - 1,
                'cursor_position':
                node_info.col_offset
            })
        else:
            raise aiohttp.web.HTTPNotFound(
                text=f'Go to definition not found: {body}')
Example #9
    def parse(self, token: tokenize.TokenInfo,
              line_tokens: List[tokenize.TokenInfo], args: SimpleNamespace,
              token_dict: Dict[str, Any], *_args: Any,
              **kwargs: Any) -> tokenize.TokenInfo:
        """Make furbFURB prefixes lowercase.

        Original from https://github.com/psf/black
        """
        if args.lowercase_string_prefix:
            if token.type != tokenize.STRING:
                return token

            leaf = token.string
            match = re.match(r"^([furbFURB]*)(.*)$", leaf, re.DOTALL)
            assert match is not None, f"failed to match string {leaf!r}"
            orig_prefix = match.group(1)
            new_prefix = orig_prefix
            new_prefix = new_prefix.replace("F", "f")
            new_prefix = new_prefix.replace("U", "u")
            new_prefix = new_prefix.replace("R", "r")
            new_prefix = new_prefix.replace("B", "b")
            leaf = f"{new_prefix}{match.group(2)}"

            token = tokenize.TokenInfo(type=token.type,
                                       string=leaf,
                                       start=token.start,
                                       end=token.end,
                                       line=token.line)
        return token
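The effect of the prefix-lowercasing above can be seen with just the regular expression from the snippet; a small sketch, independent of the surrounding plugin machinery:

import re

for leaf in ('F"hi"', 'RB"\\x00"', "U'text'", "f'already lower'"):
    match = re.match(r"^([furbFURB]*)(.*)$", leaf, re.DOTALL)
    # Lowercasing the whole prefix is equivalent to the F/U/R/B replaces above.
    print(leaf, '->', match.group(1).lower() + match.group(2))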
Example #10
 def getnext(self) -> tokenize.TokenInfo:
     """Return the next token and updates the index."""
     cached = True
     while self._index == len(self._tokens):
         tok = next(self._tokengen)
         if tok.type in (
                 tokenize.COMMENT,
                 tokenize.INDENT,
                 tokenize.DEDENT,
         ):
             continue
         # Transform NL to NEWLINE
         if tok.type == token.NL:
             tok = tokenize.TokenInfo(
                 token.NEWLINE,
                 tok.string,
                 start=tok.start,
                 end=tok.end,
                 line=tok.line,
             )
         if tok.type == token.ERRORTOKEN and tok.string.isspace():
             continue
         self._tokens.append(tok)
         cached = False
     tok = self._tokens[self._index]
     self._index += 1
     if self._verbose:
         self.report(cached, False)
     return tok
Example #11
    def _gen_eval_tokens(self, tokens):
        """Wrap tokens in parens so multi-line input works as expected"""

        # untokenize() stops at ENDMARKER, causing rparen to be discarded.
        tokens = [t for t in tokens if t.type != tokenize.ENDMARKER]

        if not tokens:
            tokens = [tokenize.TokenInfo(
                tokenize.NAME, 'None', (1, 0), (1, 4), 'None')]

        last = tokens[-1]
        lparen = tokenize.TokenInfo(tokenize.OP, '(', (1, 0), (1, 0), '(')
        rparen = tokenize.TokenInfo(tokenize.OP, ')', last.end,
                                    (last.end[0], last.end[1] + 1), ')')

        return [lparen] + tokens + [rparen]
Example #12
    def test_simple_strings_are_not_passwords(self):
        """
        Tests that simple strings with 2 character classes are not flagged as passwords
        """

        tokens = [
            tokenize.TokenInfo(tokenize.STRING, "this-is-not-my-password",
                               (0, 0), (0, 0), ""),
            tokenize.TokenInfo(tokenize.STRING, "123456789", (0, 0), (0, 0),
                               ""),
            tokenize.TokenInfo(tokenize.STRING, "!@#$%^&**", (0, 0), (0, 0),
                               "")
        ]

        with self.assertNoMessages():
            self.checker.process_tokens(tokens)
Example #13
def substitueFormulaReference(token):
    """Given a token, inspect for cell references and return either
    the given token or tokens that represent formula.

    If a NAME token corresponds to a formula rather than a value,
    replace the NAME with the respective formula taking care the context
    of resulting NAMEs is preserved e.g.
    $Sheet1.A1 = A3 * A2 --> $Sheet1.A3 * $Sheet1.A2

    Parameters
    token : TokenInfo

    Returns
    [TokenInfo]
    """
    if token is None or token.type != tokenize.NAME: return [token]
    formula = getFormula(token.string)
    if formula is None: return [token]
    result = []
    formula = tokenize.untokenize(getTokens(formula))  # expunge leading =
    if '.' in token.string:  # qualified name
        sheet, cell = token.string.split('.')
        for tok in getTokens('(' + formula + ')'):
            if tok.type == tokenize.NAME and '.' not in tok.string:
                result.append(
                    tokenize.TokenInfo(tok.type, sheet + '.' + tok.string,
                                       tok.start, tok.end, tok.line))
            else:
                result.append(tok)
    else:
        result.extend(getTokens('(' + formula + ')'))
    return result[:-1]  # drop end marker
Example #14
    def __init__(self, error_info):
        super().__init__(error_info)

        self.tokens = []
        self.token_error = None

        if self.error_info["message"] == "EOL while scanning string literal":
            self.intro_text = (
                "You haven't properly closed the string on line %s."
                % self.error_info["lineno"]
                + "\n(If you want a multi-line string, then surround it with"
                + " `'''` or `\"\"\"` at both ends.)"
            )

        elif (
            self.error_info["message"]
            == "EOF while scanning triple-quoted string literal"
        ):
            # lineno is not useful, as it is at the end of the file and user probably
            # didn't want the string to end there
            self.intro_text = "You haven't properly closed a triple-quoted string"

        else:
            if self.error_info["filename"] and os.path.isfile(
                self.error_info["filename"]
            ):
                with open(self.error_info["filename"], mode="rb") as fp:
                    try:
                        for t in tokenize.tokenize(fp.readline):
                            self.tokens.append(t)
                    except tokenize.TokenError as e:
                        self.token_error = e

                if not self.tokens or self.tokens[-1].type not in [
                    token.ERRORTOKEN,
                    token.ENDMARKER,
                ]:
                    self.tokens.append(
                        tokenize.TokenInfo(token.ERRORTOKEN, "", None, None, "")
                    )
            else:
                self.tokens = []

            unbalanced = self._sug_unbalanced_parens()
            if unbalanced:
                self.intro_text = (
                    "Unbalanced parentheses, brackets or braces:\n\n" + unbalanced.body
                )
                self.intro_confidence = 5
            else:
                self.intro_text = "Python doesn't know how to read your program."

                if "^" in str(self.error_info):
                    self.intro_text += (
                        "\n\nSmall `^` in the original error message shows where it gave up,"
                        + " but the actual mistake can be before this."
                    )

                self.suggestions = [self._sug_missing_or_misplaced_colon()]
Example #15
def tokenize(src):
    byte_src = io.BytesIO(src.encode('utf-8'))
    for token in py_tokenize.tokenize(byte_src.readline):
        if token.type == py_tokenize.ENCODING:  # discard encoding token
            continue
        if token.type == py_token.OP:
            token = py_tokenize.TokenInfo(token.exact_type, *token[1:])
        yield token
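py_tokenize and py_token are presumably aliases for the stdlib tokenize and token modules (their imports are not shown). A self-contained sketch of the same substitution, illustrating that operator tokens then carry their exact type (LPAR, PLUS, ...) instead of the generic OP:

import io
import token as py_token
import tokenize as py_tokenize

for tok in py_tokenize.generate_tokens(io.StringIO('(1 + 2)\n').readline):
    if tok.type == py_token.OP:
        # Replace the generic OP type with the exact type, as in the example.
        tok = py_tokenize.TokenInfo(tok.exact_type, *tok[1:])
    print(py_tokenize.tok_name[tok.type], repr(tok.string))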
Example #16
def fixNames(tokenList):
    """Tweak a token list taking spreadsheet references into account.

    I'm co-opting the python tokenizer and have to tweak the NAME tokens
    to allow for spreadsheet references.
    A custom tokenizer or, even better, one for calc sheets would obviate
    this completely.

    Parameters
    tokenList : list of TokenInfo from tokenizer

    Returns
    list of TokenInfo
    """
    result = []
    while len(tokenList) > 0:
        token = tokenList.pop(0)
        if token.type in (tokenize.ERRORTOKEN, tokenize.COMMENT) \
            or (token.type == tokenize.OP and token.string in ('.',':')):
            # if previous isn't an OP, have to bind to that
            # ditto when following.
            if len(result) > 0:
                prv = result.pop()
                if prv.type == tokenize.OP:
                    result.append(prv)  # put it back
                else:
                    token = tokenize.TokenInfo(tokenize.NAME,
                                               prv.string + token.string,
                                               prv.start, token.end,
                                               token.line)

            nxt = tokenList.pop(0) if len(tokenList) > 0 else None
            while nxt is not None and (nxt.type
                                       not in (tokenize.OP, tokenize.NEWLINE)
                                       or nxt.string == '.'):
                token = tokenize.TokenInfo(tokenize.NAME,
                                           token.string + nxt.string,
                                           token.start, nxt.end, token.line)
                nxt = tokenList.pop(0) if len(tokenList) > 0 else None

            if nxt is not None: tokenList.insert(0, nxt)

        result.append(token)

    return result
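A hedged usage sketch for fixNames: on Python versions where '$' tokenizes as an ERRORTOKEN, the pieces of a spreadsheet reference are glued back into a single NAME token:

import io
import tokenize

# '$Sheet1.A1 + 3' tokenizes roughly as ERRORTOKEN('$'), NAME('Sheet1'),
# OP('.'), NAME('A1'), OP('+'), NUMBER('3'); fixNames merges the first
# four pieces into NAME('$Sheet1.A1') and leaves the rest alone.
toks = list(tokenize.generate_tokens(io.StringIO('$Sheet1.A1 + 3\n').readline))
for t in fixNames(toks):
    print(tokenize.tok_name[t.type], repr(t.string))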
Example #17
 def replacements(self) -> typing.Iterator[Replacement]:
     token = tokenize.TokenInfo(
         type=tokenize.STRING,
         string=self.content,
         start=(1, 0),
         end=(2, 0),
         line=self.content,
     )
     yield from Replacement.from_token(token, self.words)
Example #18
    def test_encoding_token(self):
        """Make sure the encoding token doesn't change the checker's behavior

        _tokenize_str doesn't produce an encoding token, but
        reading a file does
        """
        with self.assertNoMessages():
            encoding_token = tokenize.TokenInfo(tokenize.ENCODING, "utf-8", (0, 0), (0, 0), '')
            tokens = [encoding_token] + _tokenize_str('if (\n        None):\n    pass\n')
            self.checker.process_tokens(tokens)
Example #19
def _replace_with_newline(tokens: List[tokenize.TokenInfo], pos: int) -> None:
    tok = tokens[pos]
    tokens[pos] = tokenize.TokenInfo(
        token.NEWLINE,
        '\n',
        tok.start,
        tok.end,
        tok.line)
    line_offset = 0
    for i, tok2 in enumerate(tokens[pos + 1:]):
        if i == 0:
            line_offset = tok2.start[1]
        if tok2.start[0] != tok.start[0]:
            line_offset = 0
        tokens[i + pos + 1] = tokenize.TokenInfo(
            tok2.type, tok2.string,
            start=(tok2.start[0] + 1, tok2.start[1] - line_offset),
            end=(tok2.end[0] + 1, tok2.end[1] - line_offset),
            line=tok2.line)
Example #20
def translate_hstrings(tokens):
    i = 0
    context = [False]
    html_def = False  # True if upcoming indent enters html template
    # test if string contains markup characters and needs escaping.  We
    # also include strings that contain % format characters (otherwise str
    # formatting would convert htmltext to str).  If the string doesn't
    # contain any of these characters, we will leave it as an ordinary
    # string.
    need_escape = re.compile(r'[&<>"%]').search
    # test if a string appears on the same line as a .join call, if so,
    # change it to an h-string since str.join doesn't handle htmltext
    # strings.
    is_join = re.compile(r'["\']\.join').search
    while i < len(tokens):
        tok = tokens[i]
        if tok.type == tokenize.NAME and tok.string == 'def':
            if (
                tokens[i + 2][:2] == (tokenize.OP, '[')
                and tokens[i + 3][1] == 'html'
                and tokens[i + 4][:2] == (tokenize.OP, ']')
            ):
                html_def = True
            else:
                html_def = False
        elif tok.type == tokenize.INDENT:
            if html_def:
                context.append(True)
                html_def = False
            else:
                context.append(context[-1])
        elif tok.type == tokenize.DEDENT:
            context.pop()
        elif tok.type == tokenize.STRING and context[-1]:
            # found literal string inside html template
            str_tok = list(tokens[i])
            # prefix string with marker
            s = str_tok[1]
            if s[:1] == 'f':
                # we could support this, just gets more complicated
                raise RuntimeError(
                    'f-strings in PTL source not supported '
                    'by conversion. %r' % tok.line
                )
            if s[:1] not in {'"', "'", 'u'}:
                raise RuntimeError('bad string in html template %r' % s)
            if need_escape(ast.literal_eval(s)) or is_join(tok.line):
                # we are creating h-string
                if '{' in s:
                    # escape f-string chars
                    s = s.replace('{', '{{')
                    s = s.replace('}', '}}')
                str_tok[1] = 'h' + s
                tokens[i] = tokenize.TokenInfo(*str_tok)
        i += 1
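To make the two heuristics described in the comments above concrete, here is a small standalone check of which literals the function would turn into h-strings (the PTL-specific token handling is omitted):

import re

need_escape = re.compile(r'[&<>"%]').search
is_join = re.compile(r'["\']\.join').search

# Strings containing markup or % formatting characters need escaping ...
print(bool(need_escape('<b>bold</b>')))    # True
print(bool(need_escape('plain text')))     # False
# ... and a plain literal on a .join(...) line is converted as well.
print(bool(is_join('"".join(parts)')))     # True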
Example #21
 def replacements(self) -> typing.Iterator[Replacement]:
     for row_offset, line in enumerate(self.content.split('\n')):
         for match in REX_STRING.finditer(line):
             token = tokenize.TokenInfo(
                 type=tokenize.STRING,
                 string=match.group(0),
                 start=(1+row_offset, match.start()),
                 end=(2+row_offset, 0),
                 line=self.content,
             )
             yield from Replacement.from_token(token, self.words)
Example #22
def pysh_tokenize(readline):
    tokens = tokenize.generate_tokens(readline)
    parse = parser.statement(tokens)
    # print(list(parse))
    return parse  # NOTE: everything below this early return is unreachable
    indent = 0
    last_obj = None
    gen_name = get_temp()
    try:
        while True:
            token = next(tokens)
            print(token)
            ttype, tvalue, tstart, tend, tline = token
            if ttype == tokenize.NAME and tvalue == 'e':
                tnext = next(tokens)
                print(tnext)
                if tnext[0] == tokenize.STRING and tnext[2][1] == tend[1]:
                    # Parse the e-string
                    last_obj = next(gen_name)
                    # yield tokenize.TokenInfo(tokenize.NAME, last_obj, token[2], (0, 0), '')
                    # yield tokenize.TokenInfo(tokenize.OP, '=', (0, 0), (0, 0), '')
                    yield tokenize.TokenInfo(tokenize.NAME, 'Exec', token[2], (0, 0), '')
                    yield tokenize.TokenInfo(tokenize.OP, '(', (0, 0), (0, 0), '')
                    yield tokenize.TokenInfo(tokenize.STRING, tnext[1], (0, 0), (0, 0), '')
                    yield tokenize.TokenInfo(tokenize.OP, ')', (0, 0), tnext[3], '')
                    # yield tokenize.TokenInfo(tokenize.NEWLINE, '\n', (0, 0), tnext[3], '')
                    # for x in indents(indent):
                    #     yield x
                    continue
                else:
                    # 'e' is not immediately followed by a string; yield tokens as-is
                    yield token
                    yield tnext
                    continue
            elif ttype == tokenize.INDENT:
                indent += 1
            elif ttype == tokenize.NEWLINE:
                indent = 0
            yield token
    except (StopIteration, tokenize.TokenError):
        pass
Example #23
    def test_simple_strings_not_credentials(self):
        """
        Checks that the checker doesn't raise a message for regular strings
        """

        tokens = [
            tokenize.TokenInfo(tokenize.STRING, "This is my string!", (0, 0),
                               (0, 0), "")
        ]

        with self.assertNoMessages():
            self.checker.process_tokens(tokens)
Example #24
def replace_keywords(token_list):
    for t in token_list:
        if (t.type, t.string) in REPLACEMENTS:
            yield tokenize.TokenInfo(
                type=t.type,
                string=REPLACEMENTS[(t.type, t.string)],
                start=t.start,
                end=t.end,
                line=t.line
            )
        else:
            yield t
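REPLACEMENTS is not shown in this example; purely as an illustration, a hypothetical mapping and a quick run might look like this:

import io
import tokenize

# Hypothetical mapping: rewrite the NAME 'ans' to 'result' wherever it occurs.
REPLACEMENTS = {(tokenize.NAME, 'ans'): 'result'}

toks = tokenize.generate_tokens(io.StringIO('ans = ans + 1\n').readline)
print(' '.join(t.string for t in replace_keywords(toks) if t.string.strip()))
# prints: result = result + 1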
Example #25
 def next_atom(self):
     tok = self.next()
     if tok.string == '-':
         tok = self.next()
         if tok.type != tokenize.NUMBER:
             raise ParseError('%s unexpected `%s`' %
                              (tok.start, tok.string))
         return tokenize.TokenInfo(type=tok.type,
                                   string='-' + tok.string,
                                   start=tok.start,
                                   end=tok.end,
                                   line=tok.line)
     return tok
Example #26
def 外部連結翻譯(Token表,from_l='auto',to_l='zh-tw'):

    翻譯器= ryOuterTranslate.Translator(from_lang=from_l, to_lang=to_l)

    for n,t in enumerate(Token表):
        連結翻譯= False
        註解類型=0
        if ((t.type==tn.STRING) 
            and (  ('"""' in t.string) 
                or ("'''" in t.string) )):

            三引號= '"""' if ('"""' in t.string) else "'''"
            待翻譯文字= t.string.strip(三引號)
            註解類型=1
            連結翻譯= True

        # Check whether this is a comment; skip shebang-style #! lines
        if (    (t.type==tn.COMMENT) 
            and('#' in t.string)
            and('#!' not in t.string)
            ): 
            # Strip the leading # from the string
            待翻譯文字= t.string.strip('#')
            註解類型=2
            連結翻譯= True
        
        if 連結翻譯:
            try:
                翻譯後文字= 翻譯器.translate(待翻譯文字)
                翻譯後文字= 翻譯文字修正(翻譯後文字)
            except Exception:
                翻譯後文字= '翻譯失敗,網路可能有問題!保留原文 ... ' + 待翻譯文字  
            if 註解類型==1:
                新字串 = "'''" 
                新字串 += 翻譯後文字 
                新字串 += "'''"
            elif 註解類型==2:
                新字串 = "# " 
                新字串 += 翻譯後文字 
                新字串 += ''
            
            標記型態= t.type
            標記字串= 新字串 #D[t.string]
            開始位置= t.start  # may also need adjusting
            結束位置= t.end  # may also need adjusting
            整行文字= t.line  # may also need adjusting
            
            Token表[n]= tn.TokenInfo(標記型態,標記字串,開始位置,結束位置,整行文字)
    return Token表
Example #27
def translate_py(tokens):
    i = 0
    while i < len(tokens):
        tok = tokens[i]
        if tok.type == tokenize.NAME and tok.string == 'def':
            t = tokens[i + 1]
            if t.type == tokenize.NAME:
                if t.string.startswith(HTML_PREFIX):
                    template_type = 'html'
                    prefix = HTML_PREFIX
                elif t.string.startswith(PLAIN_PREFIX):
                    template_type = 'plain'
                    prefix = PLAIN_PREFIX
                else:
                    template_type = ''
                if template_type:
                    func_name = t.string[len(prefix):]
                    func_name += ' [' + template_type + '] '
                    tokens[i + 1] = tokenize.TokenInfo(t.type, func_name,
                                                       t.start, t.end, t.line)
        elif tok.type == tokenize.STRING and tok.string[0] == 'F':
            tokens[i] = tokenize.TokenInfo(tok.type, 'h' + tok.string[1:],
                                           tok.start, tok.end, tok.line)
        i += 1
Example #28
 def check_for_dummy(self, type: str) -> None:
     """Check whether this is the position where we should insert a dummy token."""
     if self._dummy_pos is None or self._dummy_pos != self.mark():
         return
     assert self._dummy_count is not None
     if self._dummy_count > 0:
         self._dummy_count -= 1
         return
     pos = self._dummy_pos
     self._dummy_pos = None
     self._dummy_count = None
     tok = tokenize.TokenInfo(make_dummy_token_type(type), type, (0, 0),
                              (0, 0), "")
     self._tokenizer._tokens.insert(pos, tok)  # TODO: Make an API for this
     self._dummy_inserted = pos
Example #29
def demojize(lines, delimiters=('_', '_')):
    str = ''.join(lines or [])
    import tokenize
    import emoji
    tokens = []
    try:
        for token in list(tokenize.tokenize(
                __import__('io').BytesIO(str.encode()).readline)):
            if token.type == tokenize.ERRORTOKEN:
                string = emoji.demojize(token.string, delimiters=delimiters
                                        ).replace('-', '_').replace("’", "_")
                if tokens and tokens[-1].type == tokenize.NAME:
                    tokens[-1] = tokenize.TokenInfo(tokens[-1].type, tokens[-1].string +
                                                    string, tokens[-1].start, tokens[-1].end, tokens[-1].line)
                else:
                    tokens.append(
                        tokenize.TokenInfo(
                            tokenize.NAME, string, token.start, token.end, token.line))
            else:
                tokens.append(token)
        return tokenize.untokenize(tokens).decode().splitlines(True)
    except BaseException:
        ...
    return ''.join(lines)
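A possible call, assuming the third-party emoji package is installed; the exact replacement text depends on that package's shortcode data and on the Python version's handling of emoji as ERRORTOKENs:

# Emoji are not valid identifier characters, so the tokenizer reports them as
# ERRORTOKENs; demojize rewrites them to NAME tokens built from the shortcode
# (something like '_rocket_' here). On failure the input is returned unchanged.
print(demojize(['🚀 = "lift off"\n']))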
Example #30
def group_operand(tokens: TokenTuple) -> typing.Optional[tokenize.TokenInfo]:
    token_length = len(tokens)
    if token_length == 0:
        return None
    if token_length == 1:
        return tokens[0]

    first_char_position = tokens[0].start[1]
    last_char_position = tokens[token_length - 1].end[1]
    column_name = tokens[0].line[first_char_position:last_char_position]
    return tokenize.TokenInfo(type=1,
                              string=column_name,
                              start=(1, first_char_position),
                              end=(1, last_char_position),
                              line=tokens[0].line)
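A small usage sketch for group_operand (type 1 is token.NAME): the tokens of a dotted column reference are joined back into one NAME token spanning the original text:

import io
import tokenize

toks = list(tokenize.generate_tokens(io.StringIO('orders.total\n').readline))
# Keep only NAME('orders'), OP('.'), NAME('total'); drop NEWLINE and ENDMARKER.
grouped = group_operand(tuple(toks[:3]))
print(grouped.string, grouped.start, grouped.end)
# prints: orders.total (1, 0) (1, 12)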