Example #1
def apply_obfuscation(source):
    """
    Returns 'source' all obfuscated.
    """
    global keyword_args
    global imported_modules

    tokens = token_utils.listified_tokenizer(source)
    keyword_args = analyze.enumerate_keyword_args(tokens)
    imported_modules = analyze.enumerate_imports(tokens)

    variables = find_obfuscatables(tokens, obfuscatable_variable)
    classes = find_obfuscatables(tokens, obfuscatable_class)
    functions = find_obfuscatables(tokens, obfuscatable_function)

    variables = list(set(variables).difference(set(imported_modules)))
    for variable in variables:
        replace_obfuscatables(imported_modules, tokens, obfuscate_variable,
                              variable, name_generator)
    # for function in functions:
    #     replace_obfuscatables(imported_modules,
    #         tokens, obfuscate_function, function, name_generator)
    # for _class in classes:
    #     replace_obfuscatables(imported_modules,tokens, obfuscate_class, _class, name_generator)
    return token_utils.untokenize(tokens)
def minify(tokens, options):
    """
    Performs minification on *tokens* according to the values in *options*
    """
    # Remove comments
    #remove_comments(tokens)
    # Remove docstrings
    #remove_docstrings(tokens)
    result = token_utils.untokenize(tokens)
    # Minify our input script
    result = multiline_indicator.sub('', result)
    #result = fix_empty_methods(result)
    result = join_multiline_pairs(result)
    result = join_multiline_pairs(result, '[]')
    print(result)
    print(
        "\n---------------------------------------------------------------------\n"
    )
    # result = join_multiline_pairs(result, '{}')  # make code become one line

    result = remove_blank_lines(result)
    result = reduce_operators(result)
    #print (result)
    #result = dedent(result, use_tabs=options.tabs)
    return result
Example #3
def transform_source(source, **kwargs):
    """A simple replacement of ``function`` by ``lambda``."""
    tokens = token_utils.tokenize(source)
    for token in tokens:
        if token == "λ":
            token.string = "lambda"
    return token_utils.untokenize(tokens)
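A minimal usage sketch for the transformation above (assuming token_utils is ideas.token_utils; the exact spacing of the result depends on untokenize):

source = "square = λ x: x * x"
print(transform_source(source))
# roughly: square = lambda x: x * x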
Example #4
def transform_source(source, **kwargs):
    """Replace integers by Fraction objects"""
    tokens = token_utils.tokenize(source)
    for token in tokens:
        if token.is_integer():
            token.string = f"Fraction({token.string})"

    return token_utils.untokenize(tokens)
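A usage sketch for the integer-to-Fraction rewrite; note that the transformed code only runs if Fraction is importable (from fractions import Fraction) in the namespace where it is executed, which the transformation above does not arrange:

source = "result = 1 / 3"
print(transform_source(source))
# roughly: result = Fraction(1) / Fraction(3)
# evaluated with Fraction in scope, this yields an exact rational instead of a float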
Example #5
def transform_source(source, **kwargs):
    """Simple transformation: replaces any single token λ by lambda.

    By defining this function, we can also make use of Ideas' console.
    """
    tokens = token_utils.tokenize(source)
    for token in tokens:
        if token == "λ":
            token.string = "lambda"
    return token_utils.untokenize(tokens)
Example #6
def transform_source(source, **kwargs):
    """Simple transformation: replaces any explicit float by a Decimal.

    By defining this function, we can also make use of Ideas' console.
    """
    tokens = token_utils.tokenize(source)
    for token in tokens:
        if token.is_number() and "." in token.string:
            token.string = f"Decimal('{token.string}')"

    return token_utils.untokenize(tokens)
Example #7
def automatic_self(source):
    """Replaces code like::

        self .= :
            a
            b
            c = this if __ == that else __

    by::

        self.a = a
        self.b = b
        self.c = this if c == that else c
    """
    new_tokens = []
    auto_self_block = False
    self_name = ""
    indentation = 0

    get_nb = token_utils.get_number
    get_first = token_utils.get_first
    get_first_index = token_utils.get_first_index

    for tokens in token_utils.get_lines(source):
        if auto_self_block:
            variable = get_first(tokens)
            if variable is not None:  # None would mean an empty line
                var_name = variable.string
                block_indent = variable.start_col
                if block_indent > indentation:
                    dedent = block_indent - indentation
                    if get_nb(tokens) == 1:
                        variable.string = f"{self_name}.{var_name} = {var_name}"
                        tokens = token_utils.dedent(tokens, dedent)
                    else:
                        variable.string = f"{self_name}.{var_name}"
                        for token in tokens:
                            if token.string == "__":
                                token.string = var_name
                        tokens = token_utils.dedent(tokens, dedent)
                else:
                    auto_self_block = False
        elif get_nb(tokens) == 4:
            index = get_first_index(tokens)
            if (tokens[index].is_identifier() and tokens[index + 1] == "."
                    and tokens[index + 2] == "=" and tokens[index + 1].end_col
                    == tokens[index + 2].start_col
                    and tokens[index + 3] == ":"):
                self_name = tokens[index].string
                indentation = tokens[index].start_col
                auto_self_block = True
                continue
        new_tokens.extend(tokens)
    return token_utils.untokenize(new_tokens)
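A hypothetical usage sketch for automatic_self (the input snippet is an assumption; the comment shows only the intended result, since exact spacing depends on token_utils.untokenize):

source = """class Point:
    def __init__(self, x, y):
        self .= :
            x
            y
"""
print(automatic_self(source))
# intended result: the indented block is rewritten as
#     self.x = x
#     self.y = y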
Example #8
def transform_source(source, **kwargs):
    """Simple transformation: replaces any explicit float followed by ``D``
    by a Decimal.
    """
    tokens = token_utils.tokenize(source)
    for first, second in zip(tokens, tokens[1:]):
        if first.is_number() and "." in first.string and second == "D":
            first.string = f"Decimal('{first.string}')"
            second.string = ""

    return token_utils.untokenize(tokens)
Example #9
def french_to_english(source):
    """A simple replacement of 'French Python keyword' by their normal
       English version.
    """
    new_tokens = []
    for token in token_utils.tokenize(source):
        if token.string in fr_to_py:
            token.string = fr_to_py[token.string]
        new_tokens.append(token)

    new_source = token_utils.untokenize(new_tokens)
    return new_source
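fr_to_py is not defined in the snippet above; it is presumably a module-level dict mapping French keywords to their Python equivalents. A minimal, illustrative sketch (the entries below are assumptions, not the project's actual table):

fr_to_py = {
    "si": "if",
    "sinon": "else",
    "pour": "for",
    "tantque": "while",
    "retourne": "return",
}

print(french_to_english("si x > 0: retourne x"))
# roughly: if x > 0: return x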
Example #10
def random_deletion(sentence, n=1):
    tokens = tokenize(sentence)

    # obviously, if there's only one word, don't delete it
    if len(tokens) == 1:
        return untokenize(tokens)

    # randomly delete up to n words
    count = 0
    while count < n:
        assert n < len(tokens)
        rand_index = random.randint(0, len(tokens) - 1)
        del tokens[rand_index]
        count += 1

    return untokenize(tokens)
Example #11
def function_as_a_keyword(source):
    """A simple replacement of ``function`` by ``lambda``.

    Note that, while the string ``lambda`` is shorter than ``function``, we
    do not adjust the information (start_col, end_col) about the position
    of the token. ``untokenize`` uses that information together with the
    information about each original line, to properly keep track of the
    spacing between tokens.
    """
    new_tokens = []
    for token in token_utils.tokenize(source):
        if token == "function":
            token.string = "lambda"
        new_tokens.append(token)

    return token_utils.untokenize(new_tokens)
Example #12
File: repeat.py Project: aroberge/ideas
def convert_repeat(source, predictable_names=False):
    """Replaces instances of::

        repeat forever: -> while True:
        repeat while condition: -> while  condition:
        repeat until condition: -> while not condition:
        repeat n: -> for _uid in range(n):

    A complete repeat statement is restricted to be on a single line ending
    with a colon (optionally followed by a comment). If the colon is
    missing, a ``RepeatSyntaxError`` is raised.
    """

    new_tokens = []
    if predictable_names:
        variable_name = utils.generate_predictable_names()
    else:
        variable_name = utils.generate_variable_names()

    for tokens in token_utils.get_lines(source):
        # a line of tokens can start with INDENT or DEDENT tokens ...
        first_token = token_utils.get_first(tokens)
        if first_token == "repeat":
            last_token = token_utils.get_last(tokens)
            if last_token != ":":
                raise RepeatSyntaxError(
                    "Missing colon for repeat statement on line " +
                    f"{first_token.start_row}\n    {first_token.line}.")

            repeat_index = token_utils.get_first_index(tokens)
            second_token = tokens[repeat_index + 1]
            if second_token == "forever":
                first_token.string = "while"
                second_token.string = "True"
            elif second_token == "while":
                first_token.string = "while"
                second_token.string = ""
            elif second_token == "until":
                first_token.string = "while"
                second_token.string = "not"
            else:
                first_token.string = "for %s in range(" % next(variable_name)
                last_token.string = "):"

        new_tokens.extend(tokens)

    return token_utils.untokenize(new_tokens)
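A usage sketch for convert_repeat (assuming the module-level utils and token_utils imports of the original repeat.py in aroberge/ideas); the loop variable comes from the selected name generator, so it is shown as a placeholder:

source = "repeat 3:\n    print('hello')\n"
print(convert_repeat(source, predictable_names=True))
# roughly:
#     for <generated name> in range( 3):
#         print('hello')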
Example #13
def pyminify(options, _file):

    module = os.path.split(_file)[1]
    module = ".".join(module.split('.')[:-1])
    filesize = os.path.getsize(_file)
    source = open(_file, 'rb').read()
    tokens = token_utils.listified_tokenizer(source)

    # Perform obfuscation if any of the related options were set
    if options['obfuscate']:
        identifier_length = int(options['replacement_length'])
        name_generator = obfuscate.obfuscation_machine(identifier_length=identifier_length)
        obfuscate.obfuscate(module, tokens, options)

    result = token_utils.untokenize(tokens).strip()
    #result = filter(lambda x: x != '\r' and x != '\n', ' '.join(result.split()))
    print(result)
Example #14
def replace(sentence, the_word, synonym):
    tokens = tokenize(sentence)
    # replace the_word with synonym
    try:
        assert the_word in tokens
    except AssertionError:
        print("AssertionError")
        print("sentence: {}\nthe world: {}\nsynonym: {}".format(sentence, the_word, synonym))
        return None

    new_tokens = [synonym if word == the_word else word for word in tokens]
    new_sentence = untokenize(new_tokens)

    # print("--old: ", sentence)
    # print("replaced", the_word, "with", synonym)
    # print("--new: ", new_sentence)

    return new_sentence
def join_multiline_pairs(source, pair="()"):
    """
    Finds and removes newlines in multiline matching pairs of characters in
    *source*.

    By default it joins parens () but it will join any two characters given via
    the *pair* variable.

    .. note::

        Doesn't remove extraneous whitespace that ends up between the pair.
        Use `reduce_operators()` for that.

    Example::

        test = (
            "This is inside a multi-line pair of parentheses"
        )

    Will become::

        test = (            "This is inside a multi-line pair of parentheses"        )

    """
    opener = pair[0]
    closer = pair[1]
    io_obj = io.StringIO(source)
    out_tokens = []
    open_count = 0
    for tok in tokenize.generate_tokens(io_obj.readline):
        token_type = tok[0]
        token_string = tok[1]
        if token_type == tokenize.OP and token_string in pair:
            if token_string == opener:
                open_count += 1
            elif token_string == closer:
                open_count -= 1
            out_tokens.append(tok)
        elif token_type in (tokenize.NL, tokenize.NEWLINE):
            if open_count == 0:
                out_tokens.append(tok)
        else:
            out_tokens.append(tok)
    return token_utils.untokenize(out_tokens)
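A usage sketch reproducing the docstring example above; as the note says, whatever whitespace ends up inside the parentheses is left for reduce_operators() to clean up:

source = 'test = (\n    "inside a multi-line pair of parentheses"\n)\n'
print(join_multiline_pairs(source))
# roughly: test = (    "inside a multi-line pair of parentheses")
# i.e. the pair is joined onto one line, with the original indentation surviving as spaces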
Example #16
def minify(tokens):
    """
    Performs minification on *tokens* according to the values in *options*
    """
    # Remove comments
    remove_comments(tokens)
    # Remove docstrings
    remove_docstrings(tokens)
    result = token_utils.untokenize(tokens)
    # Minify our input script
    result = multiline_indicator.sub('', result)
    result = fix_empty_methods(result)
    result = join_multiline_pairs(result)
    result = join_multiline_pairs(result, '[]')
    result = join_multiline_pairs(result, '{}')
    result = remove_blank_lines(result)
    result = reduce_operators(result)
    result = dedent(result)
    return result
Example #17
def transform_source(source, **kwargs):
    """Does the following transformation::

        with float_as_Decimal:
            a = 1.0
            b = 2.0
        c = 3.0

    to::

        if True: # with float_as_Decimal:
            a = Decimal('1.0')
            b = Decimal('2.0')
        c = 3.0
    """

    new_tokens = []
    decimal_block = False

    for line in token_utils.get_lines(source):
        first = token_utils.get_first(line)
        if first is None:
            new_tokens.extend(line)
            continue
        elif first == "with" :
            first_index = token_utils.get_first_index(line)
            if len(line) > first_index + 1:
                second = line[first_index + 1]
                if second == "float_as_Decimal":
                    first.string = "if"
                    second.string = "True"
                    indentation = first.start_col
                    decimal_block = True
        elif decimal_block and first.start_col > indentation:
            for token in line:
                if token.is_number() and "." in token.string:
                    token.string = f"Decimal('{token.string}')"
        else:
            indentation = first.start_col

        new_tokens.extend(line)

    return token_utils.untokenize(new_tokens)
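A usage sketch for the float_as_Decimal block; executing the transformed source requires Decimal (from decimal import Decimal) in the target namespace, which the transformation itself does not arrange:

source = "with float_as_Decimal:\n    a = 1.0\nb = 2.0\n"
print(transform_source(source))
# roughly:
#     if True:
#         a = Decimal('1.0')
#     b = 2.0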
Example #18
def minify(tokens, options):
    """
    Performs minification on *tokens* according to the values in *options*
    """
    # Remove comments
    remove_comments(tokens)
    # Remove docstrings
    remove_docstrings(tokens)
    result = token_utils.untokenize(tokens)
    # Minify our input script
    result = multiline_indicator.sub('', result)
    result = fix_empty_methods(result)
    result = join_multiline_pairs(result)
    result = join_multiline_pairs(result, '[]')
    result = join_multiline_pairs(result, '{}')
    result = remove_blank_lines(result)
    result = reduce_operators(result)
    result = dedent(result, use_tabs=options.tabs)
    
    return result
Example #19
File: nobreak.py Project: aroberge/ideas
def nobreak_as_a_keyword(source):
    """``nobreak`` is replaced by ``else`` only if it is the first
    non-space token on a line and if its indentation matches
    that of a ``for`` or ``while`` block.
    """
    indentations = {}
    lines = token_utils.get_lines(source)
    new_tokens = []
    for line in lines:
        first = token_utils.get_first(line)
        if first is None:
            new_tokens.extend(line)
            continue
        if first == "nobreak":
            if first.start_col in indentations:
                if indentations[first.start_col] in ["for", "while"]:
                    first.string = "else"
        indentations[first.start_col] = first.string
        new_tokens.extend(line)

    return token_utils.untokenize(new_tokens)
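A usage sketch; nobreak becomes else only when it lines up with a for or while block at the same indentation:

source = """for i in range(3):
    print(i)
nobreak:
    print('completed without break')
"""
print(nobreak_as_a_keyword(source))
# the line `nobreak:` is rewritten to `else:`, giving a standard for/else statement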
Example #20
def add_multiplication_symbol(source):
    """This adds a multiplication symbol where it would be understood as
    being implicit by the normal way algebraic equations are written but would
    be a SyntaxError in Python. Thus we have::

        2n  -> 2*n
        n 2 -> n* 2
        2(a+b) -> 2*(a+b)
        (a+b)2 -> (a+b)*2
        2 3 -> 2* 3
        m n -> m* n
        (a+b)c -> (a+b)*c

    The obvious one (in algebra) being left out is something like ``n(...)``
    which is a function call - and thus valid Python syntax.
    """

    tokens = token_utils.tokenize(source)
    if not tokens:
        return tokens

    prev_token = tokens[0]
    new_tokens = [prev_token]

    for token in tokens[1:]:
        # The code has been written in a way to demonstrate that this type of
        # transformation could be done as the source is tokenized by Python.
        if ((prev_token.is_number() and
             (token.is_identifier() or token.is_number() or token == "("))
                or (prev_token.is_identifier() and
                    (token.is_identifier() or token.is_number()))
                or (prev_token == ")" and
                    (token.is_identifier() or token.is_number()))):
            new_tokens.append("*")
        new_tokens.append(token)
        prev_token = token

    return token_utils.untokenize(new_tokens)
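A usage sketch for the implicit-multiplication rewrite above (assuming token_utils is ideas.token_utils, whose untokenize also accepts plain strings, which the bare "*" appended above relies on):

print(add_multiplication_symbol("y = 2n + 3(a + b)"))
# roughly: y = 2*n + 3*(a + b)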
Example #21
def random_swap(sentence, distance=1):
    """
    randomly swap words in a sentence
    :params[in]: sentence, a string, input sentence
    :params[in]: distance, integer, distance of words

    :params[out]: n_sentence, a string, new sentence
    """
    # lis = sent.split(' ')  # split by spaces
    tokens = tokenize(sentence)
    tokens_length = len(tokens)
    assert tokens_length >= 2
    index1 = random.randint(0, tokens_length - 1)
    # candidate pool
    candidates = set(range(index1 - distance, index1 + distance + 1)) & set(range(tokens_length))
    candidates.remove(index1)
    # randomly sample another index
    index2 = random.sample(candidates, 1)[0]
    # swap two elements
    tokens[index1], tokens[index2] = tokens[index2], tokens[index1]
    # n_sen = ' '.join(lis)
    n_sentence = untokenize(tokens)
    # return new sentence
    return n_sentence
Example #22
def check_lines(source):
    lines = token_utils.get_lines(source)
    tokens = []
    for line in lines:
        tokens.extend(line)
    assert source == token_utils.untokenize(tokens)
Example #23
def toValidEqn(source):
    """This adds a multiplication symbol where it would be understood as
	being implicit by the normal way algebraic equations are written but would
	be a SyntaxError in Python. Thus we have::
		2N  -> 2*N
		N 2 -> N* 2
		2(A+B) -> 2*(A+B)
		(A+B)2 -> (A+B)*2
		2 3 -> 2* 3
		M N -> M* N
		(A+B)C -> (A+B)*C
		A(3) -> A*(3)
		a(3) -> a(3) - will only add multiplication if the preceding token is capital, since that is a variable
	"""
    """
	Modified from ideas
	https://github.com/aroberge/ideas/blob/master/ideas/examples/implicit_multiplication.py
	"""

    constants = [
        'BLUE', 'RED', 'BLACK', 'MAGENTA', 'GREEN', 'ORANGE', 'BROWN', 'NAVY',
        'LTBLUE', 'YELLOW', 'WHITE', 'LTGRAY', 'MEDGRAY', 'GRAY', 'DARKGRAY'
    ]

    tokens = token_utils.tokenize(source)
    if not tokens:
        return tokens

    prev_token = tokens[0]
    new_tokens = [prev_token]

    for token in tokens[1:]:
        if token.is_not_in(constants):
            # Check if implicit multiplication should be added
            if (((prev_token.is_number() or
                  (prev_token.is_identifier() and prev_token.string.isupper()))
                 and ((token.is_identifier() and token.string.isupper())
                      or token.is_number() or token == "(")) or
                ((prev_token.is_identifier() and prev_token.string.isupper())
                 and ((token.is_identifier() and token.string.isupper())
                      or token.is_number()))
                    or (prev_token == ")" and
                        ((token.is_identifier() and token.string.isupper())
                         or token.is_number()))):
                new_tokens.append("*")

            if token.is_identifier() and token.string.isupper() and len(
                    token.string) > 1:
                # Multiple variables next to one another
                # ABC -> A*B*C
                token.string = '*'.join(token.string)
                new_tokens.append(token)
            else:
                new_tokens.append(token)
        else:
            # Token in constants, skip
            new_tokens.append(token)

        prev_token = token

    return token_utils.untokenize(new_tokens)
Example #24
    for function in functions:
        replace_obfuscatables(module, tokens, obfuscate_function, function,
                              name_generator, table)

    for _class in classes:
        replace_obfuscatables(module, tokens, obfuscate_class, _class,
                              name_generator, table)

    obfuscate_global_import_methods(module, tokens, name_generator, table)
    obfuscate_builtins(module, tokens, name_generator, table)


if __name__ == "__main__":
    global name_generator
    if len(sys.argv) != 3:
        print("Usage: %s <emoji_length> <filename.py>" % sys.argv[0])
        sys.exit(1)

    source = open(sys.argv[2]).read()
    replacement_length = int(sys.argv[1])

    tokens = token_utils.listified_tokenizer(source)
    source = minification.minify(tokens)

    tokens = token_utils.listified_tokenizer(source)

    obfuscate(source, tokens, replacement_length)
    result = ''
    result += token_utils.untokenize(tokens)
    # print(result)
Example #25
def test_indent():
    new_tokens = token_utils.indent(tokens2, 4)
    new_line_a = token_utils.untokenize(new_tokens)
    new_line_b = token_utils.untokenize(lines3[2])
    assert new_line_a == new_line_b
Example #26
File: switch.py Project: aroberge/ideas
def convert_switch(source, predictable_names=False):
    """Replaces code like::

        switch EXPR:
            case EXPR_1:
                SUITE
            case EXPR_2:
                SUITE
            case in EXPR_3, EXPR_4, ...:
                SUITE
            ...
            else:
                SUITE

    by::

        var_name = EXPR
        if var_name == EXPR_1:
                SUITE
        elif var_name == EXPR_2:
                SUITE
        elif var_name in EXPR_3, EXPR_4, ...:
                SUITE
        else:
                SUITE
        del var_name

    Limitation: switch blocks cannot be part of a SUITE of another switch block.
    """
    new_tokens = []
    switch_block = False
    first_case = False
    if predictable_names:
        variable_name = utils.generate_predictable_names()
    else:
        variable_name = utils.generate_variable_names()

    for line in token_utils.get_lines(source):
        first_token = token_utils.get_first(line)
        if first_token is None:
            new_tokens.extend(line)
            continue

        if len(line) > 1:
            _index = token_utils.get_first_index(line)
            second_token = line[_index + 1]
        else:
            second_token = None

        if not switch_block:
            if first_token == "switch":
                switch_indent = first_token.start_col
                var_name = next(variable_name)
                first_token.string = f"{var_name} ="
                switch_block = True
                first_case = True
                colon = token_utils.get_last(line)
                colon.string = ""
        else:
            if first_token.start_col == switch_indent:
                switch_block = False
                new_tokens.extend([" " * switch_indent + f"del {var_name}\n"])

            elif first_token == "case" or first_token == "else":
                if first_case and first_token == "case":
                    if second_token == "in":
                        first_token.string = f"if {var_name}"
                    else:
                        first_token.string = f"if {var_name} =="
                    first_case = False
                elif first_token == "case":
                    if second_token == "in":
                        first_token.string = f"elif {var_name}"
                    else:
                        first_token.string = f"elif {var_name} =="
                dedent = first_token.start_col - switch_indent
                line = token_utils.dedent(line, dedent)

        new_tokens.extend(line)
    return token_utils.untokenize(new_tokens)
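A usage sketch for convert_switch (assuming the module-level utils and token_utils imports of the original switch.py in aroberge/ideas); the helper variable name comes from the selected name generator, so it is shown as a placeholder:

source = """switch colour:
    case 'red':
        print('stop')
    else:
        print('go')
print('done')
"""
print(convert_switch(source, predictable_names=True))
# roughly:
#     <name> = colour
#     if <name> == 'red':
#             print('stop')
#     else:
#             print('go')
#     del <name>
#     print('done')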
Example #27
def check(source):
    tokens = token_utils.tokenize(source)
    new_source = token_utils.untokenize(tokens)
    print(len(source), len(new_source))
    assert source == new_source