Example #1
def transform_source(source, **kwargs):
    """A simple replacement of ``function`` by ``lambda``."""
    tokens = token_utils.tokenize(source)
    for token in tokens:
        if token == "λ":
            token.string = "lambda"
    return token_utils.untokenize(tokens)
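A quick usage sketch for the transformer above. It assumes token_utils comes from the ideas project (https://github.com/aroberge/ideas); the import path is illustrative:

from ideas import token_utils  # assumed import for this sketch

source = "square = λ x: x * x"
print(transform_source(source))
# expected output: square = lambda x: x * x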
Example #2
def transform_source(source, **kwargs):
    """Replace integers by Fraction objects"""
    tokens = token_utils.tokenize(source)
    for token in tokens:
        if token.is_integer():
            token.string = f"Fraction({token.string})"

    return token_utils.untokenize(tokens)
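The rewritten source needs Fraction in scope when it finally executes. A minimal sketch, under the same token_utils assumption:

from fractions import Fraction  # required at run time by the transformed code

print(transform_source("x = 1/3"))
# expected output: x = Fraction(1)/Fraction(3)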
Example #3
def transform_source(source, **kwargs):
    """Simple transformation: replaces any single token λ by lambda.

    By defining this function, we can also make use of Ideas' console.
    """
    tokens = token_utils.tokenize(source)
    for token in tokens:
        if token == "λ":
            token.string = "lambda"
    return token_utils.untokenize(tokens)
Example #4
def transform_source(source, **kwargs):
    """Simple transformation: replaces any explicit float followed by ``D``
    by a Decimal.
    """
    tokens = token_utils.tokenize(source)
    for first, second in zip(tokens, tokens[1:]):
        if first.is_number() and "." in first.string and second == "D":
            first.string = f"Decimal('{first.string}')"
            second.string = ""

    return token_utils.untokenize(tokens)
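A hedged sketch of the effect: the D marker is consumed and the float literal becomes a Decimal (output spacing follows whatever untokenize preserves from the original line):

print(transform_source("price = 19.99 D"))
# expected output (roughly): price = Decimal('19.99')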
Example #5
def transform_source(source, **kwargs):
    """Simple transformation: replaces any explicit float by a Decimal.

    By defining this function, we can also make use of Ideas' console.
    """
    tokens = token_utils.tokenize(source)
    for token in tokens:
        if token.is_number() and "." in token.string:
            token.string = f"Decimal('{token.string}')"

    return token_utils.untokenize(tokens)
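Since every float literal is wrapped here, the transformed source needs a Decimal import when it runs. For instance:

from decimal import Decimal  # required at run time by the transformed code

print(transform_source("x = 0.1 + 0.2"))
# expected output: x = Decimal('0.1') + Decimal('0.2')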
Example #6
def french_to_english(source):
    """A simple replacement of 'French Python keyword' by their normal
       English version.
    """
    new_tokens = []
    for token in token_utils.tokenize(source):
        if token.string in fr_to_py:
            token.string = fr_to_py[token.string]
        new_tokens.append(token)

    new_source = token_utils.untokenize(new_tokens)
    return new_source
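The fr_to_py mapping is not part of this excerpt. A hypothetical fragment, only to make the example self-contained (the real table in the ideas project is larger):

fr_to_py = {  # hypothetical subset of the French-to-English keyword table
    "si": "if",
    "sinon": "else",
    "Vrai": "True",
    "Faux": "False",
}

print(french_to_english("si Vrai:"))
# expected output: if True: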
Example #7
def function_as_a_keyword(source):
    """A simple replacement of ``function`` by ``lambda``.

    Note that, while the string ``lambda`` is shorter than ``function``, we
    do not adjust the information (start_col, end_col) about the position
    of the token. ``untokenize`` uses that information together with the
    information about each original line, to properly keep track of the
    spacing between tokens.
    """
    new_tokens = []
    for token in token_utils.tokenize(source):
        if token == "function":
            token.string = "lambda"
        new_tokens.append(token)

    return token_utils.untokenize(new_tokens)
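To see the docstring's point about spacing, a short sketch under the same token_utils assumption:

print(function_as_a_keyword("square = function x: x * x"))
# expected output: square = lambda x: x * x
# untokenize rebuilds the spacing between tokens from the original line,
# even though "lambda" is two characters shorter than "function".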
Example #8
def random_deletion(sentence, n=1):
    tokens = tokenize(sentence)

    # obviously, if there's only one word, don't delete it
    if len(tokens) == 1:
        return untokenize(tokens)

    # randomly delete up to n words, always keeping at least one
    count = 0
    while count < n and len(tokens) > 1:
        rand_index = random.randint(0, len(tokens) - 1)
        del tokens[rand_index]
        count += 1

    return untokenize(tokens)
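This example and the sentence-level ones that follow (#9 and #11) work on natural-language text, not Python source, so their tokenize/untokenize are assumed to be plain string helpers. A minimal sketch of those helpers plus a call:

import random

def tokenize(sentence):    # assumed helper: whitespace tokenization
    return sentence.split()

def untokenize(tokens):    # assumed helper: join with single spaces
    return " ".join(tokens)

random.seed(0)
print(random_deletion("the quick brown fox", n=2))
# prints the sentence with two words removed; which ones depends on the seed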
Example #9
def replace(sentence, the_word, synonym):
    tokens = tokenize(sentence)
    # replace the_word with synonym
    try:
        assert the_word in tokens
    except AssertionError:
        print("AssertionError")
        print("sentence: {}\nthe world: {}\nsynonym: {}".format(sentence, the_word, synonym))
        return None

    new_tokens = [synonym if word == the_word else word for word in tokens]
    new_sentence = untokenize(new_tokens)

    # print("--old: ", sentence)
    # print("replaced", the_word, "with", synonym)
    # print("--new: ", new_sentence)

    return new_sentence
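With the same assumed helpers, a usage sketch:

print(replace("the quick brown fox", "quick", "fast"))
# expected output: the fast brown fox

print(replace("the quick brown fox", "slow", "fast"))
# prints the diagnostic message and returns None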
Example #10
def add_multiplication_symbol(source):
    """This adds a multiplication symbol where it would be understood as
    being implicit by the normal way algebraic equations are written but would
    be a SyntaxError in Python. Thus we have::

        2n  -> 2*n
        n 2 -> n* 2
        2(a+b) -> 2*(a+b)
        (a+b)2 -> (a+b)*2
        2 3 -> 2* 3
        m n -> m* n
        (a+b)c -> (a+b)*c

    The obvious one (in algebra) being left out is something like ``n(...)``
    which is a function call - and thus valid Python syntax.
    """

    tokens = token_utils.tokenize(source)
    if not tokens:
        return tokens

    prev_token = tokens[0]
    new_tokens = [prev_token]

    for token in tokens[1:]:
        # The code has been written in a way to demonstrate that this type of
        # transformation could be done as the source is tokenized by Python.
        if ((prev_token.is_number() and
             (token.is_identifier() or token.is_number() or token == "("))
                or (prev_token.is_identifier() and
                    (token.is_identifier() or token.is_number()))
                or (prev_token == ")" and
                    (token.is_identifier() or token.is_number()))):
            new_tokens.append("*")
        new_tokens.append(token)
        prev_token = token

    return token_utils.untokenize(new_tokens)
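A usage sketch, again assuming the ideas token_utils module; note that the example relies on untokenize accepting the bare "*" strings appended alongside Token objects:

print(add_multiplication_symbol("y = 2x + 3(a + b)"))
# expected output: y = 2*x + 3*(a + b)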
Example #11
def random_swap(sentence, distance=1):
    """
    randomly swap words in a sentence
    :params[in]: sentence, a string, input sentence
    :params[in]: distance, integer, distance of words

    :params[out]: n_sentence, a string, new sentence
    """
    # lis = sent.split(' ')  # split by spaces
    tokens = tokenize(sentence)
    tokens_length = len(tokens)
    assert tokens_length >= 2
    index1 = random.randint(0, tokens_length - 1)
    # candidate pool
    candidates = set(range(index1 - distance, index1 + distance + 1)) & set(range(tokens_length))
    candidates.remove(index1)
    # randomly pick another index; random.sample no longer accepts a set
    # on Python 3.11+, so convert to a sorted list first
    index2 = random.choice(sorted(candidates))
    # swap two elements
    tokens[index1], tokens[index2] = tokens[index2], tokens[index1]
    # n_sen = ' '.join(lis)
    n_sentence = untokenize(tokens)
    # return new sentence
    return n_sentence
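A usage sketch with the assumed sentence-level helpers from Example #8:

random.seed(1)
print(random_swap("the quick brown fox", distance=2))
# swaps two words at most two positions apart, e.g. "brown quick the fox"
# (the exact pair depends on the seed)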
Example #12
def check(source):
    tokens = token_utils.tokenize(source)
    new_source = token_utils.untokenize(tokens)
    print(len(source), len(new_source))
    assert source == new_source
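check() asserts that tokenize followed by untokenize is a lossless round trip. For instance:

check("if a:\n    b = 1  # comment\n")
# prints the two (equal) lengths; an AssertionError would signal a source
# that does not survive the round trip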
Example #13
    source = "a\n  "
    source2 = "a\n\t"
    check(source)
    check(source2)

    check_lines(source)
    check_lines(source2)


source1 = "a = b"
source2 = "a = b # comment\n"
source3 = """
if True:
    a = b # comment
"""
tokens1 = token_utils.tokenize(source1)
tokens2 = token_utils.tokenize(source2)
lines3 = token_utils.get_lines(source3)


def test_first():
    assert token_utils.get_first(tokens1) == token_utils.get_first(tokens2)
    assert token_utils.get_first(tokens1) == "a"
    assert token_utils.get_first(tokens2, exclude_comment=False) == "a"
    assert token_utils.get_first_index(tokens1) == 0

    assert token_utils.get_first(lines3[2]) == "a"
    assert token_utils.get_first_index(lines3[2]) == 1


def test_last():
Example #14
def toValidEqn(source):
    """This adds a multiplication symbol where it would be understood as
    being implicit by the normal way algebraic equations are written but would
    be a SyntaxError in Python. Thus we have::

        2N  -> 2*N
        N 2 -> N* 2
        2(A+B) -> 2*(A+B)
        (A+B)2 -> (A+B)*2
        2 3 -> 2* 3
        M N -> M* N
        (A+B)C -> (A+B)*C
        A(3) -> A*(3)
        a(3) -> a(3)

    Multiplication is only added when the preceding identifier is uppercase,
    since uppercase names are treated as variables; ``a(3)`` stays a function
    call.

    Modified from the ideas project:
    https://github.com/aroberge/ideas/blob/master/ideas/examples/implicit_multiplication.py
    """

    constants = [
        'BLUE', 'RED', 'BLACK', 'MAGENTA', 'GREEN', 'ORANGE', 'BROWN', 'NAVY',
        'LTBLUE', 'YELLOW', 'WHITE', 'LTGRAY', 'MEDGRAY', 'GRAY', 'DARKGRAY'
    ]

    tokens = token_utils.tokenize(source)
    if not tokens:
        return tokens

    prev_token = tokens[0]
    new_tokens = [prev_token]

    for token in tokens[1:]:
        if token.is_not_in(constants):
            # Check if implicit multiplication should be added
            if (((prev_token.is_number() or
                  (prev_token.is_identifier() and prev_token.string.isupper()))
                 and ((token.is_identifier() and token.string.isupper())
                      or token.is_number() or token == "(")) or
                ((prev_token.is_identifier() and prev_token.string.isupper())
                 and ((token.is_identifier() and token.string.isupper())
                      or token.is_number()))
                    or (prev_token == ")" and
                        ((token.is_identifier() and token.string.isupper())
                         or token.is_number()))):
                new_tokens.append("*")

            if (token.is_identifier() and token.string.isupper()
                    and len(token.string) > 1):
                # Multiple variables next to one another
                # ABC -> A*B*C
                token.string = '*'.join(token.string)
                new_tokens.append(token)
            else:
                new_tokens.append(token)
        else:
            # Token in constants, skip
            new_tokens.append(token)

        prev_token = token

    return token_utils.untokenize(new_tokens)
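Finally, a usage sketch for toValidEqn, under the same token_utils assumption:

print(toValidEqn("Y = 2N + ABC"))
# expected output: Y = 2*N + A*B*C
print(toValidEqn("y = a(3)"))
# expected output (unchanged): y = a(3)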