def transform_source(source, **kwargs): """A simple replacement of ``function`` by ``lambda``.""" tokens = token_utils.tokenize(source) for token in tokens: if token == "λ": token.string = "lambda" return token_utils.untokenize(tokens)
def transform_source(source, **kwargs): """Replace integers by Fraction objects""" tokens = token_utils.tokenize(source) for token in tokens: if token.is_integer(): token.string = f"Fraction({token.string})" return token_utils.untokenize(tokens)
def transform_source(source, **kwargs): """Simple transformation: replaces any single token λ by lambda. By defining this function, we can also make use of Ideas' console. """ tokens = token_utils.tokenize(source) for token in tokens: if token == "λ": token.string = "lambda" return token_utils.untokenize(tokens)
def transform_source(source, **kwargs): """Simple transformation: replaces any explicit float followed by ``D`` by a Decimal. """ tokens = token_utils.tokenize(source) for first, second in zip(tokens, tokens[1:]): if first.is_number() and "." in first.string and second == "D": first.string = f"Decimal('{first.string}')" second.string = "" return token_utils.untokenize(tokens)
def transform_source(source, **kwargs): """Simple transformation: replaces any explicit float by a Decimal. By defining this function, we can also make use of Ideas' console. """ tokens = token_utils.tokenize(source) for token in tokens: if token.is_number() and "." in token.string: token.string = f"Decimal('{token.string}')" return token_utils.untokenize(tokens)
def french_to_english(source):
    """A simple replacement of French Python keywords by their normal
    English versions.
    """
    new_tokens = []
    for token in token_utils.tokenize(source):
        if token.string in fr_to_py:
            token.string = fr_to_py[token.string]
        new_tokens.append(token)
    new_source = token_utils.untokenize(new_tokens)
    return new_source
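# A minimal fr_to_py mapping, for illustration only; the real project would
# define a much larger keyword table.
fr_to_py = {"si": "if", "sinon": "else", "Vrai": "True", "Faux": "False"}
print(french_to_english("si Vrai: x = 1"))
# expected output: if True: x = 1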
def function_as_a_keyword(source):
    """A simple replacement of ``function`` by ``lambda``.

    Note that, while the string ``lambda`` is shorter than ``function``,
    we do not adjust the information (start_col, end_col) about the
    position of the token. ``untokenize`` uses that information together
    with the information about each original line, to properly keep track
    of the spacing between tokens.
    """
    new_tokens = []
    for token in token_utils.tokenize(source):
        if token == "function":
            token.string = "lambda"
        new_tokens.append(token)
    return token_utils.untokenize(new_tokens)
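# Usage sketch showing the point made in the docstring: the spacing between
# tokens on the original line is preserved even though lambda is shorter
# than function.
print(function_as_a_keyword("square = function x: x * x"))
# expected output: square = lambda x: x * x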
def random_deletion(sentence, n=1):
    tokens = tokenize(sentence)
    # obviously, if there's only one word, don't delete it
    # (return the untokenized sentence so the return type is consistent)
    if len(tokens) == 1:
        return untokenize(tokens)
    # randomly delete up to n words
    count = 0
    while count < n:
        # guard: never ask to delete every remaining word
        assert n < len(tokens)
        rand_index = random.randint(0, len(tokens) - 1)
        del tokens[rand_index]
        count += 1
    return untokenize(tokens)
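# The sentence-level snippets here assume generic tokenize/untokenize helpers.
# A minimal whitespace-based pair (an assumption, not the original project's
# implementation) is enough to try them out.
import random

def tokenize(sentence):
    return sentence.split()

def untokenize(tokens):
    return " ".join(tokens)

print(random_deletion("the quick brown fox jumps", n=2))
# possible output: the brown jumps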
def replace(sentence, the_word, synonym):
    tokens = tokenize(sentence)
    # replace the_word with synonym
    try:
        assert the_word in tokens
    except AssertionError:
        print("AssertionError")
        print("sentence: {}\nthe_word: {}\nsynonym: {}".format(sentence, the_word, synonym))
        return None
    new_tokens = [synonym if word == the_word else word for word in tokens]
    new_sentence = untokenize(new_tokens)
    # print("--old: ", sentence)
    # print("replaced", the_word, "with", synonym)
    # print("--new: ", new_sentence)
    return new_sentence
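# Usage sketch, reusing the whitespace helpers defined above.
print(replace("the quick brown fox", "quick", "fast"))
# expected output: the fast brown fox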
def add_multiplication_symbol(source):
    """This adds a multiplication symbol where it would be understood as
    being implicit by the normal way algebraic equations are written but
    would be a SyntaxError in Python. Thus we have::

        2n -> 2*n
        n 2 -> n* 2
        2(a+b) -> 2*(a+b)
        (a+b)2 -> (a+b)*2
        2 3 -> 2* 3
        m n -> m* n
        (a+b)c -> (a+b)*c

    The obvious one (in algebra) being left out is something like ``n(...)``
    which is a function call - and thus valid Python syntax.
    """
    tokens = token_utils.tokenize(source)
    if not tokens:
        return tokens
    prev_token = tokens[0]
    new_tokens = [prev_token]
    for token in tokens[1:]:
        # The code has been written in a way to demonstrate that this type of
        # transformation could be done as the source is tokenized by Python.
        if ((prev_token.is_number()
                and (token.is_identifier() or token.is_number() or token == "("))
            or (prev_token.is_identifier()
                and (token.is_identifier() or token.is_number()))
            or (prev_token == ")"
                and (token.is_identifier() or token.is_number()))):
            new_tokens.append("*")
        new_tokens.append(token)
        prev_token = token
    return token_utils.untokenize(new_tokens)
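# Usage sketch: implicit products get an explicit * inserted, matching the
# table in the docstring.
print(add_multiplication_symbol("2n + 3(a + b)"))
# expected output: 2*n + 3*(a + b)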
def random_swap(sentence, distance=1):
    """
    randomly swap words in a sentence

    :params[in]: sentence, a string, input sentence
    :params[in]: distance, integer, distance of words
    :params[out]: n_sentence, a string, new sentence
    """
    tokens = tokenize(sentence)
    tokens_length = len(tokens)
    assert tokens_length >= 2
    index1 = random.randint(0, tokens_length - 1)
    # candidates pool: indices within `distance` of index1, clipped to range
    candidates = set(range(index1 - distance, index1 + distance + 1)) & set(range(tokens_length))
    candidates.remove(index1)
    # randomly pick another index (random.sample no longer accepts a set
    # as of Python 3.11, so convert to a list first)
    index2 = random.choice(list(candidates))
    # swap the two elements
    tokens[index1], tokens[index2] = tokens[index2], tokens[index1]
    n_sentence = untokenize(tokens)
    return n_sentence
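# Usage sketch, again with the whitespace helpers from above; output varies
# from run to run.
print(random_swap("the quick brown fox", distance=2))
# possible output: brown quick the fox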
def check(source):
    """Round-trip check: untokenizing the tokens of ``source`` must
    reproduce ``source`` exactly.
    """
    tokens = token_utils.tokenize(source)
    new_source = token_utils.untokenize(tokens)
    print(len(source), len(new_source))
    assert source == new_source
source = "a\n " source2 = "a\n\t" check(source) check(source2) check_lines(source) check_lines(source2) source1 = "a = b" source2 = "a = b # comment\n" source3 = """ if True: a = b # comment """ tokens1 = token_utils.tokenize(source1) tokens2 = token_utils.tokenize(source2) lines3 = token_utils.get_lines(source3) def test_first(): assert token_utils.get_first(tokens1) == token_utils.get_first(tokens2) assert token_utils.get_first(tokens1) == "a" assert token_utils.get_first(tokens2, exclude_comment=False) == "a" assert token_utils.get_first_index(tokens1) == 0 assert token_utils.get_first(lines3[2]) == "a" assert token_utils.get_first_index(lines3[2]) == 1 def test_last():
def toValidEqn(source):
    """This adds a multiplication symbol where it would be understood as
    being implicit by the normal way algebraic equations are written but
    would be a SyntaxError in Python. Thus we have::

        2N -> 2*N
        N 2 -> N* 2
        2(A+B) -> 2*(A+B)
        (A+B)2 -> (A+B)*2
        2 3 -> 2* 3
        M N -> M* N
        (A+B)C -> (A+B)*C
        A(3) -> A*(3)
        a(3) -> a(3) - multiplication is only added if the preceding token
                       is capitalized, since that marks a variable

    Modified from ideas:
    https://github.com/aroberge/ideas/blob/master/ideas/examples/implicit_multiplication.py
    """
    constants = [
        'BLUE', 'RED', 'BLACK', 'MAGENTA', 'GREEN', 'ORANGE', 'BROWN',
        'NAVY', 'LTBLUE', 'YELLOW', 'WHITE', 'LTGRAY', 'MEDGRAY', 'GRAY',
        'DARKGRAY'
    ]
    tokens = token_utils.tokenize(source)
    if not tokens:
        return tokens
    prev_token = tokens[0]
    new_tokens = [prev_token]
    for token in tokens[1:]:
        if token.is_not_in(constants):
            # Check if implicit multiplication should be added
            if (((prev_token.is_number()
                    or (prev_token.is_identifier() and prev_token.string.isupper()))
                    and ((token.is_identifier() and token.string.isupper())
                         or token.is_number()
                         or token == "("))
                or ((prev_token.is_identifier() and prev_token.string.isupper())
                    and ((token.is_identifier() and token.string.isupper())
                         or token.is_number()))
                or (prev_token == ")"
                    and ((token.is_identifier() and token.string.isupper())
                         or token.is_number()))):
                new_tokens.append("*")
            if (token.is_identifier() and token.string.isupper()
                    and len(token.string) > 1):
                # Multiple variables next to one another
                # ABC -> A*B*C
                token.string = '*'.join(token.string)
                new_tokens.append(token)
            else:
                new_tokens.append(token)
        else:
            # Token in constants, skip
            new_tokens.append(token)
        prev_token = token
    return token_utils.untokenize(new_tokens)
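# Usage sketch: uppercase identifiers are treated as variables, so implicit
# products involving them get an explicit *; lowercase names are left alone
# because they may be function calls.
print(toValidEqn("Y = 2X + 3(X+1)"))
# expected output: Y = 2*X + 3*(X+1)
print(toValidEqn("y = f(3)"))
# expected output: y = f(3)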