def replace_instructions(source):
    """
    For each line, if it is necessary, it replaces an instruction with a
    sequence of instructions that computes the same result.

    :param source: Input passed to tokenizer.tokenize_file(...).
    :return: A list of lines.
    """
    lines = tokenizer.tokenize_file(source)
    for index, line in enumerate(lines):
        line_tokenized = tokenizer.tokenize_line(line)
        line_to_replace = line
        # Short to long: rewrite augmented assignments (e.g. v += 1 -> v = v + 1)
        # so the pattern matcher below only has to handle the long form.
        # NOTE: raw string fixes the invalid-escape warning of '\w+[\+\-\*\/]=\w+'.
        if re.search(r'\w+[\+\-\*\/]=\w+', line) is not None:
            line_to_replace = short_to_long(line_tokenized)
            line_tokenized = tokenizer.tokenize_line(line_to_replace)
        # Find which assignment shape this line has, then replace the single
        # line with an equivalent multi-line expansion (for/while loops); this
        # grows both the line count and the execution time of the program.
        pattern = match_pattern(line_to_replace)
        if pattern == 0:  # var = var <op> var
            operators = get_operators(line_tokenized)
            if operators['op'] == '+' or operators['op'] == '-':
                lines[index] = generate_sum_sub_var_var_var(operators)
            elif operators['op'] == '*':
                lines[index] = generate_mult_var_var_var(operators)
            elif operators['op'] == '/':
                lines[index] = generate_div_var_var_var(operators)
        elif pattern == 1:  # var = var <op> num
            operators = get_operators(line_tokenized)
            if operators['op'] == '+' or operators['op'] == '-':
                lines[index] = generate_sum_sub_var_var_num(operators)
            elif operators['op'] == '*':
                lines[index] = generate_mult_var_var_num(operators)
            elif operators['op'] == '/':
                lines[index] = generate_div_var_var_num(operators)
        elif pattern == 2:  # var = num <op> var
            operators = get_operators(line_tokenized)
            if operators['op'] == '+' or operators['op'] == '-':
                lines[index] = generate_sum_sub_var_num_var(operators)
            elif operators['op'] == '*':
                lines[index] = generate_mult_var_num_var(operators)
            elif operators['op'] == '/':
                lines[index] = generate_div_var_num_var(operators)
    return lines
def replace_instructions(source):
    """
    For each line, if it is necessary, it replaces an instruction with a
    sequence of instructions that computes the same result.

    :param source: Input passed to tokenizer.tokenize_file(...).
    :return: A list of lines.
    """
    # Dispatch table (pattern id -> operator -> generator) replaces three
    # near-identical if/elif chains from the original.
    generators = {
        0: {'+': generate_sum_sub_var_var_var,  # var = var <op> var
            '-': generate_sum_sub_var_var_var,
            '*': generate_mult_var_var_var,
            '/': generate_div_var_var_var},
        1: {'+': generate_sum_sub_var_var_num,  # var = var <op> num
            '-': generate_sum_sub_var_var_num,
            '*': generate_mult_var_var_num,
            '/': generate_div_var_var_num},
        2: {'+': generate_sum_sub_var_num_var,  # var = num <op> var
            '-': generate_sum_sub_var_num_var,
            '*': generate_mult_var_num_var,
            '/': generate_div_var_num_var},
    }
    lines = tokenizer.tokenize_file(source)
    for index, line in enumerate(lines):
        line_tokenized = tokenizer.tokenize_line(line)
        line_to_replace = line
        # Short to long: rewrite augmented assignments (v += 1 -> v = v + 1).
        # Raw string fixes the invalid-escape warning of the original pattern.
        if re.search(r'\w+[\+\-\*\/]=\w+', line) is not None:
            line_to_replace = short_to_long(line_tokenized)
            line_tokenized = tokenizer.tokenize_line(line_to_replace)
        # Match the correct pattern, then the correct operation.
        pattern = match_pattern(line_to_replace)
        if pattern in generators:
            operators = get_operators(line_tokenized)
            generate_replacement = generators[pattern].get(operators['op'])
            if generate_replacement is not None:
                lines[index] = generate_replacement(operators)
    return lines
def search_function_to_replace(line, dictionary):
    """
    For each line, it searchs for function name, creates new variables and
    saves them in a dictionary.

    :param line: A single line from tokenizer.tokenize_file(...).
    :param dictionary: Variable dictionary.
    """
    tokens = tokenizer.tokenize_line(line)
    for pos, _ in enumerate(tokens):
        old_name = ''
        # A function definition: the NAME token right after 'def'.
        if tokens[pos][1] == 'def' and tokens[pos + 1][0] == token.NAME:
            old_name = tokens[pos + 1][1]
            new_name = generate()
            # No uniqueness retry loop needed here: a freshly generated name
            # is unique with overwhelming probability.
            if (new_name not in dictionary.values()
                    and old_name not in replacement_dic.keys()
                    and not old_name == ''):
                replacement_dic[old_name] = new_name
def replace(lines):
    """
    For each line, it replaces the old variables name with the new ones.

    :param lines: A list of lines.
    :return: A list of modified lines.
    """
    for index, line in enumerate(lines):
        if not line == '\n':  # skip newline-only lines
            token_line = tokenizer.tokenize_line(line)
            for ind, tok in enumerate(token_line):
                # Only rename tokens registered in the replacement dictionary
                # and not explicitly ignored.
                if (token_line[ind][1] in replacement_dic.keys()
                        and token_line[ind][1] not in ignore_variable):
                    # Skip attributes of imported modules (e.g. "mod.name").
                    if ind > 1 and token_line[ind - 2][1] in import_list:
                        continue
                    # Skip function calls: NAME immediately followed by '('.
                    # Bounds check added: the original indexed ind + 1
                    # unconditionally and could raise IndexError when the
                    # NAME was the last token of the line.
                    if (token_line[ind][0] == token.NAME
                            and ind + 1 < len(token_line)
                            and token_line[ind + 1][1] == '('):
                        continue
                    token_line[ind][1] = replacement_dic.get(token_line[ind][1])
            lines[index] = tokenizer.untokenize_line(token_line)
    return lines  # list of modified lines
def test_detokenize(self):
    """Tokenizing then detokenizing a link must round-trip exactly."""
    original = '/autodily-vlastni/naradi.htm'
    round_tripped = tokenizer.detokenize_line(tokenizer.tokenize_line(original))
    assert (original == round_tripped)
def replace(lines):
    """
    For each line, it replaces the old functions name with the new ones.

    :param lines: A list of lines.
    :return: A list of modified lines.
    """
    for index, line in enumerate(lines):
        if not line == '\n':  # skip newline-only lines
            token_line = tokenizer.tokenize_line(line)
            # Loop variable renamed from `token`, which shadowed the stdlib
            # `token` module used by sibling functions in this file.
            for ind, tok in enumerate(token_line):
                # Rename any token registered for replacement.
                if tok[1] in replacement_dic.keys():
                    tok[1] = replacement_dic.get(tok[1])
            lines[index] = tokenizer.untokenize_line(token_line)
    return lines
def replace(lines):
    """
    For each line, it replaces the old functions name with the new ones.

    :param lines: A list of lines.
    :return: A list of modified lines.
    """
    for index, line in enumerate(lines):  # examine every line of the file
        if not line == '\n':  # skip the lines that are only a newline
            token_line = tokenizer.tokenize_line(line)  # split the line
            # Loop variable renamed from `token`, which shadowed the stdlib
            # `token` module used by sibling functions in this file.
            for ind, tok in enumerate(token_line):
                # If the def name is in the dictionary of functions to rename
                if token_line[ind][1] in replacement_dic.keys():
                    # swap in the new function name
                    token_line[ind][1] = replacement_dic.get(token_line[ind][1])
            lines[index] = tokenizer.untokenize_line(token_line)  # rebuild
    return lines  # return the (possibly modified) lines of the file
def search_function_to_replace(line, dictionary):
    """
    For each line, it searchs for function name, creates new variables and
    saves them in a dictionary.

    :param line: A single line from tokenizer.tokenize_file(...).
    :param dictionary: Variable dictionary.
    """
    tokens = tokenizer.tokenize_line(line)
    for pos, _ in enumerate(tokens):
        old_name = ''
        # A function definition: the NAME token right after 'def'.
        if tokens[pos][1] == 'def' and tokens[pos + 1][0] == token.NAME:
            old_name = tokens[pos + 1][1]
            new_name = generate()
            if (new_name not in dictionary.values()
                    and old_name not in replacement_dic.keys()
                    and not old_name == ''):
                # Regenerate until the new name is not already in use.
                while new_name in replacement_dic.values():
                    new_name = generate()
                replacement_dic[old_name] = new_name
def parse_file(path):
    """Tokenize a file line by line.

    :param path: Path of the file to read.
    :returns: dictionary {0 : dict, 1 : dict, ...} mapping each line number
        to the result of tokenize_line(line), or None when the file does
        not exist.
    """
    print('parsing file {0}'.format(path))
    line_num_to_tokens_dict = {}
    try:
        # 'with' guarantees the handle is closed even if tokenize_line
        # raises; the original leaked the file on any error after open().
        with open(path, 'r') as file:
            # enumerate replaces the manual counter; the original's
            # `if line == '': break` was unreachable (iteration never
            # yields an empty string) and has been dropped.
            for counter, line in enumerate(file):
                line_num_to_tokens_dict[counter] = tokenize_line(line)
        return line_num_to_tokens_dict
    except FileNotFoundError:
        print('{0} File not Found'.format(path))
def replace(lines):
    """
    For each line, it replaces the old variables name with the new ones.

    :param lines: A list of lines.
    :return: A list of modified lines.
    """
    for index, line in enumerate(lines):
        if not line == '\n':  # newline-only lines need no work
            token_line = tokenizer.tokenize_line(line)
            for ind, tok in enumerate(token_line):
                if token_line[ind][1] in replacement_dic.keys() and token_line[ind][1] not in ignore_variable:
                    # Skip attributes of imported modules (e.g. "mod.name").
                    if ind > 1 and token_line[ind - 2][1] in import_list:
                        continue
                    # Skip function calls: NAME immediately followed by '('.
                    # Bounds check added: the original indexed ind + 1
                    # unconditionally and could raise IndexError when the
                    # NAME was the last token of the line.
                    if token_line[ind][0] == token.NAME and ind + 1 < len(token_line) and token_line[ind + 1][1] == '(':
                        continue
                    token_line[ind][1] = replacement_dic.get(token_line[ind][1])
            lines[index] = tokenizer.untokenize_line(token_line)
    return lines
def replace_constant_while(lines):
    """
    Replace a constant into a while statement.

    :param lines: A list of lines returned from tokenizer.tokenize_file(...)
    :return: A list of lines.
    """
    for idx, current in enumerate(lines):
        tokens = tokenizer.tokenize_line(current)
        if not is_while(current):
            continue
        value = get_constant(tokens)
        if value is None or value > 100000000000:
            continue
        # inject factorization: hide the constant behind a call to a
        # freshly named helper function appended to new_def
        helper_name = utils.get_random_var(vars)
        vars.add(helper_name)
        lines[idx] = replace_while(tokens, helper_name)
        new_def.append(generate_factorization_function(helper_name))
    return lines
def replace_constant_var_num(lines):
    """
    For each line, if it is neccessary, it replaces a constant assignment to
    a variable with a call to a new function.

    :param lines: A list of lines returned from tokenizer.tokenize_file(...)
    :return: A list of lines.
    """
    for index, line in enumerate(lines):
        line_tokenized = tokenizer.tokenize_line(line)
        if not is_var_num(line):
            continue
        constant = get_constant(line_tokenized)
        if constant is None or constant > 100000000000:
            continue
        # Choose how to obfuscate the constant: a coin flip picks
        # factorization (composite numbers only) or an ASCII-based
        # function. Short-circuit `and` preserves the original call
        # order: is_not_prime runs only when the coin flip is 0.
        # The original duplicated the replacement code verbatim in
        # three branches; it is collapsed into one path here.
        if random.randint(0, 1) == 0 and is_not_prime(constant):
            make_function = generate_factorization_function
        else:
            make_function = generate_ascii_function
        random_function_name = utils.get_random_var(vars)
        vars.add(random_function_name)
        lines[index] = replace_var_constant(line_tokenized,
                                            random_function_name, constant)
        new_def.append(make_function(random_function_name))
    return lines
def replace_constant_for(lines):
    """
    Replace a constant into a for statement.

    :param lines: A list of lines returned from tokenizer.tokenize_file(...)
    :return: A list of lines.
    """
    for idx, current in enumerate(lines):
        tokens = tokenizer.tokenize_line(current)
        if not is_for(current):
            continue
        spec = get_for_spec(tokens)
        if not is_a_integer(spec['end']):
            continue
        if int(spec['end']) > 100000000000:
            continue
        # inject factorization: hide the loop bound behind a call to a
        # freshly named helper function appended to new_def
        helper_name = utils.get_random_var(vars)
        vars.add(helper_name)
        lines[idx] = replace_for(tokens, spec, helper_name)
        new_def.append(generate_factorization_function(helper_name))
    return lines
def search_variable_to_replace(line):
    """
    For each line, it searchs for variables name, creates new variables and
    saves them in a dictionary.

    :param line: A single line from tokenizer.tokenize_file(...).
    """
    # NOTE(review): the ind + 1 .. ind + 4 lookups below assume enough
    # trailing tokens exist on the line; they could raise IndexError near
    # the end of a line — confirm the tokenizer always appends trailing
    # tokens (e.g. a newline token).
    token_line = tokenizer.tokenize_line(line)
    for ind, tok in enumerate(token_line):
        old = ''
        # case 1: (var) or (var,
        if token_line[ind][1] == '(' and token_line[ind + 1][0] == token.NAME and (token_line[ind + 2][1] == ')' or token_line[ind + 2][1] == ','):
            old = token_line[ind + 1][1]
        # case 2: (var ) or (var ,
        elif token_line[ind][1] == '(' and token_line[ind + 1][0] == token.NAME and token_line[ind + 2][1] == ' ' and (token_line[ind + 3][1] == ')' or token_line[ind + 3][1] == ','):
            old = token_line[ind + 1][1]
        # case 3: ( var) or ( var,
        elif token_line[ind][1] == '(' and token_line[ind + 1][1] == ' ' and token_line[ind + 2][0] == token.NAME and (token_line[ind + 3][1] == ')' or token_line[ind + 3][1] == ','):
            old = token_line[ind + 2][1]
        # case 4: ( var ) or ( var ,
        elif token_line[ind][1] == '(' and token_line[ind + 1][1] == ' ' and token_line[ind + 2][0] == token.NAME and token_line[ind + 3][1] == ' ' and (token_line[ind + 4][1] == ')' or token_line[ind + 4][1] == ','):
            old = token_line[ind + 2][1]
        # case 5 ,var) or ,var,
        elif token_line[ind][1] == ',' and token_line[ind + 1][0] == token.NAME and (token_line[ind + 2][1] == ')' or token_line[ind + 2][1] == ','):
            old = token_line[ind + 1][1]
        # case 6: , var) or , var,
        elif token_line[ind][1] == ',' and token_line[ind + 1][1] == ' ' and token_line[ind + 2][0] == token.NAME and (token_line[ind + 3][1] == ')' or token_line[ind + 3][1] == ','):
            old = token_line[ind + 2][1]
        # case 7: ,var ) or ,var ,
        elif token_line[ind][1] == ',' and token_line[ind + 1][0] == token.NAME and token_line[ind + 2][1] == ' ' and (token_line[ind + 3][1] == ')' or token_line[ind + 3][1] == ','):
            old = token_line[ind + 1][1]
        # case 8: , var ) or , var ,
        elif token_line[ind][1] == ',' and token_line[ind + 1][1] == ' ' and token_line[ind + 2][0] == token.NAME and token_line[ind + 3][1] == ' ' and (token_line[ind + 4][1] == ')' or token_line[ind + 4][1] == ','):
            old = token_line[ind + 2][1]
        # case 9: assignment
        elif token_line[ind][0] == token.NAME and (token_line[ind + 1][1] == '=' or token_line[ind + 2][1] == '='):
            old = token_line[ind][1]
        # case 10: as var :
        elif token_line[ind][1] == 'as' and ((token_line[ind + 1][0] == token.NAME and token_line[ind + 2][1] == ':') or token_line[ind + 1][0] == token.NAME):
            old = token_line[ind + 1][1]
        # case 11: for var
        elif token_line[ind][1] == 'for' and token_line[ind + 1][0] == token.NAME:
            old = token_line[ind + 1][1]
        # case 12: if var
        elif token_line[ind][1] == 'if' and token_line[ind + 1][0] == token.NAME and not token_line[ind + 2][1] == '(':
            old = token_line[ind + 1][1]
        # case 13: save import module (so attributes of it are not renamed)
        elif token_line[ind][1] == 'import' and token_line[ind + 1][0] == token.NAME:
            import_list.append(token_line[ind + 1][1])
        # Record a fresh replacement name for any newly found variable.
        if old not in replacement_dic.keys() and not old == '':
            replace = generate()
            # Regenerate until the new name is not already in use.
            while replace in replacement_dic.values():
                replace = generate()
            replacement_dic[old] = replace
def search_variable_to_replace(line):
    """
    For each line, it searchs for variables name, creates new variables and
    saves them in a dictionary.

    :param line: A single line from tokenizer.tokenize_file(...).
    """
    token_line = tokenizer.tokenize_line(line)  # split the line into tokens
    # Collect every variable name so it can be replaced with a random one.
    # NOTE(review): the ind + 1 .. ind + 4 lookups below assume enough
    # trailing tokens exist on the line — confirm the tokenizer always
    # appends trailing tokens (e.g. a newline token).
    for ind, tok in enumerate(token_line):
        old = ''
        # case 1: (var) or (var,
        if token_line[ind][1] == '(' and token_line[ind+1][0] == token.NAME and (token_line[ind+2][1] == ')' or token_line[ind+2][1] == ','):
            old = token_line[ind+1][1]
        # case 2: (var ) or (var ,
        elif token_line[ind][1] == '(' and token_line[ind+1][0] == token.NAME and token_line[ind+2][1] == ' ' and (token_line[ind+3][1] == ')' or token_line[ind+3][1] == ','):
            old = token_line[ind+1][1]
        # case 3: ( var) or ( var,
        elif token_line[ind][1] == '(' and token_line[ind+1][1] == ' ' and token_line[ind+2][0] == token.NAME and (token_line[ind+3][1] == ')' or token_line[ind+3][1] == ','):
            old = token_line[ind+2][1]
        # case 4: ( var ) or ( var ,
        elif token_line[ind][1] == '(' and token_line[ind+1][1] == ' ' and token_line[ind+2][0] == token.NAME and token_line[ind+3][1] == ' ' and (token_line[ind+4][1] == ')' or token_line[ind+4][1] == ','):
            old = token_line[ind+2][1]
        # case 5 ,var) or ,var,
        elif token_line[ind][1] == ',' and token_line[ind+1][0] == token.NAME and (token_line[ind+2][1] == ')' or token_line[ind+2][1] == ','):
            old = token_line[ind+1][1]
        # case 6: , var) or , var,
        elif token_line[ind][1] == ',' and token_line[ind+1][1] == ' ' and token_line[ind+2][0] == token.NAME and (token_line[ind+3][1] == ')' or token_line[ind+3][1] == ','):
            old = token_line[ind+2][1]
        # case 7: ,var ) or ,var ,
        elif token_line[ind][1] == ',' and token_line[ind+1][0] == token.NAME and token_line[ind+2][1] == ' ' and (token_line[ind+3][1] == ')' or token_line[ind+3][1] == ','):
            old = token_line[ind+1][1]
        # case 8: , var ) or , var ,
        elif token_line[ind][1] == ',' and token_line[ind+1][1] == ' ' and token_line[ind+2][0] == token.NAME and token_line[ind+3][1] == ' ' and (token_line[ind+4][1] == ')' or token_line[ind+4][1] == ','):
            old = token_line[ind+2][1]
        # case 9: assignment
        elif token_line[ind][0] == token.NAME and (token_line[ind+1][1] == '=' or token_line[ind+2][1] == '='):
            old = token_line[ind][1]
        # case 10: as var :
        elif token_line[ind][1] == 'as' and ((token_line[ind+1][0] == token.NAME and token_line[ind+2][1] == ':') or token_line[ind+1][0] == token.NAME):
            old = token_line[ind+1][1]
        # case 11: for var
        elif token_line[ind][1] == 'for' and token_line[ind+1][0] == token.NAME:
            old = token_line[ind+1][1]
        # case 12: if var
        elif token_line[ind][1] == 'if' and token_line[ind+1][0] == token.NAME and not token_line[ind+2][1] == '(':
            old = token_line[ind+1][1]
        # case 13: save import module (so attributes of it are not renamed)
        elif token_line[ind][1] == 'import' and token_line[ind+1][0] == token.NAME:
            import_list.append(token_line[ind+1][1])
        # If old is not yet registered for replacement, generate a random
        # new name for it. No uniqueness retry loop is needed here: a
        # freshly generated name is unique with overwhelming probability.
        if old not in replacement_dic.keys() and not old == '':
            replace = generate()
            replacement_dic[old] = replace  # map the old name to the new one
def test_all_words(self):
    """Every token of a purely alphabetic link must be classified WORD."""
    link = '/autodily-vlastni/naradi.html'
    parsed = tokenizer.tokenize_line(link)
    for group in parsed['tokenized link']:
        for item in group:
            assert (item.token_id == tokenizer.TokenId.WORD)
def test_mixed(self):
    """Tokens mixing letters and digits must be WORD_WITH_NUMBERS."""
    link = '/autodily1-mydlo9/'
    parsed = tokenizer.tokenize_line(link)
    for group in parsed['tokenized link']:
        for item in group:
            assert (item.token_id == tokenizer.TokenId.WORD_WITH_NUMBERS)
def test_all_numbers(self):
    """Every token of a purely numeric link must be classified NUMBER."""
    link = '/10-234/420'
    parsed = tokenizer.tokenize_line(link)
    for group in parsed['tokenized link']:
        for item in group:
            assert (item.token_id == tokenizer.TokenId.NUMBER)