def test_not_found(self):
    """Replacing a node that is not part of the tree leaves the code as it was."""
    tree = ast.parse("a.b(c)")
    # A freshly built node, so it cannot be found anywhere inside ``tree``.
    unrelated = ast.Name(id="d")
    func_node = tree.body[0].value.func
    transformer = ReplaceNodeTransformer(unrelated, func_node)
    result = transformer.visit(tree)
    assert_is_not(result, tree)
    assert_code_equal("a.b(c)\n", decompile(result))
def _move_out_var_from_yield(self, yield_info, indentation):
    """Split a nested yield out into its own assignment statement.

    For example, this helps turn:

        some_code((yield get_value.asynq()))

    into:

        value = yield get_value.asynq()
        ...
        some_code(value)

    Returns a pair: the list of source lines forming the new assignment
    (``value = ...``) and the Replacement object that implements the
    second change.
    """
    yield_node = yield_info.yield_node
    varname = self.generate_varname_from_node(yield_node.value)
    name_node = ast.Name(id=varname)

    # Swap the yield for the new variable inside the original statement.
    replace = self.visitor.replace_node(
        yield_node, name_node, current_statement=yield_info.statement_node
    )

    # Render ``<varname> = yield ...`` at the requested indentation.
    assignment = ast.Assign(targets=[name_node], value=yield_node)
    assignment_source = decompile(assignment, starting_indentation=indentation)
    return [line + "\n" for line in assignment_source.splitlines()], replace
def test_UnaryOp():
    """Unary operators round-trip through decompile and re-parse."""
    for source in ('not x', '+x', '-1', '-(-1)', '-(1+1j)'):
        check(source)
    decompiled = decompile(ast.parse('-1'))
    assert '-1\n' == decompiled
def main():
    """Run the interpreter from the command line.

    Parses the arguments, lexes and parses the input file, converts the
    parse tree to a Python AST, compiles and executes it, and, when ``-o``
    is given, writes the decompiled Python source to that path.
    """
    # Command-line interface.
    cli = argparse.ArgumentParser(prog='expresso', description='C-- interpreter')
    cli.add_argument('input', type=str, help='source code')
    cli.add_argument('-o', metavar='', type=str, default=None, help='python output')
    args = cli.parse_args()

    # Lex and parse the source file into a parse tree.
    token_stream = CommonTokenStream(expressoLexer(FileStream(args.input)))
    parse_tree = expressoParser(token_stream).start()

    # Walk the parse tree to build the Python AST.
    python_ast = astVisitor(args.input).visitStart(parse_tree)

    # Compile and run the generated program in this module's globals.
    exec(compile(source=python_ast, filename=args.input, mode='exec'), globals())

    if args.o:
        with open(args.o, 'w') as output:
            output.write(decompile(python_ast))
def test_UnaryOp():
    """Round-trip a set of unary-operator expressions."""
    sources = ['not x', '+x', '-1', '-(-1)', '-(1+1j)']
    for expression in sources:
        check(expression)
    # A negative literal must come back without extra parentheses.
    negative_one = decompile(ast.parse('-1'))
    assert '-1\n' == negative_one
def _merge_assign_nodes(self, first_yield: YieldInfo, second_yield: YieldInfo) -> Replacement:
    """Build a Replacement that merges two yield statements into one.

    The targets (left-hand sides) and the yielded values (right-hand sides)
    are extracted from each yield independently and then concatenated. The
    result replaces every line from the start of the first yield's range to
    the end of the second yield's range.
    """
    first_pair = first_yield.target_and_value()
    second_pair = second_yield.target_and_value()
    combined_targets = first_pair[0] + second_pair[0]
    combined_values = first_pair[1] + second_pair[1]

    merged_yield = ast.Yield(value=ast.Tuple(elts=combined_values))

    def is_underscore(target):
        return isinstance(target, ast.Name) and target.id == "_"

    if all(is_underscore(target) for target in combined_targets):
        # Every target is '_', so a bare expression statement is enough.
        statement = ast.Expr(value=merged_yield)
    else:
        statement = ast.Assign(
            targets=[ast.Tuple(elts=combined_targets)], value=merged_yield
        )

    indent = self._indentation_of_node(first_yield.statement_node)
    merged_source = decompile(statement, starting_indentation=indent)

    first_line = first_yield.line_range[0]
    last_line = second_yield.line_range[-1]
    return Replacement(list(range(first_line, last_line + 1)), [merged_source])
def process_return(self, statement):
    """Record a ``return`` statement in the current function's statement list.

    Appends a ``('Return', lineno, [('return', text)], active_blocks)`` tuple
    to ``self.list_of_statements_in_function``, where ``text`` is the
    decompiled statement without its leading ``return`` keyword and
    ``active_blocks`` is a copy of ``self.active_code_block``.

    Args:
        statement: the AST node of the return statement; its ``lineno`` is
            stored alongside the text.
    """
    source = decompile(statement).strip('\n')
    # Remove only the leading keyword. A blanket replace() would also delete
    # "return " wherever it appears inside the returned expression itself,
    # for example inside a string literal.
    keyword = "return "
    text = source[len(keyword):] if source.startswith(keyword) else source
    line = statement.lineno
    # Copy the list so later changes to the active blocks do not alter this entry.
    current_active_list = list(self.active_code_block)
    self.list_of_statements_in_function.append(
        ('Return', line, [('return', text)], current_active_list))
    log_info('Return statement found, adding to list of statements = {0}'.
             format(text))
def test_found(self):
    """Replacing a node that is in the tree rewrites the generated code."""
    tree = ast.parse("a.b(c)")
    substitute = ast.Name(id="d")
    target = tree.body[0].value.func
    transformer = ReplaceNodeTransformer(target, substitute)
    result = transformer.visit(tree)
    # The transformer must hand back a new tree, not the one it was given.
    assert_is_not(result, tree)
    assert_code_equal("d(c)\n", decompile(result))
def assert_decompiles(code, result, do_check=True, **kwargs):
    """Assert that ``code``, once parsed, decompiles to exactly ``result``.

    Extra keyword arguments are forwarded to ``decompile``. When ``do_check``
    is true the decompiled text is also passed through ``check``. On a
    mismatch the expected text, the actual text and a unified diff are
    printed before the assertion fails.
    """
    actual = decompile(ast.parse(code), **kwargs)
    if do_check:
        check(actual)
    if result == actual:
        return

    print('>>> expected')
    print(result)
    print('>>> actual')
    print(actual)
    print('>>> diff')
    diff_lines = difflib.unified_diff(result.splitlines(), actual.splitlines())
    for diff_line in diff_lines:
        print(diff_line)
    assert False, 'failed to decompile %s' % code
def reformat_as_single_line(python_code):
    """Return ``python_code`` re-rendered by ast_decompiler with a very large line length."""
    tree = ast.parse(python_code.strip())
    # Round-tripping through the AST does not preserve the original quote
    # style. It is still the simplest way found so far to get conventional
    # PEP8 spacing on a single line. Alternatives that were tried:
    #
    # - Black as a library: adds lots of vertical and horizontal whitespace
    #   for long argument lists etc.
    #
    # - libcst: would need complicated manipulation of whitespace elements
    #   to produce the PEP8 spacings around operators etc.
    rendered = ast_decompiler.decompile(tree, indentation=0, line_length=100000)
    return rendered.strip()
def _node_value(node):
    """Return the value held by one AST node, or its source text if it has a ``left`` operand."""
    # Nodes with a ``left`` operand (e.g. a BinOp) are rendered back to source.
    if hasattr(node, 'left'):
        return decompile(node)
    # String-like nodes expose their payload as ``s``.
    if hasattr(node, 's'):
        return node.s
    # Everything else must expose ``n``; AttributeError propagates otherwise.
    return node.n


def get_variable_value(section):
    """Return the value(s) assigned by ``section`` as a list.

    ``section.value`` is inspected by attribute:

    * a node with ``left`` yields a one-element list holding its decompiled
      source text;
    * a node with ``s`` or ``n`` yields a one-element list holding that value;
    * otherwise the node must have ``elts``, and one entry is produced per
      element using the same rules. The original read every element through
      ``.n``, so string-like or binary-expression elements raised
      AttributeError.

    Returns:
        A list of values, or None when ``section.value.elts`` is not a list.

    Raises:
        AttributeError: if ``section.value`` has none of the attributes
            above, or if an element has none of ``left``, ``s`` or ``n``.
    """
    value = section.value
    if hasattr(value, 'left') or hasattr(value, 's') or hasattr(value, 'n'):
        return [_node_value(value)]
    if isinstance(value.elts, list):
        return [_node_value(element) for element in value.elts]
    return None
def replace_node(self, current_node, new_node, current_statement=None):
    """Build a Replacement that swaps ``current_node`` for ``new_node``.

    The swap is applied within ``current_statement``, which defaults to
    ``self.current_statement``. The rewritten statement is decompiled at the
    indentation of the statement's first source line.

    Returns:
        A Replacement covering the statement's line range, or None when no
        statement is available or decompiling raises NotImplementedError.
    """
    statement = (
        current_statement if current_statement is not None else self.current_statement
    )
    if statement is None:
        return None

    transformer = ReplaceNodeTransformer(current_node, new_node)
    source_lines = self._lines()
    lines_to_remove = analysis_lib.get_line_range_for_node(statement, source_lines)
    # ``lineno`` is 1-based while the list of lines is 0-based.
    indent = analysis_lib.get_indentation(source_lines[statement.lineno - 1])
    rewritten = transformer.visit(statement)
    try:
        rendered = decompile(rewritten, starting_indentation=indent)
    except NotImplementedError:
        return None
    lines_to_add = [line + "\n" for line in rendered.splitlines()]
    return Replacement(lines_to_remove, lines_to_add)
def check(code):
    """Check that ``code`` has the same AST after being decompiled and re-parsed.

    If the decompiled text is not valid Python, the lines around the error
    are printed and the SyntaxError is re-raised. If the AST dumps differ,
    both sources and a diff of the dumps are printed before the assertion
    fails.
    """
    original_tree = ast.parse(code)
    regenerated = decompile(original_tree)
    try:
        regenerated_tree = ast.parse(regenerated)
    except SyntaxError as error:
        print('>>> syntax error:')
        # Print the lines surrounding the one the parser complained about.
        error_index = error.lineno - 1
        first = max(0, error_index - 3)
        last = error_index + 3
        for source_line in regenerated.splitlines()[first:last]:
            print(source_line)
        raise

    expected_dump = ast.dump(ast.parse(code))
    actual_dump = ast.dump(regenerated_tree)
    if expected_dump == actual_dump:
        return

    print(code)
    print(regenerated)
    for diff_line in difflib.unified_diff(expected_dump.split(), actual_dump.split()):
        print(diff_line)
    assert False, '%s != %s' % (expected_dump, actual_dump)
def test_ListComp():
    """List comprehensions round-trip through decompile and re-parse."""
    for source in (
        '[x for x in y]',
        '[x for x in y if z]',
        '[x for x in y for z in a]',
    ):
        check(source)
    decompiled = decompile(ast.parse('[a for a, b in x]'))
    assert '[a for a, b in x]\n' == decompiled
], decorator_list=[dataclass_decorator] ), Assign( targets=[ Name(id='SLIDES', ctx=Store()) ], value=List( elts=[ Name(id='Slide1', ctx=Load()), Name(id='Slide2', ctx=Load()) ], ctx=Load() ), type_comment=None ) ], type_ignores=[] ) if __name__ == '__main__': BASE_DIR = os.path.dirname(os.path.abspath(__file__)) target = os.path.join(BASE_DIR, 'stub_sample.py') with open(target, mode='r') as fp: data = fp.read() at = ast.parse(data) print(ast.dump(at)) # print(decompile(clone)) print(decompile(at))
def test_ListComp():
    """Round-trip a set of list-comprehension expressions."""
    sources = [
        '[x for x in y]',
        '[x for x in y if z]',
        '[x for x in y for z in a]',
    ]
    for expression in sources:
        check(expression)
    # A tuple target in the comprehension must come back unparenthesised.
    tuple_target = decompile(ast.parse('[a for a, b in x]'))
    assert '[a for a, b in x]\n' == tuple_target
def _check_for_duplicate_yields(self, node, current_statement):
    """Report a yield whose tuple value contains the same expression more than once.

    Elements of ``node.value`` are compared by their ``ast.dump`` text. When
    at least one duplicate is found, ``ErrorCode.duplicate_yield`` is
    reported through ``self.visitor.show_error``. A Replacement is attached
    when ``current_statement`` is either an expression statement whose value
    is ``node``, or an assignment of ``node`` to a single tuple target with
    as many elements as the yielded tuple. Otherwise the error is reported
    with ``replacement=None``.

    Nothing happens when the yielded value is not a tuple of at least two
    elements or contains no duplicates.
    """
    if not isinstance(node.value, ast.Tuple) or len(node.value.elts) < 2:
        return

    duplicate_indices = {}  # index to first index
    seen = {}  # ast.dump result to index
    for i, member in enumerate(node.value.elts):
        # identical AST nodes don't compare equally, so just stringify them for comparison
        code = ast.dump(member)
        if code in seen:
            duplicate_indices[i] = seen[code]
        else:
            seen[code] = i

    if not duplicate_indices:
        return

    # Keep only the first occurrence of each yielded expression.
    new_members = [
        elt for i, elt in enumerate(node.value.elts) if i not in duplicate_indices
    ]
    # A single remaining member is yielded directly rather than as a 1-tuple.
    if len(new_members) == 1:
        new_value = new_members[0]
    else:
        new_value = ast.Tuple(elts=new_members)
    new_yield_node = ast.Yield(value=new_value)

    if isinstance(current_statement, ast.Expr) and current_statement.value is node:
        new_nodes = [ast.Expr(value=new_yield_node)]
    elif (
        isinstance(current_statement, ast.Assign)
        and current_statement.value is node
    ):
        # Only a single tuple target with one element per yielded value can be rewritten.
        if (
            len(current_statement.targets) != 1
            or not isinstance(current_statement.targets[0], ast.Tuple)
            or len(current_statement.targets[0].elts) != len(node.value.elts)
        ):
            new_nodes = None
        else:
            new_targets = []
            # these are for cases where we do something like
            #   a, b = yield f.asynq(), f.asynq()
            # we turn this into
            #   a = yield f.asynq()
            #   b = a
            extra_nodes = []
            assignment_targets = current_statement.targets[0].elts
            for i, target in enumerate(assignment_targets):
                if i not in duplicate_indices:
                    new_targets.append(target)
                # A duplicate assigned to '_' needs no follow-up assignment.
                elif not (isinstance(target, ast.Name) and target.id == "_"):
                    extra_nodes.append(
                        ast.Assign(
                            targets=[target],
                            value=assignment_targets[duplicate_indices[i]],
                        )
                    )
            if len(new_targets) == 1:
                new_target = new_targets[0]
            else:
                new_target = ast.Tuple(elts=new_targets)

            new_assign = ast.Assign(targets=[new_target], value=new_yield_node)
            new_nodes = [new_assign] + extra_nodes
    else:
        new_nodes = None

    if new_nodes is not None:
        # NOTE(review): the deleted range is taken from the yield node, while the
        # new code is rendered for the whole statement - confirm the two ranges
        # always coincide.
        lines_to_delete = self._lines_of_node(node)
        indent = self._indentation_of_node(current_statement)
        new_code = "".join(
            decompile(node, starting_indentation=indent) for node in new_nodes
        )
        new_lines = [line + "\n" for line in new_code.splitlines()]
        replacement = Replacement(lines_to_delete, new_lines)
    else:
        replacement = None

    self.visitor.show_error(
        node, error_code=ErrorCode.duplicate_yield, replacement=replacement
    )
def process_while(self, statement):
    """Record a ``while`` statement and expand the known paths through it.

    The loop gets a pair of labels, ``WHnnnT`` and ``WHnnnF``, built from
    ``self.conditional_count``. The test's line number and condition text
    are stored under both labels in ``self.conditions_dict``, the false
    label holding the negated condition. When the loop has a body, every
    path containing the enclosing code block is replaced by one new path
    per entry of ``LOOP_LIST``. Finally the body and the ``else`` section
    are recursed into when they exist.

    Args:
        statement: the AST node of the while statement.
    """
    # first need to generate unique names for this loop structure
    self.conditional_count += 1
    loop_number = str.zfill(str(self.conditional_count), 3)
    this_wh_t = 'WH' + loop_number + 'T'
    this_wh_f = 'WH' + loop_number + 'F'

    # A line number of 0 means the section holds no code.
    body_lineno, orelse_lineno = 0, 0

    # extract the position and text of the test
    test_lineno = statement.test.lineno
    test_condition = decompile(statement.test)
    self.conditions_dict[this_wh_t] = [test_lineno, test_condition]
    # this might need adjusting to suit the phrasing of the prolog program
    self.conditions_dict[this_wh_f] = [
        test_lineno, ' not ( ' + test_condition + ' )'
    ]

    # if the body section exists get its start lineno
    if dhf.get_fields(statement.body):
        log_info('{0} exists'.format(this_wh_t))
        body_lineno = statement.body[0].lineno
    else:
        log_info('{0} does not exist'.format(this_wh_t))

    # if the orelse section exists get its start lineno
    if dhf.get_fields(statement.orelse):
        log_info('{0} exists'.format(this_wh_f))
        orelse_lineno = statement.orelse[0].lineno
    else:
        log_info('{0} does not exist'.format(this_wh_f))

    # split each path that contains this while loop into one path per loop count
    active_paths_true = []
    untouched_paths = []
    log_info('paths contains {0}'.format(str(self.paths)))
    parents = self.active_code_block
    for a_path in self.paths:
        # this is the code block that the while statement is in
        direct_parent_code_block = parents[-1]
        if direct_parent_code_block in a_path:
            # this path runs through the loop: replace it with one new path
            # for each entry in LOOP_LIST
            log_info('{0} has {1}'.format(a_path, direct_parent_code_block))
            if body_lineno != 0:
                for path_cycles in LOOP_LIST:
                    new_path = a_path + [this_wh_f] + [this_wh_t] * path_cycles
                    active_paths_true.append(new_path)
                    # Logged rather than printed, like the rest of this method.
                    log_info('active_paths += {0}'.format([new_path]))
        else:
            log_info("{0} doesn't have {1}".format(a_path, parents))
            untouched_paths.append(a_path)

    if active_paths_true:
        self.paths = active_paths_true + untouched_paths
    log_info('paths list now contains {0}'.format(self.paths))
    log_info(str(len(self.paths)))
    # The message used to say 'IF found', copied from the if-statement handler.
    log_info('WHILE found, TEST= {0}, BODY on {1} ORELSE on {2}'.format(
        test_lineno, body_lineno, orelse_lineno))

    # first the body section
    if body_lineno != 0:
        self.recurse_node(this_wh_t, statement.body)
    # then the orelse 'False' path
    if orelse_lineno != 0:
        self.recurse_node(this_wh_f, statement.orelse)
def process_if(self, statement):
    """Record an ``if`` statement and split the known paths through it.

    The statement gets a pair of labels, ``IFnnnT`` and ``IFnnnF``, built
    from ``self.conditional_count``. The test's line number and condition
    text are stored under both labels in ``self.conditions_dict``, the false
    label holding the negated condition. Every path containing the enclosing
    code block is replaced by a true-branch path and, when an ``else``
    section exists, a false-branch path. The body and the ``else`` section
    are then recursed into when they exist.
    """
    # Unique labels for this "if" statement, starting at IF001.
    self.conditional_count += 1
    label = 'IF' + str.zfill(str(self.conditional_count), 3)
    this_ift = label + 'T'
    this_iff = label + 'F'

    # Position and text of the test.
    test_lineno = statement.test.lineno
    test_condition = decompile(statement.test)
    self.conditions_dict[this_ift] = [test_lineno, test_condition]
    # this might need adjusting to suit the phrasing of the prolog program
    negated_condition = ' not ( ' + test_condition + ' )'
    self.conditions_dict[this_iff] = [test_lineno, negated_condition]

    # Start line of the body; 0 means the section holds no code.
    body_lineno = 0
    if dhf.get_fields(statement.body):
        log_info('{0} exists'.format(this_ift))
        body_lineno = statement.body[0].lineno
    else:
        log_info('{0} does not exist'.format(this_ift))

    # Start line of the else section; 0 means the section holds no code.
    orelse_lineno = 0
    if dhf.get_fields(statement.orelse):
        log_info('{0} exists'.format(this_iff))
        orelse_lineno = statement.orelse[0].lineno
    else:
        log_info('{0} does not exist'.format(this_iff))

    # Split every path that runs through the enclosing code block.
    true_paths = []
    false_paths = []
    log_info('paths contains {0}'.format(str(self.paths)))
    parents = self.active_code_block
    other_paths = []
    for path in self.paths:
        if parents[-1] not in path:
            log_info("{0} doesn't have {1}".format(path, parents))
            other_paths.append(path)
            continue
        log_info('{0} has {1}'.format(path, parents))
        if body_lineno != 0:
            true_paths.append(path + [this_ift])
        # if there is an else statement, add the orelse to paths
        if orelse_lineno != 0:
            false_paths.append(path + [this_iff])

    if true_paths or false_paths:
        self.paths = true_paths + false_paths + other_paths
    log_info('paths list now contains {0}'.format(self.paths))
    log_info(str(len(self.paths)))
    log_info('IF found, TEST= {0}, BODY on {1} ORELSE on {2}'.format(
        test_lineno, body_lineno, orelse_lineno))

    # Body first, then the orelse 'False' path.
    if body_lineno != 0:
        self.recurse_node(this_ift, statement.body)
    if orelse_lineno != 0:
        self.recurse_node(this_iff, statement.orelse)
def ast_to_source_decompile(node):
    """Render ``node`` to source text with ast_decompiler, stripped of surrounding whitespace."""
    # Imported inside the function, so ast_decompiler is only loaded on first call.
    import ast_decompiler

    source = ast_decompiler.decompile(node)
    return source.strip()
def make_stub(pptx_path: str, output_path: str):
    """Write the decompiled AST generated from a presentation's slides to a file.

    Args:
        pptx_path: path passed to ``Presentation``.
        output_path: file that receives the decompiled source; it is opened
            in write mode.
    """
    slides = Presentation(pptx_path).slides
    stub_tree = generate_ast(slides)
    with open(output_path, mode='w') as stub_file:
        stub_file.write(decompile(stub_tree))
def test_non_module():
    """Nodes that are not wrapped in a Module decompile without a trailing newline."""
    number = ast.Num(n=3)
    assert '3' == decompile(number)

    addition = ast.BinOp(op=ast.Add(), left=ast.Num(n=1), right=ast.Num(n=1))
    assert '1 + 1' == decompile(addition)
def test_GeneratorExp():
    """Generator expressions round-trip through decompile and re-parse."""
    for source in (
        '(x for x in y)',
        '(x for x in y if z)',
        '(x for x in y for z in a)',
        'f(x for x in y)',
    ):
        check(source)
    decompiled = decompile(ast.parse('f(x for x in y)'))
    assert 'f(x for x in y)\n' == decompiled
def test_GeneratorExp():
    """Round-trip a set of generator expressions."""
    sources = [
        '(x for x in y)',
        '(x for x in y if z)',
        '(x for x in y for z in a)',
        'f(x for x in y)',
    ]
    for expression in sources:
        check(expression)
    # A generator that is the sole call argument must not gain extra parentheses.
    sole_argument = decompile(ast.parse('f(x for x in y)'))
    assert 'f(x for x in y)\n' == sole_argument
def test_non_module():
    """Decompile bare expression nodes that are not wrapped in a Module."""
    literal = ast.Num(n=3)
    assert '3' == decompile(literal)

    one_plus_one = ast.BinOp(
        op=ast.Add(),
        left=ast.Num(n=1),
        right=ast.Num(n=1),
    )
    assert '1 + 1' == decompile(one_plus_one)
def assert_is_changed(self, old_code, new_code):
    """Assert that applying ``self.transformer_cls`` to ``old_code`` yields ``new_code``."""
    tree = ast.parse(old_code)
    transformer = self.transformer_cls()
    transformed = transformer.visit(tree)
    assert_code_equal(new_code, decompile(transformed))