def visit(target, value):
    """Create the assignment node for a single ``target = value`` pair.

    This is a closure: ``self``, ``new_assignment_nodes``,
    ``remaining_targets``, ``remaining_values`` and ``remaining_variables``
    all come from the enclosing scope.

    The created node is appended to ``new_assignment_nodes``; afterwards the
    pair and every right-hand-side variable it used are removed from the
    ``remaining_*`` lists.

    Args:
        target(ast.AST): The node being assigned to.
        value(ast.AST): The node being assigned.
    """
    target_label = LabelVisitor()
    target_label.visit(target)
    value_rhs = RHSVisitor()
    value_rhs.visit(value)

    if not isinstance(value, ast.Call):
        # Plain value: extend the target label into "target = value".
        target_label.result += " = "
        target_label.visit(value)
        assignment = AssignmentNode(
            target_label.result,
            extract_left_hand_side(target),
            ast.Assign(target, value),
            value_rhs.result,
            line_number=target.lineno,
            path=self.filenames[-1],
        )
        created_node = self.append_node(assignment)
    else:
        # Call on the right-hand side: synthesise an Assign node located at
        # the target and let the call-assignment handler process it.
        pair_assign = ast.Assign(target, value)
        ast.copy_location(pair_assign, target)
        created_node = self.assignment_call_node(
            target_label.result, pair_assign
        )
    new_assignment_nodes.append(created_node)

    # This pair is handled; drop it and the variables it consumed.
    remaining_targets.remove(target)
    remaining_values.remove(value)
    for consumed in value_rhs.result:
        remaining_variables.remove(consumed)
def visit_Assign(self, node):
    """Create the CFG node(s) for an assignment statement.

    Dispatches on the shape of the first target and of the value:

    * tuple/list target with tuple/list value -> ``assign_tuple_target``
    * tuple/list target with a call value     -> one ``assignment_call_node``
      per target element (the last one is returned)
    * tuple/list target with a bare name      -> treated as unpacking a
      starred value via ``assign_tuple_target``
    * tuple/list target with anything else    -> a single ``AssignmentNode``
    * several targets (``x = y = 3``)         -> ``assign_multi_target``
    * single target with a call value         -> ``assignment_call_node``
    * single target otherwise                 -> a single ``AssignmentNode``

    Args:
        node(ast.Assign): The assignment being visited.

    Returns:
        Whatever the selected helper returns for the created node(s).
    """
    rhs_visitor = RHSVisitor()
    rhs_visitor.visit(node.value)
    rhs_variables = rhs_visitor.result

    first_target = node.targets[0]
    value = node.value

    if isinstance(first_target, (ast.Tuple, ast.List)):
        if isinstance(value, (ast.Tuple, ast.List)):
            # x, y = [1, 2]
            return self.assign_tuple_target(
                first_target.elts, value.elts, rhs_variables
            )

        if isinstance(value, ast.Call):
            # x, y = call(): every element is assigned from the same call.
            call = None
            for element in first_target.elts:
                element_label = LabelVisitor()
                element_label.visit(element)
                call = self.assignment_call_node(element_label.result, node)
            return call

        if isinstance(value, ast.Name):
            # Treat `x, y = z` like `x, y = (*z,)`
            starred_value = ast.Starred(value, ast.Load())
            ast.copy_location(starred_value, node)
            return self.assign_tuple_target(
                first_target.elts, [starred_value], rhs_variables
            )

        # Any other value: one node whose label doubles as its left-hand side.
        statement_label = LabelVisitor()
        statement_label.visit(node)
        return self.append_node(
            AssignmentNode(
                statement_label.result,
                statement_label.result,
                node,
                rhs_variables,
                path=self.filenames[-1],
            )
        )

    if len(node.targets) > 1:
        # x = y = 3
        return self.assign_multi_target(node, rhs_variables)

    if isinstance(value, ast.Call):
        # x = call()
        target_label = LabelVisitor()
        target_label.visit(first_target)
        return self.assignment_call_node(target_label.result, node)

    # x = 4
    statement_label = LabelVisitor()
    statement_label.visit(node)
    return self.append_node(
        AssignmentNode(
            statement_label.result,
            extract_left_hand_side(first_target),
            node,
            rhs_variables,
            path=self.filenames[-1],
        )
    )
def get_sink_args(cfg_node):
    """Return the argument/variable names used by ``cfg_node``.

    The source of the result depends on the node:

    * its AST node is a call        -> names found by ``RHSVisitor``
    * its AST node is an assignment -> ``cfg_node.right_hand_side_variables``
    * it is a ``BBorBInode``        -> ``cfg_node.args``
    * anything else                 -> names found by ``VarsVisitor``

    Args:
        cfg_node: A CFG node exposing an ``ast_node`` attribute.

    Returns:
        The collection of names selected as described above.
    """
    ast_node = cfg_node.ast_node

    if isinstance(ast_node, ast.Call):
        call_rhs = RHSVisitor()
        call_rhs.visit(ast_node)
        return call_rhs.result

    if isinstance(ast_node, ast.Assign):
        return cfg_node.right_hand_side_variables

    if isinstance(cfg_node, BBorBInode):
        return cfg_node.args

    all_vars = VarsVisitor()
    all_vars.visit(ast_node)
    return all_vars.result
def visit_Return(self, node):
    """Create the CFG node for a ``return`` statement.

    The returned value is modelled as an assignment to ``ret_<function>``,
    where ``<function>`` is the top of ``self.function_return_stack``.

    When the returned expression is a call, the call is visited first and the
    return node is connected after it. If visiting the call yields something
    without a ``left_hand_side`` attribute, no return node is created.

    Args:
        node(ast.Return): The return statement being visited.

    Returns:
        The result of ``self.append_node`` for the new ``ReturnNode``, or
        ``None`` when the returned call produced no usable node.
    """
    return_variable = "ret_" + self.function_return_stack[-1]

    if isinstance(node.value, ast.Call):
        return_value_of_call = self.visit(node.value)
        if not hasattr(return_value_of_call, "left_hand_side"):
            return None

        call_lhs = return_value_of_call.left_hand_side
        return_node = ReturnNode(
            return_variable + " = " + call_lhs,
            return_variable,
            node,
            [call_lhs],
            path=self.filenames[-1],
        )
        # The hasattr check above already rules out None, so the call node
        # can be connected unconditionally.
        return_value_of_call.connect(return_node)
        return self.append_node(return_node)

    # The statement label is only needed on the non-call path.
    label = LabelVisitor()
    label.visit(node)

    if node.value is not None:
        rhs_visitor_result = RHSVisitor.result_for_node(node.value)
    else:
        # Bare `return`
        rhs_visitor_result = []

    return self.append_node(
        ReturnNode(
            return_variable + " = " + label.result,
            return_variable,
            node,
            rhs_visitor_result,
            path=self.filenames[-1],
        )
    )
def visit_AugAssign(self, node):
    """Create the CFG node for an augmented assignment such as ``x += y``.

    The target is also recorded as a right-hand-side variable, in addition to
    the variables found in the value.

    Args:
        node(ast.AugAssign): The augmented assignment being visited.

    Returns:
        The result of ``self.append_node`` for the new ``AssignmentNode``.
    """
    statement_label = LabelVisitor()
    statement_label.visit(node)

    value_rhs = RHSVisitor()
    value_rhs.visit(node.value)

    target_name = extract_left_hand_side(node.target)
    rhs_variables = value_rhs.result + [target_name]

    assignment = AssignmentNode(
        statement_label.result,
        target_name,
        node,
        rhs_variables,
        path=self.filenames[-1],
    )
    return self.append_node(assignment)
def visit_Yield(self, node):
    """Create the CFG node for a ``yield`` expression.

    A yield is modelled like an augmented assignment to ``yld_<function>``,
    where ``<function>`` is the top of ``self.function_return_stack``; the
    yield variable is therefore also listed among the right-hand-side
    variables.

    Args:
        node(ast.Yield): The yield expression being visited.

    Returns:
        The result of ``self.append_node`` for the new ``YieldNode``.
    """
    yield_label = LabelVisitor()
    yield_label.visit(node)

    # A bare `yield` contributes no variables.
    yielded_variables = (
        [] if node.value is None else RHSVisitor.result_for_node(node.value)
    )

    yield_variable = "yld_" + self.function_return_stack[-1]
    yield_node = YieldNode(
        yield_variable + " += " + yield_label.result,
        yield_variable,
        node,
        yielded_variables + [yield_variable],
        path=self.filenames[-1],
    )
    return self.append_node(yield_node)
def add_blackbox_or_builtin_call(self, node, blackbox):  # noqa: C901
    """Create the CFG node for a call to a blackbox or builtin function.

    Nothing gets assigned to ret_func_foo in the builtin/blackbox case.

    Increments self.function_call_index each time it is called; it is
    referred to as N below.

    Steps:
        1. Label the call and resolve import aliases in the function name.
        2. Create a BBorBInode whose left-hand side is
           CALL_IDENTIFIER + "call_N".
        3. For every positional and keyword argument: visit it if it is a
           call and chain nested calls together; otherwise collect its label
           and variables.
        4. Set the node label to "<LHS> = ret_<function>(<args>)", store the
           collected right-hand-side variables and the ``args`` used by
           get_sink_args.
        5. Connect the node after self.nodes[-1] (if allowed) and append it.

    A nested call whose visit result has no ``left_hand_side`` attribute is
    left out of the label and of the right-hand-side variables, for
    positional and keyword arguments alike.

    Args:
        node(ast.Call) : The node that calls the definition.
        blackbox(bool): Whether or not it is a builtin or blackbox call.

    Returns:
        call_node(BBorBInode): The call node.
    """
    self.function_call_index += 1
    saved_function_call_index = self.function_call_index
    self.undecided = False

    call_label_visitor = LabelVisitor()
    call_label_visitor.visit(node)
    call_label = call_label_visitor.result
    # Everything before the first "(" is the function name.
    call_function_label = call_label[: call_label.find("(")]

    # Check if function call matches a blackbox/built-in alias and if so, resolve it
    # This resolves aliases like "from os import system as mysys" as: mysys -> os.system
    local_definitions = self.module_definitions_stack[-1]
    call_function_label = fully_qualify_alias_labels(
        call_function_label, local_definitions.import_alias_mapping
    )

    # Create e.g. ~call_1 = ret_func_foo
    LHS = CALL_IDENTIFIER + "call_" + str(saved_function_call_index)
    RHS = "ret_" + call_function_label + "("

    call_node = BBorBInode(
        label="",
        left_hand_side=LHS,
        ast_node=node,
        right_hand_side_variables=[],
        line_number=node.lineno,
        path=self.filenames[-1],
        func_name=call_function_label,
    )
    visual_args = []
    rhs_vars = []
    last_return_value_of_nested_call = None

    for arg_node in itertools.chain(node.args, node.keywords):
        arg = arg_node.value if isinstance(arg_node, ast.keyword) else arg_node
        if isinstance(arg, ast.Call):
            return_value_of_nested_call = self.visit(arg)

            if last_return_value_of_nested_call:
                # connect inner to other_inner in e.g.
                # `scrypt.outer(scrypt.inner(image_name), scrypt.other_inner(image_name))`
                # I should probably loop to the inner most call of other_inner here.
                try:
                    last_return_value_of_nested_call.connect(
                        return_value_of_nested_call.first_node
                    )
                except AttributeError:
                    last_return_value_of_nested_call.connect(
                        return_value_of_nested_call
                    )
            else:
                # I should only set this once per loop, inner in e.g.
                # `scrypt.outer(scrypt.inner(image_name), scrypt.other_inner(image_name))`
                # (inner_most_call is used when predecessor is a ControlFlowNode in connect_control_flow_node)
                call_node.inner_most_call = return_value_of_nested_call
            last_return_value_of_nested_call = return_value_of_nested_call

            # One guard for both argument kinds: previously only the
            # positional path checked for left_hand_side, so a keyword
            # argument could raise AttributeError here.
            if hasattr(return_value_of_nested_call, "left_hand_side"):
                nested_lhs = return_value_of_nested_call.left_hand_side
                if isinstance(arg_node, ast.keyword) and arg_node.arg is not None:
                    visual_args.append(arg_node.arg + "=" + nested_lhs)
                else:
                    visual_args.append(nested_lhs)
                rhs_vars.append(nested_lhs)
        else:
            label = LabelVisitor()
            label.visit(arg_node)
            visual_args.append(label.result)

            vv = VarsVisitor()
            vv.visit(arg_node)
            rhs_vars.extend(vv.result)

    if last_return_value_of_nested_call:
        # connect other_inner to outer in e.g.
        # `scrypt.outer(scrypt.inner(image_name), scrypt.other_inner(image_name))`
        last_return_value_of_nested_call.connect(call_node)

    call_names = list(get_call_names(node.func))
    if len(call_names) > 1:
        # taint is a RHS variable (self) of taint.lower()
        rhs_vars.append(call_names[0])

    RHS = RHS + ", ".join(visual_args) + ")"

    call_node.label = LHS + " = " + RHS
    call_node.right_hand_side_variables = rhs_vars

    # Used in get_sink_args
    rhs_visitor = RHSVisitor()
    rhs_visitor.visit(node)
    call_node.args = rhs_visitor.result

    if blackbox:
        self.blackbox_assignments.add(call_node)

    self.connect_if_allowed(self.nodes[-1], call_node)
    self.nodes.append(call_node)

    return call_node
def save_def_args_in_temp(self, call_args, def_args, line_number, saved_function_call_index, first_node): """Save the arguments of the definition being called. Visit the arguments if they're calls. Args: call_args(list[ast.Name]): Of the call being made. def_args(ast_helper.Arguments): Of the definition being called. line_number(int): Of the call being made. saved_function_call_index(int): Unique number for each call. first_node(EntryOrExitNode or None or RestoreNode): Used to connect previous statements to this function. Returns: args_mapping(dict): A mapping of call argument to definition argument. first_node(EntryOrExitNode or None or RestoreNode): Used to connect previous statements to this function. """ args_mapping = dict() last_return_value_of_nested_call = None # Create e.g. temp_N_def_arg1 = call_arg1_label_visitor.result for each argument for i, call_arg in enumerate(call_args): if i > len(def_args) - 1: break # If this results in an IndexError it is invalid Python def_arg_temp_name = ("temp_" + str(saved_function_call_index) + "_" + def_args[i]) return_value_of_nested_call = None if isinstance(call_arg, ast.Call): return_value_of_nested_call = self.visit(call_arg) restore_node = RestoreNode( def_arg_temp_name + " = " + return_value_of_nested_call.left_hand_side, def_arg_temp_name, [return_value_of_nested_call.left_hand_side], line_number=line_number, path=self.filenames[-1], ) if return_value_of_nested_call in self.blackbox_assignments: self.blackbox_assignments.add(restore_node) else: call_arg_label_visitor = LabelVisitor() call_arg_label_visitor.visit(call_arg) call_arg_rhs_visitor = RHSVisitor() call_arg_rhs_visitor.visit(call_arg) restore_node = RestoreNode( def_arg_temp_name + " = " + call_arg_label_visitor.result, def_arg_temp_name, call_arg_rhs_visitor.result, line_number=line_number, path=self.filenames[-1], ) # If there are no saved variables, then this is the first node if not first_node: first_node = restore_node if isinstance(call_arg, 
ast.Call): if last_return_value_of_nested_call: # connect inner to other_inner in e.g. `outer(inner(image_name), other_inner(image_name))` if isinstance(return_value_of_nested_call, BBorBInode): last_return_value_of_nested_call.connect( return_value_of_nested_call) else: last_return_value_of_nested_call.connect( return_value_of_nested_call.first_node) else: # I should only set this once per loop, inner in e.g. `outer(inner(image_name), other_inner(image_name))` # (inner_most_call is used when predecessor is a ControlFlowNode in connect_control_flow_node) if isinstance(return_value_of_nested_call, BBorBInode): first_node.inner_most_call = return_value_of_nested_call else: first_node.inner_most_call = ( return_value_of_nested_call.first_node) # We purposefully should not set this as the first_node of return_value_of_nested_call, last makes sense last_return_value_of_nested_call = return_value_of_nested_call self.connect_if_allowed(self.nodes[-1], restore_node) self.nodes.append(restore_node) if isinstance(call_arg, ast.Call): args_mapping[ return_value_of_nested_call.left_hand_side] = def_args[i] else: args_mapping[def_args[i]] = call_arg_label_visitor.result return (args_mapping, first_node)