def visit_Return(self, node):
    """Create the CFG node for a ``return`` statement.

    The return is modelled as an assignment to ``ret_<name>``, where
    ``<name>`` is the last entry of ``self.function_return_stack``.

    Args:
        node(ast.Return): The return statement being visited.

    Returns:
        Whatever ``self.append_node`` returns for the new ReturnNode.
    """
    statement_label = LabelVisitor()
    statement_label.visit(node)

    return_target = 'ret_' + self.function_return_stack[-1]
    returned = node.value

    if isinstance(returned, ast.Call):
        # `return call()`: visit the call first, then make the return node
        # depend on the call's left hand side and connect the two.
        call_result = self.visit(returned)
        return_node = ReturnNode(
            return_target + ' = ' + call_result.left_hand_side,
            return_target,
            node,
            [call_result.left_hand_side],
            path=self.filenames[-1])
        call_result.connect(return_node)
        return self.append_node(return_node)

    # A bare `return` uses no variables at all.
    if returned is None:
        used_variables = []
    else:
        used_variables = RHSVisitor.result_for_node(returned)

    plain_return = ReturnNode(
        return_target + ' = ' + statement_label.result,
        return_target,
        node,
        used_variables,
        path=self.filenames[-1])
    return self.append_node(plain_return)
def __init__(self, test_node, ast_node, *, path): label_visitor = LabelVisitor() label_visitor.visit(test_node) super().__init__('if ' + label_visitor.result + ':', ast_node, path=path)
def visit_miscelleaneous_node(self, node, custom_label=None):
    """Append a plain Node for ``node``.

    Args:
        node: The AST node to wrap.
        custom_label: Label to use instead of the one a LabelVisitor would
            produce. A falsy value (None, '') means "derive the label".

    Returns:
        Whatever ``self.append_node`` returns for the new Node.
    """
    node_label = custom_label
    if not node_label:
        visitor = LabelVisitor()
        visitor.visit(node)
        node_label = visitor.result

    return self.append_node(Node(node_label, node, path=self.filenames[-1]))
def visit_AugAssign(self, node):
    """Create the AssignmentNode for an augmented assignment (``x += y``).

    Args:
        node(ast.AugAssign): The statement being visited.

    Returns:
        Whatever ``self.append_node`` returns for the new AssignmentNode.
    """
    statement_label = LabelVisitor()
    statement_label.visit(node)

    value_variables = RHSVisitor()
    value_variables.visit(node.value)

    target_name = extract_left_hand_side(node.target)
    # The target is listed among the right hand side variables as well as
    # being the left hand side.
    used_variables = value_variables.result + [target_name]

    assignment = AssignmentNode(
        statement_label.result,
        target_name,
        node,
        used_variables,
        path=self.filenames[-1])
    return self.append_node(assignment)
def visit_Yield(self, node):
    """Create the YieldNode for a ``yield`` expression.

    The yield is labelled ``yld_<name> += <label>``, where ``<name>`` is the
    last entry of ``self.function_return_stack``.

    Args:
        node(ast.Yield): The expression being visited.

    Returns:
        Whatever ``self.append_node`` returns for the new YieldNode.
    """
    statement_label = LabelVisitor()
    statement_label.visit(node)

    yielded = node.value
    if yielded is not None:
        used_variables = RHSVisitor.result_for_node(yielded)
    else:
        used_variables = []

    # Yield is a bit like augmented assignment to a return value
    yield_target = 'yld_' + self.function_return_stack[-1]

    yield_node = YieldNode(
        yield_target + ' += ' + statement_label.result,
        yield_target,
        node,
        used_variables + [yield_target],
        path=self.filenames[-1])
    return self.append_node(yield_node)
def visit_While(self, node):
    """Create the CFG nodes for a ``while`` loop.

    Args:
        node(ast.While): The loop being visited.

    Returns:
        Whatever ``self.loop_node_skeleton`` returns for the loop.
    """
    condition = node.test  # the test condition of the while loop
    condition_label = LabelVisitor()
    condition_label.visit(condition)

    while_node = self.append_node(Node(
        'while ' + condition_label.result + ':',
        node,
        path=self.filenames[-1]))

    if isinstance(condition, ast.Compare):
        # quirk. See https://greentreesnakes.readthedocs.io/en/latest/nodes.html#Compare
        # A Compare keeps its first operand in `left` and the rest in
        # `comparators`, so every operand is processed separately.
        operands = [condition.left] + list(condition.comparators)
    else:
        # while foo():
        operands = [condition]

    for operand in operands:
        self.process_loop_funcs(operand, while_node)

    return self.loop_node_skeleton(while_node, node)
def visit(target, value):
    """Create the assignment node for a single ``target = value`` pair.

    Reads and mutates names from the enclosing scope: appends to
    ``new_assignment_nodes`` and removes the handled entries from
    ``remaining_targets``, ``remaining_values`` and ``remaining_variables``.
    """
    pair_label = LabelVisitor()
    pair_label.visit(target)
    value_variables = RHSVisitor()
    value_variables.visit(value)

    if isinstance(value, ast.Call):
        # A call on the right hand side goes through assignment_call_node.
        call_assignment = ast.Assign(target, value)
        ast.copy_location(call_assignment, target)
        created = self.assignment_call_node(pair_label.result,
                                            call_assignment)
    else:
        pair_label.result += ' = '
        pair_label.visit(value)
        created = self.append_node(
            AssignmentNode(pair_label.result,
                           extract_left_hand_side(target),
                           ast.Assign(target, value),
                           value_variables.result,
                           line_number=target.lineno,
                           path=self.filenames[-1]))
    new_assignment_nodes.append(created)

    # This pair is done: drop it from the bookkeeping lists.
    remaining_targets.remove(target)
    remaining_values.remove(value)
    for variable in value_variables.result:
        remaining_variables.remove(variable)
def visit_With(self, node):
    """Create the CFG nodes for a ``with`` statement and its body.

    The with-node is labelled from the first item only. For every item of
    the form ``open(<name>, ...) as <target>`` (a call to the bare name
    ``open`` whose first positional argument is a name and whose ``as``
    target is a name), the assignment ``<name> = <target>`` is appended to
    the body before the body is processed.

    Items that do not match that shape exactly are left alone. This covers
    ``with open(f):`` (no ``as`` target), ``open(file=f)`` (no positional
    argument) and tuple or attribute targets, none of which are rewritten.

    NOTE: ``node.body`` is mutated in place, so visiting the same ``With``
    node again appends the synthetic assignments again.

    Args:
        node(ast.With): The statement being visited.

    Returns:
        ControlFlowNode: with-node, last statements and break statements of
        the body.
    """
    label_visitor = LabelVisitor()
    label_visitor.visit(node.items[0])

    with_node = self.append_node(
        Node(label_visitor.result, node, path=self.filenames[-1]))

    body_extended = False
    for item in node.items:
        context_call = item.context_expr
        if not (isinstance(context_call, ast.Call)
                and isinstance(context_call.func, ast.Name)
                and context_call.func.id == "open"):
            continue
        # Guard against a missing positional argument (IndexError).
        if not context_call.args or not isinstance(context_call.args[0],
                                                   ast.Name):
            continue
        # Guard against a missing or non-Name `as` target (AttributeError).
        if not isinstance(item.optional_vars, ast.Name):
            continue

        target_node = _ast.Name(id=context_call.args[0].id, ctx=ast.Store())
        value_node = _ast.Name(id=item.optional_vars.id, ctx=ast.Load())
        node.body.append(
            _ast.Assign(targets=[target_node], value=value_node))
        body_extended = True

    if body_extended:
        # The synthetic nodes have no line/column information yet.
        ast.fix_missing_locations(node)

    connect_statements = self.stmt_star_handler(node.body)
    with_node.connect(connect_statements.first_statement)
    return ControlFlowNode(with_node,
                           connect_statements.last_statements,
                           connect_statements.break_statements)
def visit_For(self, node):
    """Create the CFG nodes for a ``for`` loop.

    Resets ``self.undecided`` to False before building the loop node.

    Args:
        node(ast.For): The loop being visited.

    Returns:
        Whatever ``self.loop_node_skeleton`` returns for the loop.
    """
    self.undecided = False

    iterable_label = LabelVisitor()
    iterable_label.visit(node.iter)

    loop_variable_label = LabelVisitor()
    loop_variable_label.visit(node.target)

    header = ("for " + loop_variable_label.result
              + " in " + iterable_label.result + ':')
    for_node = self.append_node(Node(header, node, path=self.filenames[-1]))

    self.process_loop_funcs(node.iter, for_node)

    return self.loop_node_skeleton(for_node, node)
def assign_multi_target(self, node, right_hand_side_variables):
    """Create one AssignmentNode per target of a chained assignment.

    Handles statements such as ``x = y = 3``: every target gets its own
    node labelled ``<target> = <value>``, and the nodes are connected in
    target order.

    Args:
        node(ast.Assign): The assignment with several targets.
        right_hand_side_variables(list): Variables used by the value.

    Returns:
        ControlFlowNode: First created node, and a one-element list holding
        the last created node.
    """
    created_nodes = []

    for target in node.targets:
        target_label = LabelVisitor()
        target_label.visit(target)
        left_hand_side = target_label.result

        # Keep extending the same visitor so it ends up as "lhs = value".
        target_label.result += ' = '
        target_label.visit(node.value)

        assignment = AssignmentNode(
            target_label.result,
            left_hand_side,
            ast.Assign(target, node.value),
            right_hand_side_variables,
            line_number=node.lineno,
            path=self.filenames[-1])
        created_nodes.append(self.append_node(assignment))

    connect_nodes(created_nodes)
    # return the last added node
    return ControlFlowNode(created_nodes[0], [created_nodes[-1]], [])
def __init__(self, ast_node, *, path):
    """Initialise a node whose label is the LabelVisitor text of ``ast_node``.

    Args:
        ast_node: AST node to label; also forwarded to the parent
            initialiser.
        path: Keyword-only, forwarded to the parent initialiser.
    """
    visitor = LabelVisitor()
    visitor.visit(ast_node)
    node_label = visitor.result
    super().__init__(node_label, ast_node, path=path)
def visit_Assert(self, node):
    """Append a Node for an ``assert`` statement.

    Only the test expression contributes to the label.

    Args:
        node(ast.Assert): The statement being visited.

    Returns:
        Whatever ``self.append_node`` returns for the new Node.
    """
    test_label = LabelVisitor()
    test_label.visit(node.test)

    assert_node = Node(test_label.result, node, path=self.filenames[-1])
    return self.append_node(assert_node)
def visit_Delete(self, node):
    """Append a Node for a ``del`` statement.

    The label is ``del `` followed by the targets separated by ``', '``.
    A single target yields the same label as before; several targets no
    longer run together (``del a, b`` used to be labelled ``del ab``).

    Args:
        node(ast.Delete): The statement being visited.

    Returns:
        Whatever ``self.append_node`` returns for the new Node.
    """
    label_visitor = LabelVisitor()
    for index, expr in enumerate(node.targets):
        if index:
            # The visitor appends to `result`, so add the separator first.
            label_visitor.result += ', '
        label_visitor.visit(expr)

    return self.append_node(
        Node('del ' + label_visitor.result, node, path=self.filenames[-1]))
def assign_tuple_target(self, target_nodes, value_nodes,
                        right_hand_side_variables):
    """Create assignment nodes for a tuple/list unpacking assignment.

    Targets and values are paired from the front until a Starred node is
    met, then from the back until a Starred node is met. Whatever is left
    unpaired gets one node per remaining target, all sharing a single
    label built from the remaining targets and values.

    Args:
        target_nodes(list): AST nodes on the left hand side.
        value_nodes(list): AST nodes on the right hand side.
        right_hand_side_variables(list): Variables used by the whole value.

    Returns:
        ControlFlowNode: First created node, and a one-element list holding
        the last created node.
    """
    new_assignment_nodes = []
    remaining_variables = list(right_hand_side_variables)
    remaining_targets = list(target_nodes)
    remaining_values = list(value_nodes)  # May contain duplicates

    def visit(target, value):
        """Emit the node for one pair and drop it from the remainders."""
        pair_label = LabelVisitor()
        pair_label.visit(target)
        value_variables = RHSVisitor()
        value_variables.visit(value)

        if isinstance(value, ast.Call):
            call_assignment = ast.Assign(target, value)
            ast.copy_location(call_assignment, target)
            created = self.assignment_call_node(pair_label.result,
                                                call_assignment)
        else:
            pair_label.result += ' = '
            pair_label.visit(value)
            created = self.append_node(
                AssignmentNode(pair_label.result,
                               extract_left_hand_side(target),
                               ast.Assign(target, value),
                               value_variables.result,
                               line_number=target.lineno,
                               path=self.filenames[-1]))
        new_assignment_nodes.append(created)

        remaining_targets.remove(target)
        remaining_values.remove(value)
        for variable in value_variables.result:
            remaining_variables.remove(variable)

    def involves_starred(target, value):
        """Tell whether either side of the pair is a Starred node."""
        return (isinstance(target, ast.Starred)
                or isinstance(value, ast.Starred))

    # Pair targets and values until a Starred node is reached
    for target, value in zip(target_nodes, value_nodes):
        if involves_starred(target, value):
            break
        visit(target, value)

    # If there was a Starred node, pair remaining targets and values from
    # the end. The reversed slices are snapshots, so visit() may shrink the
    # live lists while this loop runs.
    for target, value in zip(remaining_targets[::-1],
                             remaining_values[::-1]):
        if involves_starred(target, value):
            break
        visit(target, value)

    if remaining_targets:
        leftover_label = LabelVisitor()
        leftover_label.handle_comma_separated(remaining_targets)
        leftover_label.result += ' = '
        leftover_label.handle_comma_separated(remaining_values)
        for target in remaining_targets:
            leftover_node = self.append_node(
                AssignmentNode(leftover_label.result,
                               extract_left_hand_side(target),
                               ast.Assign(target, remaining_values[0]),
                               remaining_variables,
                               line_number=target.lineno,
                               path=self.filenames[-1]))
            new_assignment_nodes.append(leftover_node)

    connect_nodes(new_assignment_nodes)
    # return the last added node
    return ControlFlowNode(new_assignment_nodes[0],
                           [new_assignment_nodes[-1]], [])
def visit_Assign(self, node):
    """Create the CFG node(s) for an assignment statement.

    Dispatches on the shape of the assignment:

    * tuple/list target with tuple/list value -> ``assign_tuple_target``
    * tuple/list target with a call value -> one ``assignment_call_node``
      per target element (the last one is returned)
    * tuple/list target with a name value -> treated like ``x, y = (*z,)``
    * any other tuple/list target -> a warning is logged and a single
      AssignmentNode is appended
    * several targets (``x = y = 3``) -> ``assign_multi_target``
    * single target with a call value -> ``assignment_call_node``
    * anything else -> a single AssignmentNode

    Args:
        node(ast.Assign): The statement being visited.

    Returns:
        The node or ControlFlowNode produced by the selected handler.
    """
    rhs_visitor = RHSVisitor()
    rhs_visitor.visit(node.value)
    first_target = node.targets[0]

    if isinstance(first_target, (ast.Tuple, ast.List)):  # x,y = [1,2]
        if isinstance(node.value, (ast.Tuple, ast.List)):
            return self.assign_tuple_target(first_target.elts,
                                            node.value.elts,
                                            rhs_visitor.result)
        if isinstance(node.value, ast.Call):
            call = None
            for element in first_target.elts:
                label = LabelVisitor()
                label.visit(element)
                call = self.assignment_call_node(label.result, node)
            return call
        if isinstance(node.value, ast.Name):
            # Treat `x, y = z` like `x, y = (*z,)`
            value_node = ast.Starred(node.value, ast.Load())
            ast.copy_location(value_node, node)
            return self.assign_tuple_target(first_target.elts,
                                            [value_node],
                                            rhs_visitor.result)

        label = LabelVisitor()
        label.visit(node)
        # The label lives in the local `label`; there is no `self.label`,
        # so logging `self.label.result` raised AttributeError here.
        log.warning(
            'Assignment not properly handled in %s. Could result in not finding a vulnerability. '
            'Assignment: %s',
            getattr(self, 'filenames', ['?'])[0],
            label.result,
        )
        return self.append_node(
            AssignmentNode(label.result,
                           label.result,
                           node,
                           rhs_visitor.result,
                           path=self.filenames[-1]))

    if len(node.targets) > 1:  # x = y = 3
        return self.assign_multi_target(node, rhs_visitor.result)

    if isinstance(node.value, ast.Call):  # x = call()
        label = LabelVisitor()
        label.visit(first_target)
        return self.assignment_call_node(label.result, node)

    # x = 4
    label = LabelVisitor()
    label.visit(node)
    return self.append_node(
        AssignmentNode(label.result,
                       extract_left_hand_side(first_target),
                       node,
                       rhs_visitor.result,
                       path=self.filenames[-1]))
def save_def_args_in_temp(self, call_args, def_args, line_number,
                          saved_function_call_index, first_node):
    """Save the arguments of the definition being called.

    For every call argument a RestoreNode ``temp_N_<def_arg> = <call_arg>``
    is created, connected to the current last node and appended to
    ``self.nodes``. Arguments that are themselves calls are visited first
    and chained to each other.

    Args:
        call_args(list[ast.Name]): Of the call being made.
        def_args(ast_helper.Arguments): Of the definition being called.
        line_number(int): Of the call being made.
        saved_function_call_index(int): Unique number for each call.
        first_node(EntryOrExitNode or None or RestoreNode): Used to connect previous statements to this function.

    Returns:
        args_mapping(dict): A mapping of call argument to definition argument.
        first_node(EntryOrExitNode or None or RestoreNode): Used to connect previous statements to this function.
    """
    args_mapping = dict()
    # Most recently visited nested call, so the next one can be chained to it.
    last_return_value_of_nested_call = None

    # Create e.g. temp_N_def_arg1 = call_arg1_label_visitor.result for each argument
    for i, call_arg in enumerate(call_args):
        # If this results in an IndexError it is invalid Python
        def_arg_temp_name = 'temp_' + str(
            saved_function_call_index) + '_' + def_args[i]

        return_value_of_nested_call = None
        if isinstance(call_arg, ast.Call):
            # The argument is a call: visit it and save its left hand side.
            return_value_of_nested_call = self.visit(call_arg)
            restore_node = RestoreNode(
                def_arg_temp_name + ' = ' +
                return_value_of_nested_call.left_hand_side,
                def_arg_temp_name,
                [return_value_of_nested_call.left_hand_side],
                line_number=line_number,
                path=self.filenames[-1])
            # A temp that holds a blackbox result is recorded as a blackbox
            # assignment as well.
            if return_value_of_nested_call in self.blackbox_assignments:
                self.blackbox_assignments.add(restore_node)
        else:
            call_arg_label_visitor = LabelVisitor()
            call_arg_label_visitor.visit(call_arg)
            call_arg_rhs_visitor = RHSVisitor()
            call_arg_rhs_visitor.visit(call_arg)
            restore_node = RestoreNode(def_arg_temp_name + ' = ' +
                                       call_arg_label_visitor.result,
                                       def_arg_temp_name,
                                       call_arg_rhs_visitor.result,
                                       line_number=line_number,
                                       path=self.filenames[-1])

        # If there are no saved variables, then this is the first node
        if not first_node:
            first_node = restore_node

        if isinstance(call_arg, ast.Call):
            if last_return_value_of_nested_call:
                # connect inner to other_inner in e.g. `outer(inner(image_name), other_inner(image_name))`
                if isinstance(return_value_of_nested_call, BBorBInode):
                    last_return_value_of_nested_call.connect(
                        return_value_of_nested_call)
                else:
                    last_return_value_of_nested_call.connect(
                        return_value_of_nested_call.first_node)
            else:
                # I should only set this once per loop, inner in e.g. `outer(inner(image_name), other_inner(image_name))`
                # (inner_most_call is used when predecessor is a ControlFlowNode in connect_control_flow_node)
                if isinstance(return_value_of_nested_call, BBorBInode):
                    first_node.inner_most_call = return_value_of_nested_call
                else:
                    first_node.inner_most_call = return_value_of_nested_call.first_node
            # We purposefully should not set this as the first_node of return_value_of_nested_call, last makes sense
            last_return_value_of_nested_call = return_value_of_nested_call
        # The node is connected before it is appended, so self.nodes[-1] is
        # still the previous last node here.
        self.connect_if_allowed(self.nodes[-1], restore_node)
        self.nodes.append(restore_node)

        # NOTE(review): the two branches key the mapping differently. A call
        # argument maps its left hand side to the definition argument; any
        # other argument maps the definition argument to the call argument
        # label. Confirm against the callers before relying on a direction.
        if isinstance(call_arg, ast.Call):
            args_mapping[
                return_value_of_nested_call.left_hand_side] = def_args[i]
        else:
            args_mapping[def_args[i]] = call_arg_label_visitor.result

    return (args_mapping, first_node)
def add_blackbox_or_builtin_call(self, node, blackbox):  # noqa: C901
    """Processes a blackbox or builtin function when it is called.

    Nothing gets assigned to ret_func_foo in the builtin/blackbox case.

    Increments self.function_call_index each time it is called, we can refer to it as N in the comments.
    Create e.g. ~call_1 = ret_func_foo RestoreNode.

    Create e.g. temp_N_def_arg1 = call_arg1_label_visitor.result for each argument.
    Visit the arguments if they're calls. (save_def_args_in_temp)

    I do not think I care about this one actually -- Create e.g. def_arg1 = temp_N_def_arg1 for each argument.
    (create_local_scope_from_def_args)

    Add RestoreNode to the end of the Nodes.

    Also resets ``self.undecided`` to False.

    Args:
        node(ast.Call) : The node that calls the definition.
        blackbox(bool): Whether or not it is a builtin or blackbox call.

    Returns:
        call_node(BBorBInode): The call node.
    """
    self.function_call_index += 1
    saved_function_call_index = self.function_call_index
    self.undecided = False

    call_label_visitor = LabelVisitor()
    call_label_visitor.visit(node)

    # Everything before the first '(' of the call label is the callee.
    full_call_label = call_label_visitor.result
    call_function_label = full_call_label[:full_call_label.find('(')]

    # Check if function call matches a blackbox/built-in alias and if so, resolve it
    # This resolves aliases like "from os import system as mysys" as: mysys -> os.system
    local_definitions = self.module_definitions_stack[-1]
    call_function_label = fully_qualify_alias_labels(
        call_function_label, local_definitions.import_alias_mapping)

    # Create e.g. ~call_1 = ret_func_foo
    LHS = CALL_IDENTIFIER + 'call_' + str(saved_function_call_index)
    RHS = 'ret_' + call_function_label + '('

    call_node = BBorBInode(label='',
                           left_hand_side=LHS,
                           ast_node=node,
                           right_hand_side_variables=[],
                           line_number=node.lineno,
                           path=self.filenames[-1],
                           func_name=call_function_label)
    visual_args = []
    rhs_vars = []
    last_return_value_of_nested_call = None

    for arg_node in itertools.chain(node.args, node.keywords):
        arg = arg_node.value if isinstance(arg_node, ast.keyword) else arg_node
        arg_collector = RHSVisitor()
        arg_collector.visit(arg_node)
        if isinstance(arg, ast.Call):
            return_value_of_nested_call = self.visit(arg)

            if last_return_value_of_nested_call:
                # connect inner to other_inner in e.g.
                # `scrypt.outer(scrypt.inner(image_name), scrypt.other_inner(image_name))`
                # I should probably loop to the inner most call of other_inner here.
                # Only the attribute lookup falls back: an AttributeError
                # raised inside connect() is no longer swallowed and retried.
                successor = getattr(return_value_of_nested_call,
                                    'first_node',
                                    return_value_of_nested_call)
                last_return_value_of_nested_call.connect(successor)
            else:
                # I should only set this once per loop, inner in e.g.
                # `scrypt.outer(scrypt.inner(image_name), scrypt.other_inner(image_name))`
                # (inner_most_call is used when predecessor is a ControlFlowNode in connect_control_flow_node)
                call_node.inner_most_call = return_value_of_nested_call
            last_return_value_of_nested_call = return_value_of_nested_call

            nested_lhs = return_value_of_nested_call.left_hand_side
            if isinstance(arg_node, ast.keyword) and arg_node.arg is not None:
                visual_args.append(arg_node.arg + '=' + nested_lhs)
            else:
                visual_args.append(nested_lhs)
            visual_args.extend(arg_collector.result)
            rhs_vars.append(nested_lhs)
            rhs_vars.extend(arg_collector.result)
        else:
            label = LabelVisitor()
            label.visit(arg_node)
            visual_args.append(label.result)

            vv = VarsVisitor()
            vv.visit(arg_node)
            rhs_vars.extend(vv.result)

    if last_return_value_of_nested_call:
        # connect other_inner to outer in e.g.
        # `scrypt.outer(scrypt.inner(image_name), scrypt.other_inner(image_name))`
        last_return_value_of_nested_call.connect(call_node)

    call_names = list(get_call_names(node.func))
    if len(call_names) > 1:
        # taint is a RHS variable (self) of taint.lower()
        rhs_vars.append(call_names[0])

    # "ret_f(" + "a, b" + ")"; an empty argument list yields "ret_f()".
    RHS = RHS + ", ".join(visual_args) + ')'
    call_node.label = LHS + " = " + RHS

    call_node.right_hand_side_variables = rhs_vars
    # Used in get_sink_args
    rhs_visitor = RHSVisitor()
    rhs_visitor.visit(node)
    call_node.args = rhs_visitor.result

    if blackbox:
        self.blackbox_assignments.add(call_node)

    self.connect_if_allowed(self.nodes[-1], call_node)
    self.nodes.append(call_node)

    return call_node