def test_walker():
    """Traverse a cyclic six-node digraph and check every edge is visited.

    A visitor that records each edge it is handed is attached to a ``Walker``
    started at ``n1``; the graph has eight edges (including the ``n6 -> n1``
    back edge) and the visitor must have collected exactly eight. A second,
    three-node chain is then built only to log its post-order DFS.
    """
    class EdgePrinterVisitor(EdgeVisitor):
        """Edge visitor that appends every visited edge to ``self.edges``."""

        def __init__(self):
            EdgeVisitor.__init__(self)
            self.edges = []

        def visit(self, edge):
            self.edges.append(edge)

    g = DiGraph()
    n1, n2, n3, n4, n5, n6 = [
        DiGraph.make_node(data=label)
        for label in ('n1', 'n2', 'n3', 'n4', 'n5', 'n6')
    ]
    endpoints = [
        (n1, n2),
        (n1, n3),
        (n1, n4),
        (n2, n4),
        (n3, n4),
        (n4, n5),
        (n5, n6),
        (n6, n1),  # add cycle
    ]
    # Create every edge first, then attach them, in the listed order.
    edges = [DiGraph.make_edge(source=tail, dest=head) for tail, head in endpoints]
    for edge in edges:
        g.add_edge(edge)

    visitor = EdgePrinterVisitor()
    Walker(g, visitor).traverse(n1)
    logger.debug("Post-order DFS: %s", dfs_postorder_nodes(g, n1))
    assert len(visitor.edges) == 8

    # Simple chain n1 -> n2 -> n3, built through the instance-level helpers.
    s = DiGraph()
    head = s.make_add_node(data='n1')
    middle = s.make_add_node(data='n2')
    tail = s.make_add_node(data='n3')
    s.make_add_edge(head, middle)
    s.make_add_edge(middle, tail)
    logger.debug("Post-order DFS: %s", dfs_postorder_nodes(s, head))
def test_loop_breaks():
    """Check that control flow can be built for the ``LOOP_BREAK_CASE`` code.

    The parsed code must yield exactly two declarations, and each one must
    produce a ``ControlFlow`` with blocks and a non-empty dominator mapping.
    """
    logger.debug("test_loop_breaks")

    code = get_co(LOOP_BREAK_CASE)
    assert code is not None

    parsed = BytecodeObject('<string>')
    parsed.parse_code(code)
    assert len(parsed.declarations) == 2

    for declaration in parsed.declarations:
        flow = ControlFlow(declaration)
        assert flow.blocks is not None
        assert len(flow.dominators.dom) > 0
def test_inverse_graph():
    """Build a four-node digraph, invert it, and log both in DOT form.

    There are no assertions: the test passes when graph construction,
    ``inverse()`` and ``to_dot()`` all complete without raising.
    """
    graph = DiGraph()
    first, second, third, fourth = [
        DiGraph.make_node(data=label) for label in ('n1', 'n2', 'n3', 'n4')
    ]
    endpoints = (
        (first, second),
        (first, third),
        (first, fourth),
        (second, fourth),
        (third, fourth),
    )
    # Create every edge first, then attach them, in the listed order.
    edges = [DiGraph.make_edge(source=tail, dest=head) for tail, head in endpoints]
    for edge in edges:
        graph.add_edge(edge)

    logger.debug("Normal graph :=\n%s", graph.to_dot())
    inverted = graph.inverse()
    logger.debug("Inverse graph :=\n%s", inverted.to_dot())
def test_user_pic():
    """Build a control-flow graph for each declaration of ``USER_PIC``.

    The code object returned by ``get_co(USER_PIC)`` must parse into exactly
    two declarations. For each one the basic blocks and the dominance frontier
    are logged and the graph is written in DOT form to ``USER_PIC.dot``; the
    same path is reused, so each declaration overwrites the previous output.
    """
    co_simple = get_co(USER_PIC)
    assert co_simple is not None

    bytecode_object = BytecodeObject('<string>')
    bytecode_object.parse_code(co_simple)
    assert len(bytecode_object.declarations) == 2

    for decl in bytecode_object.declarations:
        cflow = ControlFlow(decl)

        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for block in cflow.blocks:
            logger.debug(block)

        logger.debug('dominators.frontier is')
        logger.debug(cflow.dominators.frontier)

        # ``with`` closes the handle even if ``to_dot()`` or the write raises.
        with open('USER_PIC.dot', 'w') as dot_file:
            dot_file.write(cflow.graph.to_dot())
def test_if_statements():
    """Parse ``IF_STMTS_CASE`` and check its control flow can be built.

    Logs the attributes collected by the bytecode parser (loads, stores,
    compares, returns), requires exactly one declaration, and for it logs the
    dominance frontier and basic blocks, writes the graph to
    ``IF_STMTS_CASE.dot`` and asserts that blocks exist and the dominator
    mapping is non-empty.
    """
    co_simple = get_co(IF_STMTS_CASE)
    assert co_simple is not None

    # NOTE(review): '<strhhhhhing>' looks like a typo for '<string>'; it is
    # kept unchanged because the effect of this label is not visible here.
    bytecode_object = BytecodeObject('<strhhhhhing>')
    bytecode_object.parse_code(co_simple)

    logger.debug('bytecode_object is ')
    logger.debug(dir(bytecode_object))
    logger.debug('bytecode_object.loads is ')
    logger.debug(bytecode_object.loads)
    logger.debug('bytecode_object.stores is ')
    logger.debug(bytecode_object.stores)
    logger.debug('bytecode_object.compares is ')
    logger.debug(bytecode_object.compares)
    logger.debug('bytecode_object.rets is ')
    logger.debug(bytecode_object.rets)
    logger.debug('Finished parsing code\n')

    assert len(bytecode_object.declarations) == 1

    for decl in bytecode_object.declarations:
        cflow = ControlFlow(decl)

        # Print Dominance Frontier
        logger.debug('dominators.frontier is')
        logger.debug(cflow.dominators.frontier)

        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for block in cflow.blocks:
            logger.debug(block)

        # Dump the graph as DOT; ``with`` closes the handle even if
        # ``to_dot()`` or the write raises.
        with open('IF_STMTS_CASE.dot', 'w') as dot_file:
            dot_file.write(cflow.graph.to_dot())

        assert cflow.blocks is not None
        assert len(cflow.dominators.dom) > 0
def test_delete_false_positives():
    """Run interprocedural taint propagation over a large fixture module.

    The fixture ``files_to_test_against/test_delete_false_positives.py`` is
    compiled and parsed, and a ``ControlFlow`` is built for every declaration.
    Declarations are then processed in reverse order: buddies are set from the
    graph's ``ENTRY`` node, blocks feeding the exit node are marked as having a
    return value, and taint propagation is run with all control-flow objects
    passed as ``other_decls``. For methods, propagation runs once per formal
    parameter (``self`` is never added to the tainted set) and the
    per-parameter summaries are stored on the declaration. A DOT file and a
    PNG rendering (``VIEWS<n>.dot`` / ``VIEWS<n>.png``) are written after each
    propagation run.

    The test passes when ``junkhacker.log.py`` contains the two marker
    messages the expected number of times (31 and 2).

    NOTE(review): the nesting below was reconstructed from a flattened source;
    confirm the placement of ``break`` and of the per-parameter body against
    the original file.
    """
    with open("files_to_test_against/test_delete_false_positives.py") as fixture:
        source = fixture.read()
    logger.debug('source is')
    logger.debug(source)

    bytecode_object = BytecodeObject('<string>')
    compiled_code = get_co(source)
    bytecode_object.parse_code(compiled_code)

    logger.debug('Declarations are')
    logger.debug(bytecode_object.declarations)
    logger.debug('bytecode_object.main_module is')
    logger.debug(bytecode_object.main_module)

    # We can save the cflow for each decl
    all_the_decls = {}

    # First loop gets all of the cflow objects for each decl
    for decl in bytecode_object.declarations:
        logger.debug('decl.kind is')
        logger.debug(decl.kind)
        if decl.kind == Declaration.METHOD:
            logger.debug('decl.formal_parameters are')
            logger.debug(decl.formal_parameters)
            for param in decl.formal_parameters:
                if param != 'self':
                    logger.debug("Going to say %s is tainted", param)
        logger.debug('dir(decl) is')
        logger.debug(dir(decl))

        cflow = ControlFlow(decl)
        all_the_decls[decl] = cflow

    # Materialise the reversed sequence so the log shows the declarations
    # themselves rather than the repr of an iterator object.
    logger.debug("Decls are in this order %s", list(reversed(bytecode_object.declarations)))
    logger.debug("Decls are in this order type(%s)", type(bytecode_object.declarations))

    x = 0  # running index used to name the VIEWS<n> output files

    # Now we can pass every cflow object to every decl
    for decl in reversed(bytecode_object.declarations):
        cflow = all_the_decls[decl]

        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for b in cflow.blocks:
            logger.debug(b)

        logger.debug('dominators.frontier is')
        logger.debug(cflow.dominators.frontier)

        graph = cflow.graph
        logger.debug("Uh what, # of nodes is %s", len(graph.nodes))
        logger.debug("Here they are %s", graph.nodes)

        # We just want to run this on the root, so stop at the ENTRY node.
        for node in graph.nodes:
            if node.kind == 'ENTRY':
                bfs_set_buddies(graph, node)
                break

        for node in graph.nodes:
            logger.debug("Bing %s is %s", type(node), node)
            predecessors = graph.in_edges(node)
            if len(predecessors) >= 2:
                logger.debug("predecessors for %s are %s", node, predecessors)

        # Every block with an edge into the exit node is flagged as returning.
        for in_edge in cflow.graph.in_edges(cflow.exit_node):
            logger.debug("Node in_edge.source.data is %s", in_edge.source.data)
            logger.debug("Node in_edge.source.data.length is %s", in_edge.source.data.length)
            logger.debug("Node in_edge.source.data.bytecode_slice is %s", in_edge.source.data.bytecode_slice)
            logger.debug("Node in_edge.source.data.end_target is %s", in_edge.source.data.end_target)
            in_edge.source.data.has_ret_value = True

        logger.debug('Now starting taint propagation')
        # vv Not sure if valid any more vv
        # We can't just pass root in -- the stack, environment etc. will propagate otherwise.
        # ^^ Not sure if valid any more ^^
        if decl.kind == Declaration.METHOD:
            for param in decl.formal_parameters:
                logger.debug("formal_param is %s", param)
                one_at_a_time = set()
                if param != 'self':
                    one_at_a_time.add(param)
                logger.debug('one_at_a_time is %s', one_at_a_time)

                cflow.taint_propagation(cflow.root,
                                        decl,
                                        tainted=one_at_a_time,
                                        stack=[],
                                        environment={},
                                        immune={},
                                        previous_edges=set(),
                                        new_edge='',
                                        other_decls=all_the_decls,
                                        interprocedural_mode=True,
                                        stack_of_buddies=[])

                if decl.pretty_summary:
                    logger.error(decl.formal_parameters)
                    logger.error(decl.pretty_summary)
                    logger.error(decl.one_param_summary)
                    decl.all_params_summary[param] = decl.one_param_summary
                    logger.error("Here it is :)")
                    logger.error(decl.all_params_summary)
                if decl.vuln_summary:
                    logger.error("ONE TIME ONLY!")
                    decl.all_params_vuln_summary[param] = decl.vuln_summary

                # We clean the slate for the next param
                decl.vuln_summary = []
                decl.inter_vuln_summary = []
                if decl.returns_tainted:
                    decl.all_params_returns_tainted[param] = True
                logger.debug("BEFORE")
                decl.returns_tainted = False

                dot_path = 'VIEWS' + str(x) + '.dot'
                # ``with`` closes the handle even if ``to_dot()`` or the write raises.
                with open(dot_path, 'w') as dot_file:
                    dot_file.write(cflow.graph.to_dot())
                G = pgv.AGraph(dot_path, strict=False, overlap=False, splines='spline')
                G.layout()
                G.draw('VIEWS' + str(x) + '.png')
                x = x + 1
        else:
            cflow.taint_propagation(cflow.root,
                                    current_decl=None,
                                    tainted=set(),
                                    stack=[],
                                    environment={},
                                    immune={},
                                    previous_edges=set(),
                                    new_edge='',
                                    other_decls=all_the_decls,
                                    interprocedural_mode=True,
                                    stack_of_buddies=[])

            dot_path = 'VIEWS' + str(x) + '.dot'
            with open(dot_path, 'w') as dot_file:
                dot_file.write(cflow.graph.to_dot())
            G = pgv.AGraph(dot_path, strict=False, overlap=False, splines='spline')
            G.layout()
            G.draw('VIEWS' + str(x) + '.png')
            x = x + 1

    logger.debug("All the decls dict is %s", all_the_decls)

    with open("junkhacker.log.py") as log_file:
        logs = log_file.read()

    first_key_string = "So current_bloc.succ == stack_of_buddies[-1]"
    assert logs.count(first_key_string) == 31

    last_key_string = "Uh oh spaghettios"
    assert logs.count(last_key_string) == 2
def test_left_foo_on_stack_due_to_conditional():
    """Check that a value left on the stack by a conditional jump is cleaned up.

    The fixture ``files_to_test_against/left_foo_on_stack_due_to_conditional.py``
    is compiled and parsed, its imports are checked against ``BAD_IMPORTS``
    (logging only), and taint propagation is run for every declaration, with
    all formal parameters tainted for methods and nothing tainted otherwise.
    Each declaration's graph is written to ``VIEWS<n>.dot`` and rendered to
    ``VIEWS<n>.png``.

    The test passes when ``junkhacker.log.py`` contains the expected
    "Cleaning a leftover ..." message and exactly one occurrence of
    "Cleaning a leftover".
    """
    with open("files_to_test_against/left_foo_on_stack_due_to_conditional.py") as fixture:
        source = fixture.read()
    logger.debug('source is')
    logger.debug(source)

    bytecode_object = BytecodeObject('<string>')
    compiled_code = get_co(source)
    bytecode_object.parse_code(compiled_code)

    logger.debug('ghi declarations are')
    logger.debug(bytecode_object.declarations)
    logger.debug('bytecode_object.main_module is')
    logger.debug(bytecode_object.main_module)

    for imp_stmt in bytecode_object.main_module.imports:
        logger.debug('imp_stmt is')
        logger.debug(imp_stmt)
        if any(bad_imp in str(imp_stmt) for bad_imp in BAD_IMPORTS):
            logger.debug("EMERGENCY")

    x = 0  # running index used to name the VIEWS<n> output files
    for decl in bytecode_object.declarations:
        logger.debug('decl.kind is')
        logger.debug(decl.kind)
        if decl.kind == Declaration.METHOD:
            logger.debug('decl.formal_parameters is')
            logger.debug(decl.formal_parameters)
        logger.debug('dir(decl) is')
        logger.debug(dir(decl))

        cflow = ControlFlow(decl)

        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for b in cflow.blocks:
            logger.debug(b)

        # We can't just pass root in -- the stack, environment etc. will propagate otherwise.
        if decl.kind == Declaration.METHOD:
            cflow.taint_propagation(cflow.root,
                                    tainted=set(decl.formal_parameters),
                                    stack=[],
                                    environment={},
                                    immune={},
                                    previous_edges=set(),
                                    new_edge='')
        else:
            cflow.taint_propagation(cflow.root,
                                    tainted=set(),
                                    stack=[],
                                    environment={},
                                    immune={},
                                    previous_edges=set(),
                                    new_edge='')

        dot_path = 'VIEWS' + str(x) + '.dot'
        # ``with`` closes the handle even if ``to_dot()`` or the write raises.
        with open(dot_path, 'w') as dot_file:
            dot_file.write(cflow.graph.to_dot())
        G = pgv.AGraph(dot_path, strict=False, overlap=False, splines='spline')
        G.layout()
        G.draw('VIEWS' + str(x) + '.png')
        x = x + 1

    with open("junkhacker.log.py") as log_file:
        logs = log_file.read()

    assert "Cleaning a leftover *const_foo due to a JUMP_IF_TRUE_OR_POP instruction at edge 0->36" in logs
    assert logs.count("Cleaning a leftover") == 1
def _propagate_taint_for_fixture(path):
    """Parse the Python file at *path* and run taint propagation on each declaration.

    Logs the source, declarations, main module and imports (flagging any import
    that matches ``BAD_IMPORTS``), then for every declaration builds a
    ``ControlFlow``, runs taint propagation (all formal parameters tainted for
    methods, nothing tainted otherwise) and writes the graph to
    ``VIEWS<n>.dot`` / ``VIEWS<n>.png``, with ``n`` counting from 0.
    """
    with open(path) as fixture:
        source = fixture.read()
    logger.debug('source is')
    logger.debug(source)

    bytecode_object = BytecodeObject('<string>')
    compiled_code = get_co(source)
    bytecode_object.parse_code(compiled_code)

    logger.debug('ghi declarations are')
    logger.debug(bytecode_object.declarations)
    logger.debug('bytecode_object.main_module is')
    logger.debug(bytecode_object.main_module)

    for imp_stmt in bytecode_object.main_module.imports:
        logger.debug('imp_stmt is')
        logger.debug(imp_stmt)
        if any(bad_imp in str(imp_stmt) for bad_imp in BAD_IMPORTS):
            logger.debug("EMERGENCY")

    x = 0  # running index used to name the VIEWS<n> output files
    for decl in bytecode_object.declarations:
        logger.debug('decl.kind is')
        logger.debug(decl.kind)
        if decl.kind == Declaration.METHOD:
            logger.debug('decl.formal_parameters is')
            logger.debug(decl.formal_parameters)
        logger.debug('dir(decl) is')
        logger.debug(dir(decl))

        cflow = ControlFlow(decl)

        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for b in cflow.blocks:
            logger.debug(b)

        # vv Not sure if valid any more vv
        # We can't just pass root in -- the stack, environment etc. will propagate otherwise.
        # ^^ Not sure if valid any more ^^
        if decl.kind == Declaration.METHOD:
            cflow.taint_propagation(cflow.root,
                                    tainted=set(decl.formal_parameters),
                                    stack=[],
                                    environment={},
                                    immune={},
                                    previous_edges=set(),
                                    new_edge='')
        else:
            cflow.taint_propagation(cflow.root,
                                    tainted=set(),
                                    stack=[],
                                    environment={},
                                    immune={},
                                    previous_edges=set(),
                                    new_edge='')

        logger.debug('dominators.frontier is')
        logger.debug(cflow.dominators.frontier)

        dot_path = 'VIEWS' + str(x) + '.dot'
        # ``with`` closes the handle even if ``to_dot()`` or the write raises.
        with open(dot_path, 'w') as dot_file:
            dot_file.write(cflow.graph.to_dot())
        G = pgv.AGraph(dot_path, strict=False, overlap=False, splines='spline')
        G.layout()
        G.draw('VIEWS' + str(x) + '.png')
        x = x + 1


def test_tryexcept():
    """Run taint propagation over the two try/except fixtures and check the log.

    Both ``files_to_test_against/tryexcept.py`` and
    ``files_to_test_against/tryexceptexception.py`` are analysed in turn.
    Afterwards ``junkhacker.log.py`` must contain no "Traceback" and the
    expected number of ") - returning <value>" entries for each return value.
    """
    # Part 1
    _propagate_taint_for_fixture("files_to_test_against/tryexcept.py")

    # Part 2
    _propagate_taint_for_fixture("files_to_test_against/tryexceptexception.py")

    with open("junkhacker.log.py") as log_file:
        logs = log_file.read()

    there_was_an_error = "Traceback"
    assert logs.count(there_was_an_error) == 0

    returning = ") - returning "

    return_value0 = str([{'*const_message': '*const_Yo'}])
    key_string0 = returning + return_value0
    assert logs.count(key_string0) == 6

    return_value1 = str(['*const_None'])
    key_string1 = returning + return_value1
    assert logs.count(key_string1) == 2

    return_value2 = str([{'*const_message': '*const_success'}])
    key_string2 = returning + return_value2
    # Why 5? Why tryexcept 2 + tryexceptexception 3? Oh well.
    assert logs.count(key_string2) == 5

    return_value3 = str([{'*const_message': '*const_Failed'}])
    key_string3 = returning + return_value3
    assert logs.count(key_string3) == 2
def test_kwargs():
    """Analyse the kwargs fixture and check the logged keyword-argument dict.

    ``files_to_test_against/kwargs.py`` is compiled and parsed, taint
    propagation is run for every declaration (all formal parameters tainted
    for methods, nothing tainted otherwise) and each graph is written to
    ``VIEWS<n>.dot`` / ``VIEWS<n>.png``. The test passes when
    ``junkhacker.log.py`` contains the expected "type(kwargs) is ... and
    kwargs is ..." message exactly once.
    """
    # NOTE(review): this handle is never closed; consider a ``with`` block.
    f = open("files_to_test_against/kwargs.py")
    source = f.read()
    logger.debug('source is')
    logger.debug(source)

    bytecode_object = BytecodeObject('<string>')
    compiled_code = get_co(source)
    bytecode_object.parse_code(compiled_code)

    logger.debug('ghi declarations are')
    logger.debug(bytecode_object.declarations)
    # logger.debug('ghi dir(bytecode_object) is')
    # logger.debug(dir(bytecode_object))
    logger.debug('bytecode_object.main_module is')
    logger.debug(bytecode_object.main_module)

    # Log every import and flag the ones matching an entry of BAD_IMPORTS.
    for imp_stmt in bytecode_object.main_module.imports:  #enumerate tktk
        logger.debug('imp_stmt is')
        logger.debug(imp_stmt)
        if any(bad_imp in str(imp_stmt) for bad_imp in BAD_IMPORTS):
            logger.debug("EMERGENCY")

    # Running index used to name the VIEWS<n> output files.
    x = 0
    for decl in bytecode_object.declarations:
        # logger.debug('decl.formal_parameters is')
        # logger.debug(decl.formal_parameters)
        logger.debug('decl.kind is')
        logger.debug(decl.kind)
        if decl.kind == Declaration.METHOD:
            logger.debug('decl.formal_parameters is')
            logger.debug(decl.formal_parameters)
        logger.debug('dir(decl) is')
        logger.debug(dir(decl))

        cflow = ControlFlow(decl)

        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for b in cflow.blocks:
            logger.debug(b)

        # vv Not sure if valid any more vv
        # We can't just pass root in -- the stack, environment etc. will propagate otherwise.
        # ^^ Not sure if valid any more ^^
        if decl.kind == Declaration.METHOD:
            cflow.taint_propagation(cflow.root,
                                    tainted=set(decl.formal_parameters),
                                    stack=[],
                                    environment={},
                                    immune={},
                                    previous_edges=set(),
                                    new_edge='')
        else:
            cflow.taint_propagation(cflow.root,
                                    tainted=set(),
                                    stack=[],
                                    environment={},
                                    immune={},
                                    previous_edges=set(),
                                    new_edge='')

        logger.debug('dominators.frontier is')
        logger.debug(cflow.dominators.frontier)

        # Dump the graph as DOT, then render it to PNG with pygraphviz.
        f = open('VIEWS' + str(x) + '.dot', 'w')
        f.write(cflow.graph.to_dot())
        f.close()
        G = pgv.AGraph('VIEWS' + str(x) + '.dot', strict=False, overlap=False, splines='spline')
        G.layout()
        G.draw('VIEWS' + str(x) + '.png')
        x = x + 1

    with open("junkhacker.log.py") as log_file:
        logs = log_file.read()

    # NOTE(review): the '*const_user' value below appears corrupted (it looks
    # like a redaction pass replaced part of the literal) and is not valid
    # Python as written; restore the original string from version control.
    kwargs = {
        '*const_user': '******'Alice.me\', [], {})"',
        '*const_consumer': 'consumer',
        '*const_fast': 'fast'
    }
    key_string = "type(kwargs) is <class 'junkhacker.analysis.basicBlockInterpreter.kdict'> and kwargs is " + str(
        kwargs)
    assert key_string in logs
    assert logs.count(key_string) == 1