def test_cflow1():
    """Build a ControlFlow for every declaration found in SIMPLE_PROGRAM.

    Compiles SIMPLE_PROGRAM, parses it into a BytecodeObject, checks that
    exactly 9 declarations were found, and asserts that each declaration
    yields a control-flow graph with blocks and a non-empty dominator map.
    """
    co_simple = get_co(SIMPLE_PROGRAM)
    assert co_simple is not None

    # A parenthesised single-argument print produces the same output as a
    # Python 2 print statement and is also valid Python 3.
    print('hi')

    bytecode_object = BytecodeObject('<string>')
    bytecode_object.parse_code(co_simple)

    assert len(bytecode_object.declarations) == 9

    for decl in bytecode_object.declarations:
        print('Decl is ')
        print(decl)
        print('cflow is ')
        cflow = ControlFlow(decl)

        # Optional debugging aid: dump and render the graph.
        # f = open('SIMPLE_PROGRAM.dot', 'w')
        # f.write(cflow.graph.to_dot())
        # f.close()
        # G=pgv.AGraph("SIMPLE_PROGRAM.dot", strict=False, overlap=False, splines='spline')
        # G.layout()
        # G.draw('SIMPLE_PROGRAM.png')

        assert cflow.blocks is not None
        assert len(cflow.dominators.dom) > 0
def test_decl_1():
    """Check the declaration breakdown that parsing SIMPLE_PROGRAM produces.

    Expects 12 declarations in total, of which 3 are lambda methods, 3 are
    type declarations and 1 is a module declaration, and expects the main
    module to record 2 imports.
    """
    code_obj = get_co(SIMPLE_PROGRAM)
    assert code_obj is not None

    parsed = BytecodeObject('<string>')
    parsed.parse_code(code_obj)

    assert len(parsed.declarations) == 12

    # Count each category of declaration separately.
    lambda_count = 0
    type_count = 0
    module_count = 0
    for declaration in parsed.declarations:
        if isinstance(declaration, MethodDeclaration) and declaration.is_lambda:
            lambda_count += 1
        if isinstance(declaration, TypeDeclaration):
            type_count += 1
        if isinstance(declaration, ModuleDeclaration):
            module_count += 1

    assert lambda_count == 3
    assert type_count == 3
    assert module_count == 1

    main_module = parsed.main_module
    assert len(main_module.imports) == 2
def test_if_statements():
    """Parse IF_STMTS_CASE and build the control-flow graph of its declaration.

    Logs the loads/stores/compares/rets collected by the BytecodeObject,
    expects exactly one declaration, logs the dominance frontier and basic
    blocks of its ControlFlow, writes the graph to ``IF_STMTS_CASE.dot`` and
    asserts that blocks and dominators were computed.
    """
    co_simple = get_co(IF_STMTS_CASE)
    assert co_simple is not None

    bytecode_object = BytecodeObject('<strhhhhhing>')
    bytecode_object.parse_code(co_simple)

    logger.debug('bytecode_object is ')
    logger.debug(dir(bytecode_object))
    # logger.debug('bytecode_object.bytecode is ')
    # logger.debug(bytecode_object.bytecode)
    logger.debug('bytecode_object.loads is ')
    logger.debug(bytecode_object.loads)
    logger.debug('bytecode_object.stores is ')
    logger.debug(bytecode_object.stores)
    logger.debug('bytecode_object.compares is ')
    logger.debug(bytecode_object.compares)
    logger.debug('bytecode_object.rets is ')
    logger.debug(bytecode_object.rets)
    logger.debug('Finished parsing code\n')

    assert len(bytecode_object.declarations) == 1

    for decl in bytecode_object.declarations:
        cflow = ControlFlow(decl)

        # Print Dominance Frontier
        logger.debug('dominators.frontier is')
        logger.debug(cflow.dominators.frontier)

        # Print Dominators
        # logger.debug('Dominators are')
        # logger.debug('----------------------------')
        # cflow.dominators.print_tree(post_dom=False)
        # logger.debug('----------------------------')

        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for b in cflow.blocks:
            logger.debug(b)

        # Dump the graph; the context manager closes the file even if
        # to_dot() or write() raises.
        with open('IF_STMTS_CASE.dot', 'w') as dot_file:
            dot_file.write(cflow.graph.to_dot())
        #G=pgv.AGraph("IF_STMTS_CASE.dot", strict=False, overlap=False, splines='spline')
        #G.layout()
        #G.draw('IF_STMTS_CASE.png')

        assert cflow.blocks is not None
        assert len(cflow.dominators.dom) > 0
def test_loop_breaks():
    """Build a ControlFlow for each of the 2 declarations in LOOP_BREAK_CASE.

    Every declaration must produce basic blocks and a non-empty dominator
    map.
    """
    logger.debug("test_loop_breaks")

    code_obj = get_co(LOOP_BREAK_CASE)
    assert code_obj is not None

    parsed = BytecodeObject('<string>')
    parsed.parse_code(code_obj)

    declarations = parsed.declarations
    assert len(declarations) == 2

    for declaration in declarations:
        flow = ControlFlow(declaration)
        assert flow.blocks is not None
        dominator_map = flow.dominators.dom
        assert len(dominator_map) > 0
def test_user_pic():
    """Parse USER_PIC, log each declaration's CFG and dump it to USER_PIC.dot.

    Expects exactly 2 declarations. For each one the basic blocks and the
    dominance frontier are logged and the graph is written to
    ``USER_PIC.dot`` (each write overwrites the previous one).
    """
    co_simple = get_co(USER_PIC)
    assert co_simple is not None

    bytecode_object = BytecodeObject('<string>')
    bytecode_object.parse_code(co_simple)

    assert len(bytecode_object.declarations) == 2

    for decl in bytecode_object.declarations:
        cflow = ControlFlow(decl)

        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for b in cflow.blocks:
            logger.debug(b)

        logger.debug('dominators.frontier is')
        logger.debug(cflow.dominators.frontier)

        # NOTE(review): the dump is assumed to sit inside the loop; the
        # original indentation was lost. Confirm against version control.
        # The context manager guarantees the handle is closed on error.
        with open('USER_PIC.dot', 'w') as dot_file:
            dot_file.write(cflow.graph.to_dot())
def test_nested_methods_lambdas():
    """Check parent/child nesting of the methods in NESTED_METHS_PROGRAM.

    Expects 14 declarations in total and verifies, by method name, how many
    child declarations each looked-up method has.
    """
    code_obj = get_co(NESTED_METHS_PROGRAM)
    assert code_obj is not None

    parsed = BytecodeObject('<string>')
    parsed.parse_code(code_obj)

    assert len(parsed.declarations) == 14

    # (method name, expected number of direct children), checked in order.
    expected_children = (
        ('nested_1', 2),
        ('one_method', 3),
        ('nested_1_2', 0),
    )
    for method_name, child_count in expected_children:
        found = parsed.get_decl(method_name=method_name)
        assert found is not None
        assert len(found.children) == child_count
def _execute_interpreter(self, fname, source_code, nosec_lines):
    '''Execute taint propagation on each declaration of a file.

    The source is compiled and parsed into a BytecodeObject. For every
    declaration a ControlFlow is built, buddies are assigned starting from
    the graph's ENTRY node, and taint propagation is run. Method
    declarations start with all formal parameters tainted; every other
    declaration starts with an empty taint set.

    :param fname: The name of the file being parsed (only logged here)
    :param source_code: Original file contents
    :param nosec_lines: The lines of code to skip (currently unused by
        this method)
    :return: The ``score`` list that is handed to every
        ``taint_propagation`` call (presumably filled in there -- confirm
        against ControlFlow.taint_propagation)
    '''
    score = []

    ## Here is where we put the code to test a file
    # logger.warning is used throughout: logger.warn is a deprecated alias.
    logger.warning("fname is %s", fname)
    # logger.warning("source_code is %s", source_code)
    # logger.warning("nosec_lines is %s", nosec_lines)

    bytecode_object = BytecodeObject('<string>')
    compiled_code = utils.get_co(source_code)
    bytecode_object.parse_code(compiled_code)

    for decl in bytecode_object.declarations:
        if decl.kind == Declaration.METHOD:
            logger.warning('decl.formal_parameters is')
            logger.warning(decl.formal_parameters)

        cflow = ControlFlow(decl)
        graph = cflow.graph

        # We need to initialize the buddy system!
        logger.debug("Uh what, # of nodes is %s", len(graph.nodes))
        logger.debug("IN MANAGER Here they are %s", graph.nodes)
        for node in graph.nodes:
            if node.kind == 'ENTRY':
                # We just want to run this on the root
                bfs_set_buddies(graph, node)
                break

        # Print BasicBlocks
        logger.warning("Blocks in CFG are: ")
        for b in cflow.blocks:
            logger.warning(b)

        # If it is not the MAKE_FUNCTION bytecode
        if len(graph.nodes) != 3:
            logger.warning("\n\n\n\n\n\n\nprint nodes after buddy system")
            logger.warning("len(graph.nodes) is %s", len(graph.nodes))
            for node in graph.nodes:
                if node.buddy:
                    logger.error(
                        "IN THE MANAGER\nOkay so %s has the buddy %s",
                        node.data.p_range(), node.buddy.data.p_range())
            logger.warning("\n\n\n\n\n\n\n")

        if decl.kind == Declaration.METHOD:
            # We aren't doing "one_at_a_time yet", nor "other_decls" (inter)
            # cflow.taint_propagation(cflow.root, decl, tainted=one_at_a_time, stack=[], environment={}, immune={}, previous_edges=set(), new_edge='', other_decls=all_the_decls, interprocedural_mode=True, stack_of_buddies=[])
            cflow.taint_propagation(cflow.root,
                                    decl,
                                    tainted=set(decl.formal_parameters),
                                    stack=[],
                                    environment={},
                                    immune={},
                                    previous_edges=set(),
                                    new_edge='',
                                    other_decls=None,
                                    interprocedural_mode=False,
                                    stack_of_buddies=[],
                                    score=score)
        else:
            cflow.taint_propagation(cflow.root,
                                    current_decl=None,
                                    tainted=set(),
                                    stack=[],
                                    environment={},
                                    immune={},
                                    previous_edges=set(),
                                    new_edge='',
                                    other_decls=None,
                                    interprocedural_mode=False,
                                    stack_of_buddies=[],
                                    score=score)

    # res = node_visitor.JunkHackerNodeVisitor(fname, self.ma,
    #                                          self.ts, self.warn,
    #                                          nosec_lines, self.metrics)
    # score = res.process(source_code)
    # self.results.extend(res.tester.results)

    logger.error('first time score is now %s', score)
    logger.error('first time type(score) is now %s', type(score))

    # Lol removing duplicate dict's from a list like a pro:
    # for x in score:
    #     for y in score:
    #         if x == y:
    #             score.remove(y)
    # logger.error('AFTER score is now %s', score)
    # score = []
    return score
def test_delete_false_positives():
    """Run interprocedural taint propagation on a large fixture file.

    Exercises the buddy system and the fixed dominator code together: the
    test passes only if the expected marker strings appear the expected
    number of times in ``junkhacker.log.py``.

    Steps:
      1. Parse ``files_to_test_against/test_delete_false_positives.py``.
      2. Build and remember a ControlFlow per declaration.
      3. Walk the declarations in reverse order, assign buddies, mark the
         predecessors of the exit node as having a return value and run
         taint propagation (one formal parameter at a time for methods).
      4. Dump every graph to ``VIEWS<n>.dot`` / ``VIEWS<n>.png``.
    """
    # The context manager closes the fixture; the original never did.
    with open("files_to_test_against/test_delete_false_positives.py") as source_file:
        source = source_file.read()
    logger.debug('source is')
    logger.debug(source)

    bytecode_object = BytecodeObject('<string>')
    compiled_code = get_co(source)
    bytecode_object.parse_code(compiled_code)

    logger.debug('Declarations are')
    logger.debug(bytecode_object.declarations)
    logger.debug('bytecode_object.main_module is')
    logger.debug(bytecode_object.main_module)

    # We can save the cflow for each decl
    all_the_decls = {}

    # First loop gets all of the cflow objects for each decl
    for decl in bytecode_object.declarations:
        logger.debug('decl.kind is')
        logger.debug(decl.kind)
        if decl.kind == Declaration.METHOD:
            logger.debug('decl.formal_parameters are')
            logger.debug(decl.formal_parameters)
            for param in decl.formal_parameters:
                if param != 'self':
                    logger.debug("Going to say %s is tainted", param)
        logger.debug('dir(decl) is')
        logger.debug(dir(decl))
        cflow = ControlFlow(decl)
        all_the_decls[decl] = cflow

    logger.debug("Decls are in this order %s", reversed(bytecode_object.declarations))
    logger.debug("Decls are in this order type(%s)", type(bytecode_object.declarations))

    x = 0  # running index used to name the VIEWS<n> graph dumps

    # Now we can pass every cflow object to every decl
    for decl in reversed(bytecode_object.declarations):
        cflow = all_the_decls[decl]

        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for b in cflow.blocks:
            logger.debug(b)

        logger.debug('dominators.frontier is')
        logger.debug(cflow.dominators.frontier)

        graph = cflow.graph

        # Initialise the buddy system from the ENTRY node only.
        logger.debug("Uh what, # of nodes is %s", len(graph.nodes))
        logger.debug("Here they are %s", graph.nodes)
        for node in graph.nodes:
            if node.kind == 'ENTRY':
                # We just want to run this on the root
                bfs_set_buddies(graph, node)
                break

        for node in graph.nodes:
            logger.debug("Bing %s is %s", type(node), node)
            predecessors = graph.in_edges(node)
            if len(predecessors) >= 2:
                logger.debug("predecessors for %s are %s", node, predecessors)

        # Every block with an edge into the exit node is flagged as
        # carrying a return value.
        for in_edge in cflow.graph.in_edges(cflow.exit_node):
            logger.debug("Node in_edge.source.data is %s", in_edge.source.data)
            logger.debug("Node in_edge.source.data.length is %s", in_edge.source.data.length)
            logger.debug("Node in_edge.source.data.bytecode_slice is %s", in_edge.source.data.bytecode_slice)
            logger.debug("Node in_edge.source.data.end_target is %s", in_edge.source.data.end_target)
            in_edge.source.data.has_ret_value = True

        logger.debug('Now starting taint propagation')
        # vv Not sure if valid any more vv
        # We can't just pass root in -- the stack, environment etc. will propagate otherwise.
        # ^^ Not sure if valid any more ^^
        if decl.kind == Declaration.METHOD:
            # NOTE(review): the nesting of this section was reconstructed
            # from a source whose indentation was lost -- in particular
            # whether propagation also runs for 'self' and whether the
            # graph dump is per parameter or per declaration. Confirm
            # against version control.
            for param in decl.formal_parameters:
                logger.debug("formal_param is %s", param)
                one_at_a_time = set()
                if param != 'self':
                    one_at_a_time.add(param)
                logger.debug('one_at_a_time is %s', one_at_a_time)

                cflow.taint_propagation(cflow.root,
                                        decl,
                                        tainted=one_at_a_time,
                                        stack=[],
                                        environment={},
                                        immune={},
                                        previous_edges=set(),
                                        new_edge='',
                                        other_decls=all_the_decls,
                                        interprocedural_mode=True,
                                        stack_of_buddies=[])

                if decl.pretty_summary:
                    logger.error(decl.formal_parameters)
                    logger.error(decl.pretty_summary)
                    logger.error(decl.one_param_summary)
                    decl.all_params_summary[param] = decl.one_param_summary
                    logger.error("Here it is :)")
                    logger.error(decl.all_params_summary)

                if decl.vuln_summary:
                    logger.error("ONE TIME ONLY!")
                    decl.all_params_vuln_summary[param] = decl.vuln_summary

                # We clean the slate for the next param
                decl.vuln_summary = []
                decl.inter_vuln_summary = []

                if decl.returns_tainted:
                    decl.all_params_returns_tainted[param] = True
                logger.debug("BEFORE")
                decl.returns_tainted = False

            dot_path = 'VIEWS' + str(x) + '.dot'
            with open(dot_path, 'w') as dot_file:
                dot_file.write(cflow.graph.to_dot())
            G = pgv.AGraph(dot_path, strict=False, overlap=False, splines='spline')
            G.layout()
            G.draw('VIEWS' + str(x) + '.png')
            x = x + 1
        else:
            cflow.taint_propagation(cflow.root,
                                    current_decl=None,
                                    tainted=set(),
                                    stack=[],
                                    environment={},
                                    immune={},
                                    previous_edges=set(),
                                    new_edge='',
                                    other_decls=all_the_decls,
                                    interprocedural_mode=True,
                                    stack_of_buddies=[])

            dot_path = 'VIEWS' + str(x) + '.dot'
            with open(dot_path, 'w') as dot_file:
                dot_file.write(cflow.graph.to_dot())
            G = pgv.AGraph(dot_path, strict=False, overlap=False, splines='spline')
            G.layout()
            G.draw('VIEWS' + str(x) + '.png')
            x = x + 1

    logger.debug("All the decls dict is %s", all_the_decls)

    # The assertions are made against the log file the run produced.
    with open("junkhacker.log.py") as log_file:
        logs = log_file.read()

    first_key_string = "So current_bloc.succ == stack_of_buddies[-1]"
    assert logs.count(first_key_string) == 31

    last_key_string = "Uh oh spaghettios"
    assert logs.count(last_key_string) == 2
def test_left_foo_on_stack_due_to_conditional():
    """Check that a value left on the stack by a conditional jump is cleaned.

    Parses ``files_to_test_against/left_foo_on_stack_due_to_conditional.py``,
    runs taint propagation on every declaration (methods start with all
    formal parameters tainted), dumps each graph to ``VIEWS<n>.dot`` /
    ``VIEWS<n>.png`` and then asserts that ``junkhacker.log.py`` contains
    exactly one "Cleaning a leftover" message, for edge 0->36.
    """
    # The context manager closes the fixture; the original never did.
    with open("files_to_test_against/left_foo_on_stack_due_to_conditional.py") as source_file:
        source = source_file.read()
    logger.debug('source is')
    logger.debug(source)

    bytecode_object = BytecodeObject('<string>')
    compiled_code = get_co(source)
    bytecode_object.parse_code(compiled_code)

    logger.debug('ghi declarations are')
    logger.debug(bytecode_object.declarations)
    # logger.debug('ghi dir(bytecode_object) is')
    # logger.debug(dir(bytecode_object))
    logger.debug('bytecode_object.main_module is')
    logger.debug(bytecode_object.main_module)

    # Only logs when an import matches BAD_IMPORTS; nothing is asserted.
    for imp_stmt in bytecode_object.main_module.imports:
        logger.debug('imp_stmt is')
        logger.debug(imp_stmt)
        if any(bad_imp in str(imp_stmt) for bad_imp in BAD_IMPORTS):
            logger.debug("EMERGENCY")

    x = 0  # running index used to name the VIEWS<n> graph dumps
    for decl in bytecode_object.declarations:
        # logger.debug('decl.formal_parameters is')
        # logger.debug(decl.formal_parameters)
        logger.debug('decl.kind is')
        logger.debug(decl.kind)
        if decl.kind == Declaration.METHOD:
            logger.debug('decl.formal_parameters is')
            logger.debug(decl.formal_parameters)
        logger.debug('dir(decl) is')
        logger.debug(dir(decl))

        cflow = ControlFlow(decl)

        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for b in cflow.blocks:
            logger.debug(b)

        # We can't just pass root in -- the stack, environment etc. will propagate otherwise.
        if decl.kind == Declaration.METHOD:
            cflow.taint_propagation(cflow.root,
                                    tainted=set(decl.formal_parameters),
                                    stack=[],
                                    environment={},
                                    immune={},
                                    previous_edges=set(),
                                    new_edge='')
        else:
            cflow.taint_propagation(cflow.root,
                                    tainted=set(),
                                    stack=[],
                                    environment={},
                                    immune={},
                                    previous_edges=set(),
                                    new_edge='')

        # logger.debug('dominators.frontier is')
        # logger.debug(cflow.dominators.frontier)

        dot_path = 'VIEWS' + str(x) + '.dot'
        with open(dot_path, 'w') as dot_file:
            dot_file.write(cflow.graph.to_dot())
        G = pgv.AGraph(dot_path, strict=False, overlap=False, splines='spline')
        G.layout()
        G.draw('VIEWS' + str(x) + '.png')
        x = x + 1

    # The assertions are made against the log file the run produced.
    with open("junkhacker.log.py") as log_file:
        logs = log_file.read()

    assert "Cleaning a leftover *const_foo due to a JUMP_IF_TRUE_OR_POP instruction at edge 0->36" in logs
    assert logs.count("Cleaning a leftover") == 1
def _tryexcept_run_taint_analysis(source_path):
    """Parse one fixture, taint-propagate every declaration, dump each CFG.

    Methods start with all formal parameters tainted; other declarations
    start with an empty taint set. Each graph is written to
    ``VIEWS<n>.dot`` / ``VIEWS<n>.png`` with ``n`` restarting at 0 for each
    call, so a later call overwrites the dumps of an earlier one.

    :param source_path: path of the Python fixture file to analyse
    """
    # The context manager closes the fixture; the original never did.
    with open(source_path) as source_file:
        source = source_file.read()
    logger.debug('source is')
    logger.debug(source)

    bytecode_object = BytecodeObject('<string>')
    compiled_code = get_co(source)
    bytecode_object.parse_code(compiled_code)

    logger.debug('ghi declarations are')
    logger.debug(bytecode_object.declarations)
    # logger.debug('ghi dir(bytecode_object) is')
    # logger.debug(dir(bytecode_object))
    logger.debug('bytecode_object.main_module is')
    logger.debug(bytecode_object.main_module)

    # Only logs when an import matches BAD_IMPORTS; nothing is asserted.
    for imp_stmt in bytecode_object.main_module.imports:
        logger.debug('imp_stmt is')
        logger.debug(imp_stmt)
        if any(bad_imp in str(imp_stmt) for bad_imp in BAD_IMPORTS):
            logger.debug("EMERGENCY")

    x = 0  # running index used to name the VIEWS<n> graph dumps
    for decl in bytecode_object.declarations:
        # logger.debug('decl.formal_parameters is')
        # logger.debug(decl.formal_parameters)
        logger.debug('decl.kind is')
        logger.debug(decl.kind)
        if decl.kind == Declaration.METHOD:
            logger.debug('decl.formal_parameters is')
            logger.debug(decl.formal_parameters)
        logger.debug('dir(decl) is')
        logger.debug(dir(decl))

        cflow = ControlFlow(decl)

        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for b in cflow.blocks:
            logger.debug(b)

        # vv Not sure if valid any more vv
        # We can't just pass root in -- the stack, environment etc. will propagate otherwise.
        # ^^ Not sure if valid any more ^^
        if decl.kind == Declaration.METHOD:
            cflow.taint_propagation(cflow.root,
                                    tainted=set(decl.formal_parameters),
                                    stack=[],
                                    environment={},
                                    immune={},
                                    previous_edges=set(),
                                    new_edge='')
        else:
            cflow.taint_propagation(cflow.root,
                                    tainted=set(),
                                    stack=[],
                                    environment={},
                                    immune={},
                                    previous_edges=set(),
                                    new_edge='')

        logger.debug('dominators.frontier is')
        logger.debug(cflow.dominators.frontier)

        dot_path = 'VIEWS' + str(x) + '.dot'
        with open(dot_path, 'w') as dot_file:
            dot_file.write(cflow.graph.to_dot())
        G = pgv.AGraph(dot_path, strict=False, overlap=False, splines='spline')
        G.layout()
        G.draw('VIEWS' + str(x) + '.png')
        x = x + 1


def test_tryexcept():
    """Run taint propagation over the two try/except fixtures.

    Analyses ``tryexcept.py`` and then ``tryexceptexception.py`` and
    asserts, against ``junkhacker.log.py``, that no traceback was logged
    and that each expected "returning" message appears the expected number
    of times.
    """
    # Part 1
    _tryexcept_run_taint_analysis("files_to_test_against/tryexcept.py")

    # with open("junkhacker.log.py") as log_file:
    #     logs = log_file.read()
    # there_was_an_error = "Traceback"
    # assert logs.count(there_was_an_error) == 0

    # Part 2
    _tryexcept_run_taint_analysis("files_to_test_against/tryexceptexception.py")

    # The assertions are made against the log file the two runs produced.
    with open("junkhacker.log.py") as log_file:
        logs = log_file.read()

    there_was_an_error = "Traceback"
    assert logs.count(there_was_an_error) == 0

    returning = ") - returning "

    return_value0 = str([{'*const_message': '*const_Yo'}])
    key_string0 = returning + return_value0
    assert logs.count(key_string0) == 6

    return_value1 = str(['*const_None'])
    key_string1 = returning + return_value1
    assert logs.count(key_string1) == 2

    return_value2 = str([{'*const_message': '*const_success'}])
    key_string2 = returning + return_value2
    # Why 5? Why tryexcept 2 + tryexceptexception 3? Oh well.
    assert logs.count(key_string2) == 5

    return_value3 = str([{'*const_message': '*const_Failed'}])
    key_string3 = returning + return_value3
    assert logs.count(key_string3) == 2
def test_kwargs():
    """Check how keyword arguments are recorded during taint propagation.

    Parses ``files_to_test_against/kwargs.py``, runs taint propagation on
    every declaration (methods start with all formal parameters tainted),
    dumps each graph to ``VIEWS<n>.dot`` / ``VIEWS<n>.png`` and asserts that
    ``junkhacker.log.py`` contains the expected kwargs message exactly once.
    """
    # NOTE(review): this handle is never closed.
    f = open("files_to_test_against/kwargs.py")
    source = f.read()
    logger.debug('source is')
    logger.debug(source)

    bytecode_object = BytecodeObject('<string>')
    compiled_code = get_co(source)
    bytecode_object.parse_code(compiled_code)

    logger.debug('ghi declarations are')
    logger.debug(bytecode_object.declarations)
    # logger.debug('ghi dir(bytecode_object) is')
    # logger.debug(dir(bytecode_object))
    logger.debug('bytecode_object.main_module is')
    logger.debug(bytecode_object.main_module)

    # Only logs when an import matches BAD_IMPORTS; nothing is asserted.
    for imp_stmt in bytecode_object.main_module.imports:  #enumerate tktk
        logger.debug('imp_stmt is')
        logger.debug(imp_stmt)
        if any(bad_imp in str(imp_stmt) for bad_imp in BAD_IMPORTS):
            logger.debug("EMERGENCY")

    # Running index used to name the VIEWS<n> graph dumps.
    x = 0
    for decl in bytecode_object.declarations:
        # logger.debug('decl.formal_parameters is')
        # logger.debug(decl.formal_parameters)
        logger.debug('decl.kind is')
        logger.debug(decl.kind)
        if decl.kind == Declaration.METHOD:
            logger.debug('decl.formal_parameters is')
            logger.debug(decl.formal_parameters)
        logger.debug('dir(decl) is')
        logger.debug(dir(decl))
        cflow = ControlFlow(decl)

        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for b in cflow.blocks:
            logger.debug(b)

        # vv Not sure if valid any more vv
        # We can't just pass root in -- the stack, environment etc. will propagate otherwise.
        # ^^ Not sure if valid any more ^^
        if decl.kind == Declaration.METHOD:
            cflow.taint_propagation(cflow.root, tainted=set(decl.formal_parameters), stack=[], environment={}, immune={}, previous_edges=set(), new_edge='')
        else:
            cflow.taint_propagation(cflow.root, tainted=set(), stack=[], environment={}, immune={}, previous_edges=set(), new_edge='')

        logger.debug('dominators.frontier is')
        logger.debug(cflow.dominators.frontier)

        # Dump the graph and render it with pygraphviz.
        f = open('VIEWS' + str(x) + '.dot', 'w')
        f.write(cflow.graph.to_dot())
        f.close()
        G = pgv.AGraph('VIEWS' + str(x) + '.dot', strict=False, overlap=False, splines='spline')
        G.layout()
        G.draw('VIEWS' + str(x) + '.png')
        x = x + 1

    # The assertions are made against the log file the run produced.
    with open("junkhacker.log.py") as log_file:
        logs = log_file.read()

    # NOTE(review): the '*const_user' value below is not a valid string
    # literal as it stands -- it looks like part of it was masked or
    # corrupted. Restore the original value from version control before
    # relying on this test.
    kwargs = { '*const_user': '******'Alice.me\', [], {})"', '*const_consumer': 'consumer', '*const_fast': 'fast' }
    key_string = "type(kwargs) is <class 'junkhacker.analysis.basicBlockInterpreter.kdict'> and kwargs is " + str( kwargs)
    assert key_string in logs
    assert logs.count(key_string) == 1