Example #1
0
def test_walker():
    """Traverse a six-node cyclic graph and count the edges handed to a visitor.

    Eight edges are added to the graph (the last one closes a cycle back
    to the start node) and the test asserts the visitor collected eight.
    A second, linear three-node graph is then built only so that its
    post-order DFS can be written to the debug log.
    """
    g = DiGraph()
    n1, n2, n3, n4, n5, n6 = [
        DiGraph.make_node(data=label)
        for label in ('n1', 'n2', 'n3', 'n4', 'n5', 'n6')
    ]

    # (source, dest) pairs; the final pair adds the cycle n6 -> n1.
    endpoints = [
        (n1, n2),
        (n1, n3),
        (n1, n4),
        (n2, n4),
        (n3, n4),
        (n4, n5),
        (n5, n6),
        (n6, n1),
    ]
    # All edges are created before any of them is attached to the graph.
    edges = [DiGraph.make_edge(source=src, dest=dst) for src, dst in endpoints]
    for new_edge in edges:
        g.add_edge(new_edge)

    class EdgePrinterVisitor(EdgeVisitor):
        # Collects every edge passed to visit() so the test can count them.
        def __init__(self):
            EdgeVisitor.__init__(self)
            self.edges = []

        def visit(self, edge):
            self.edges.append(edge)

    visitor = EdgePrinterVisitor()
    Walker(g, visitor).traverse(n1)

    logger.debug("Post-order DFS: %s", dfs_postorder_nodes(g, n1))

    assert len(visitor.edges) == 8

    # Second graph: a simple chain n1 -> n2 -> n3, logged but not asserted on.
    s = DiGraph()
    head = s.make_add_node(data='n1')
    middle = s.make_add_node(data='n2')
    tail = s.make_add_node(data='n3')
    s.make_add_edge(head, middle)
    s.make_add_edge(middle, tail)
    logger.debug("Post-order DFS: %s", dfs_postorder_nodes(s, head))
def test_loop_breaks():
    """Parse LOOP_BREAK_CASE and build a ControlFlow for each declaration.

    Asserts that exactly two declarations are found and that every
    control flow exposes blocks and a non-empty ``dominators.dom``.
    """
    logger.debug("test_loop_breaks")

    code = get_co(LOOP_BREAK_CASE)
    assert code is not None

    parsed = BytecodeObject('<string>')
    parsed.parse_code(code)
    assert len(parsed.declarations) == 2

    for declaration in parsed.declarations:
        flow = ControlFlow(declaration)
        assert flow.blocks is not None
        assert len(flow.dominators.dom) > 0
Example #3
0
def test_inverse_graph():
    """Log the DOT form of a four-node graph and of its inverse.

    The test makes no assertions; it only checks that building the
    graph, inverting it and rendering both to DOT do not raise.
    """
    g = DiGraph()
    n1, n2, n3, n4 = [
        DiGraph.make_node(data=label) for label in ('n1', 'n2', 'n3', 'n4')
    ]

    # (source, dest) pairs for the five edges of the graph.
    endpoints = [(n1, n2), (n1, n3), (n1, n4), (n2, n4), (n3, n4)]
    # All edges are created before any of them is attached to the graph.
    edges = [DiGraph.make_edge(source=src, dest=dst) for src, dst in endpoints]
    for new_edge in edges:
        g.add_edge(new_edge)

    logger.debug("Normal graph :=\n%s", g.to_dot())

    inverted = g.inverse()
    logger.debug("Inverse graph :=\n%s", inverted.to_dot())
def test_user_pic():
    """Build the control-flow graph for USER_PIC and dump it as DOT.

    Expects exactly two declarations. For each one the basic blocks and
    the dominance frontier are logged and the graph is written to
    ``USER_PIC.dot`` in the current working directory.
    """
    co_simple = get_co(USER_PIC)
    assert co_simple is not None

    bytecode_object = BytecodeObject('<string>')
    bytecode_object.parse_code(co_simple)

    assert len(bytecode_object.declarations) == 2

    for decl in bytecode_object.declarations:
        cflow = ControlFlow(decl)

        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for b in cflow.blocks:
            logger.debug(b)

        logger.debug('dominators.frontier is')
        logger.debug(cflow.dominators.frontier)

        # The file is reopened in 'w' mode on every iteration, so it ends
        # up holding only the last declaration's graph. The context
        # manager guarantees the handle is closed even if to_dot() raises.
        with open('USER_PIC.dot', 'w') as dot_file:
            dot_file.write(cflow.graph.to_dot())
def test_if_statements():
    """Parse IF_STMTS_CASE, log what the parser collected, and build its CFG.

    Expects exactly one declaration. For it, the dominance frontier and
    the basic blocks are logged, the graph is written to
    ``IF_STMTS_CASE.dot`` in the current working directory, and the
    control flow must expose blocks and a non-empty ``dominators.dom``.
    """
    co_simple = get_co(IF_STMTS_CASE)
    assert co_simple is not None

    # NOTE(review): the sibling tests in this file pass '<string>' here;
    # this odd name looks like leftover debugging. Left as-is because it
    # is a runtime value -- confirm before normalising.
    bytecode_object = BytecodeObject('<strhhhhhing>')
    bytecode_object.parse_code(co_simple)

    logger.debug('bytecode_object is ')
    logger.debug(dir(bytecode_object))
    logger.debug('bytecode_object.loads is ')
    logger.debug(bytecode_object.loads)
    logger.debug('bytecode_object.stores is ')
    logger.debug(bytecode_object.stores)
    logger.debug('bytecode_object.compares is ')
    logger.debug(bytecode_object.compares)
    logger.debug('bytecode_object.rets is ')
    logger.debug(bytecode_object.rets)

    logger.debug('Finished parsing code\n')
    assert len(bytecode_object.declarations) == 1

    for decl in bytecode_object.declarations:
        cflow = ControlFlow(decl)

        # Print Dominance Frontier
        logger.debug('dominators.frontier is')
        logger.debug(cflow.dominators.frontier)

        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for b in cflow.blocks:
            logger.debug(b)

        # Write IF_STMTS_CASE.dot (no PNG is rendered here). The context
        # manager closes the handle even if to_dot() or write() raises.
        with open('IF_STMTS_CASE.dot', 'w') as dot_file:
            dot_file.write(cflow.graph.to_dot())

        assert cflow.blocks is not None
        assert len(cflow.dominators.dom) > 0
Example #6
0
def test_delete_false_positives():
    """Run interprocedural taint propagation over a large sample module.

    Per the original author's note, a pass means both the buddy system
    and the dominators fix worked. The check itself is indirect: after
    the analysis the test counts two marker messages in
    ``junkhacker.log.py``.

    Side effects: writes ``VIEWS<n>.dot`` and ``VIEWS<n>.png`` into the
    current working directory -- one pair per formal parameter for
    method declarations, one pair per declaration otherwise -- and
    mutates the summary attributes of each method declaration.
    """

    def dump_views(cflow, index):
        # Write cflow's graph to VIEWS<index>.dot and render it to
        # VIEWS<index>.png via pgv.AGraph.
        dot_path = 'VIEWS' + str(index) + '.dot'
        with open(dot_path, 'w') as dot_file:
            dot_file.write(cflow.graph.to_dot())

        G = pgv.AGraph(dot_path,
                       strict=False,
                       overlap=False,
                       splines='spline')
        G.layout()
        G.draw('VIEWS' + str(index) + '.png')

    # Context manager so the sample file is closed once it has been read.
    with open("files_to_test_against/test_delete_false_positives.py") as f:
        source = f.read()

    logger.debug('source is')
    logger.debug(source)

    bytecode_object = BytecodeObject('<string>')
    compiled_code = get_co(source)
    bytecode_object.parse_code(compiled_code)

    logger.debug('Declarations are')
    logger.debug(bytecode_object.declarations)

    logger.debug('bytecode_object.main_module is')
    logger.debug(bytecode_object.main_module)

    # We can save the cflow for each decl
    all_the_decls = {}

    # First loop gets all of the cflow objects for each decl
    for decl in bytecode_object.declarations:
        logger.debug('decl.kind is')
        logger.debug(decl.kind)
        if decl.kind == Declaration.METHOD:
            logger.debug('decl.formal_parameters are')
            logger.debug(decl.formal_parameters)
            for param in decl.formal_parameters:
                if param != 'self':
                    logger.debug("Going to say %s is tainted", param)

        logger.debug('dir(decl) is')
        logger.debug(dir(decl))

        all_the_decls[decl] = ControlFlow(decl)

    # Materialise the reversed view: logging the bare iterator would only
    # print "<... reverseiterator object at 0x...>", not the order.
    logger.debug("Decls are in this order %s",
                 list(reversed(bytecode_object.declarations)))
    logger.debug("Decls are in this order type(%s)",
                 type(bytecode_object.declarations))

    # Running index shared by every VIEWS<x> output file.
    x = 0
    # Now we can pass every cflow object to every decl
    for decl in reversed(bytecode_object.declarations):
        cflow = all_the_decls[decl]
        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for b in cflow.blocks:
            logger.debug(b)

        logger.debug('dominators.frontier is')
        logger.debug(cflow.dominators.frontier)

        graph = cflow.graph

        logger.debug("Uh what, # of nodes is %s", len(graph.nodes))
        logger.debug("Here they are %s", graph.nodes)
        # Buddies are set once, starting from the first ENTRY node found.
        for node in graph.nodes:
            if node.kind == 'ENTRY':
                bfs_set_buddies(graph, node)
                break

        for node in graph.nodes:
            logger.debug("Bing %s is %s", type(node), node)
            predecessors = graph.in_edges(node)
            if len(predecessors) >= 2:
                logger.debug("predecessors for %s are %s", node, predecessors)

        # Every block with an edge into the exit node is flagged as
        # carrying a return value.
        for in_edge in cflow.graph.in_edges(cflow.exit_node):
            logger.debug("Node in_edge.source.data is %s", in_edge.source.data)
            logger.debug("Node in_edge.source.data.length is %s",
                         in_edge.source.data.length)
            logger.debug("Node in_edge.source.data.bytecode_slice is %s",
                         in_edge.source.data.bytecode_slice)
            logger.debug("Node in_edge.source.data.end_target is %s",
                         in_edge.source.data.end_target)
            in_edge.source.data.has_ret_value = True

        logger.debug('Now starting taint propagation')

        if decl.kind == Declaration.METHOD:
            # Methods are analysed once per formal parameter, tainting
            # only that parameter ('self' is never tainted).
            for param in decl.formal_parameters:
                logger.debug("formal_param is %s", param)
                one_at_a_time = set()
                if param != 'self':
                    one_at_a_time.add(param)
                logger.debug('one_at_a_time is %s', one_at_a_time)
                cflow.taint_propagation(cflow.root,
                                        decl,
                                        tainted=one_at_a_time,
                                        stack=[],
                                        environment={},
                                        immune={},
                                        previous_edges=set(),
                                        new_edge='',
                                        other_decls=all_the_decls,
                                        interprocedural_mode=True,
                                        stack_of_buddies=[])
                if decl.pretty_summary:
                    logger.error(decl.formal_parameters)
                    logger.error(decl.pretty_summary)
                    logger.error(decl.one_param_summary)
                    decl.all_params_summary[param] = decl.one_param_summary
                    logger.error("Here it is :)")
                    logger.error(decl.all_params_summary)

                if decl.vuln_summary:
                    logger.error("ONE TIME ONLY!")
                    decl.all_params_vuln_summary[param] = decl.vuln_summary
                    # We clean the slate for the next param
                    decl.vuln_summary = []
                    decl.inter_vuln_summary = []

                if decl.returns_tainted:
                    decl.all_params_returns_tainted[param] = True
                    logger.debug("BEFORE")
                    decl.returns_tainted = False

                dump_views(cflow, x)
                x = x + 1
        else:
            # Non-method declarations are analysed once with nothing tainted.
            cflow.taint_propagation(cflow.root,
                                    current_decl=None,
                                    tainted=set(),
                                    stack=[],
                                    environment={},
                                    immune={},
                                    previous_edges=set(),
                                    new_edge='',
                                    other_decls=all_the_decls,
                                    interprocedural_mode=True,
                                    stack_of_buddies=[])
            dump_views(cflow, x)
            x = x + 1

    logger.debug("All the decls dict is %s", all_the_decls)
    with open("junkhacker.log.py") as log_file:
        logs = log_file.read()
        first_key_string = "So current_bloc.succ == stack_of_buddies[-1]"
        assert logs.count(first_key_string) == 31

        last_key_string = "Uh oh spaghettios"
        assert logs.count(last_key_string) == 2
def test_left_foo_on_stack_due_to_conditional():
    """Check that a leftover stack entry is cleaned up exactly once.

    Runs taint propagation over every declaration of the
    ``left_foo_on_stack_due_to_conditional.py`` sample (all formal
    parameters tainted for methods, nothing tainted otherwise) and then
    asserts that ``junkhacker.log.py`` contains the expected
    "Cleaning a leftover ..." message, and only one such message.

    Side effects: writes ``VIEWS<n>.dot`` and ``VIEWS<n>.png`` for each
    declaration into the current working directory.
    """
    # Context manager so the sample file is closed once it has been read.
    with open("files_to_test_against/left_foo_on_stack_due_to_conditional.py") as f:
        source = f.read()

    logger.debug('source is')
    logger.debug(source)

    bytecode_object = BytecodeObject('<string>')
    compiled_code = get_co(source)
    bytecode_object.parse_code(compiled_code)

    logger.debug('ghi declarations are')
    logger.debug(bytecode_object.declarations)
    logger.debug('bytecode_object.main_module is')
    logger.debug(bytecode_object.main_module)
    for imp_stmt in bytecode_object.main_module.imports:
        logger.debug('imp_stmt is')
        logger.debug(imp_stmt)
        # Only logged; a bad import does not fail the test.
        if any(bad_imp in str(imp_stmt) for bad_imp in BAD_IMPORTS):
            logger.debug("EMERGENCY")

    # x numbers the VIEWS<x> output files, one pair per declaration.
    for x, decl in enumerate(bytecode_object.declarations):
        logger.debug('decl.kind is')
        logger.debug(decl.kind)
        if decl.kind == Declaration.METHOD:
            logger.debug('decl.formal_parameters is')
            logger.debug(decl.formal_parameters)
        logger.debug('dir(decl) is')
        logger.debug(dir(decl))

        cflow = ControlFlow(decl)

        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for b in cflow.blocks:
            logger.debug(b)

        # formal_parameters is only read for methods, as in the original
        # two-branch form of this call.
        if decl.kind == Declaration.METHOD:
            tainted = set(decl.formal_parameters)
        else:
            tainted = set()
        # Fresh stack/environment/etc. are passed on every call.
        cflow.taint_propagation(cflow.root,
                                tainted=tainted,
                                stack=[],
                                environment={},
                                immune={},
                                previous_edges=set(),
                                new_edge='')

        dot_path = 'VIEWS' + str(x) + '.dot'
        # Closed even if to_dot() or write() raises.
        with open(dot_path, 'w') as dot_file:
            dot_file.write(cflow.graph.to_dot())

        G = pgv.AGraph(dot_path,
                       strict=False,
                       overlap=False,
                       splines='spline')
        G.layout()
        G.draw('VIEWS' + str(x) + '.png')

    with open("junkhacker.log.py") as log_file:
        logs = log_file.read()
        assert "Cleaning a leftover *const_foo due to a JUMP_IF_TRUE_OR_POP instruction at edge 0->36" in logs
        assert logs.count("Cleaning a leftover") == 1
def _analyze_tryexcept_sample(source_path):
    """Parse *source_path* and run taint propagation on each declaration.

    For every declaration the basic blocks and dominance frontier are
    logged, taint propagation is run (all formal parameters tainted for
    methods, nothing tainted otherwise), and the graph is dumped to
    ``VIEWS<n>.dot`` / ``VIEWS<n>.png`` in the current working directory,
    with ``n`` restarting at 0 on every call.
    """
    # Context manager so the sample file is closed once it has been read.
    with open(source_path) as f:
        source = f.read()

    logger.debug('source is')
    logger.debug(source)

    bytecode_object = BytecodeObject('<string>')
    compiled_code = get_co(source)
    bytecode_object.parse_code(compiled_code)

    logger.debug('ghi declarations are')
    logger.debug(bytecode_object.declarations)
    logger.debug('bytecode_object.main_module is')
    logger.debug(bytecode_object.main_module)
    for imp_stmt in bytecode_object.main_module.imports:
        logger.debug('imp_stmt is')
        logger.debug(imp_stmt)
        # Only logged; a bad import does not fail the test.
        if any(bad_imp in str(imp_stmt) for bad_imp in BAD_IMPORTS):
            logger.debug("EMERGENCY")

    for x, decl in enumerate(bytecode_object.declarations):
        logger.debug('decl.kind is')
        logger.debug(decl.kind)
        if decl.kind == Declaration.METHOD:
            logger.debug('decl.formal_parameters is')
            logger.debug(decl.formal_parameters)
        logger.debug('dir(decl) is')
        logger.debug(dir(decl))

        cflow = ControlFlow(decl)

        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for b in cflow.blocks:
            logger.debug(b)

        # formal_parameters is only read for methods.
        if decl.kind == Declaration.METHOD:
            tainted = set(decl.formal_parameters)
        else:
            tainted = set()
        # Fresh stack/environment/etc. are passed on every call.
        cflow.taint_propagation(cflow.root,
                                tainted=tainted,
                                stack=[],
                                environment={},
                                immune={},
                                previous_edges=set(),
                                new_edge='')

        logger.debug('dominators.frontier is')
        logger.debug(cflow.dominators.frontier)

        dot_path = 'VIEWS' + str(x) + '.dot'
        # Closed even if to_dot() or write() raises.
        with open(dot_path, 'w') as dot_file:
            dot_file.write(cflow.graph.to_dot())

        G = pgv.AGraph(dot_path,
                       strict=False,
                       overlap=False,
                       splines='spline')
        G.layout()
        G.draw('VIEWS' + str(x) + '.png')


def test_tryexcept():
    """Analyse two try/except samples and check the logged return values.

    Both ``tryexcept.py`` and ``tryexceptexception.py`` are run through
    the same pipeline; the assertions then inspect ``junkhacker.log.py``
    for the absence of "Traceback" and for exact counts of several
    ") - returning ..." messages.
    """
    # Part 1
    _analyze_tryexcept_sample("files_to_test_against/tryexcept.py")

    # Part 2
    _analyze_tryexcept_sample("files_to_test_against/tryexceptexception.py")

    with open("junkhacker.log.py") as log_file:
        logs = log_file.read()
        there_was_an_error = "Traceback"
        assert logs.count(there_was_an_error) == 0

        returning = ") - returning "

        return_value0 = str([{'*const_message': '*const_Yo'}])
        key_string0 = returning + return_value0
        assert logs.count(key_string0) == 6

        return_value1 = str(['*const_None'])
        key_string1 = returning + return_value1
        assert logs.count(key_string1) == 2

        return_value2 = str([{'*const_message': '*const_success'}])
        key_string2 = returning + return_value2
        # Why 5? Why tryexcept 2 + tryexceptexception 3? Oh well.
        assert logs.count(key_string2) == 5

        return_value3 = str([{'*const_message': '*const_Failed'}])
        key_string3 = returning + return_value3
        assert logs.count(key_string3) == 2
def test_kwargs():
    """Run taint propagation over the kwargs sample and check the log.

    Parses ``files_to_test_against/kwargs.py``, runs taint propagation
    on every declaration (all formal parameters tainted for methods,
    nothing tainted otherwise), dumps each graph to ``VIEWS<n>.dot`` /
    ``VIEWS<n>.png`` in the current working directory, and finally
    asserts that ``junkhacker.log.py`` contains one specific
    "type(kwargs) is ..." message exactly once.
    """
    # NOTE(review): this handle is never closed; consider a `with` block.
    f = open("files_to_test_against/kwargs.py")
    source = f.read()

    logger.debug('source is')
    logger.debug(source)

    bytecode_object = BytecodeObject('<string>')
    compiled_code = get_co(source)
    bytecode_object.parse_code(compiled_code)

    logger.debug('ghi declarations are')
    logger.debug(bytecode_object.declarations)
    # logger.debug('ghi dir(bytecode_object) is')
    # logger.debug(dir(bytecode_object))
    logger.debug('bytecode_object.main_module is')
    logger.debug(bytecode_object.main_module)
    # A match against BAD_IMPORTS is only logged; it does not fail the test.
    for imp_stmt in bytecode_object.main_module.imports:  #enumerate tktk
        logger.debug('imp_stmt is')
        logger.debug(imp_stmt)
        if any(bad_imp in str(imp_stmt) for bad_imp in BAD_IMPORTS):
            logger.debug("EMERGENCY")
    # x numbers the VIEWS<x> output files, one pair per declaration.
    x = 0
    for decl in bytecode_object.declarations:
        # logger.debug('decl.formal_parameters is')
        # logger.debug(decl.formal_parameters)
        logger.debug('decl.kind is')
        logger.debug(decl.kind)
        if decl.kind == Declaration.METHOD:
            logger.debug('decl.formal_parameters is')
            logger.debug(decl.formal_parameters)
        logger.debug('dir(decl) is')
        logger.debug(dir(decl))

        cflow = ControlFlow(decl)

        # Print BasicBlocks
        logger.debug("Blocks in CFG are: ")
        for b in cflow.blocks:
            logger.debug(b)

        # vv Not sure if valid any more vv
        # We can't just pass root in -- the stack, environment etc. will propagate otherwise.
        # ^^ Not sure if valid any more ^^
        # The two calls differ only in `tainted`: methods start with every
        # formal parameter tainted, everything else with an empty set.
        if decl.kind == Declaration.METHOD:
            cflow.taint_propagation(cflow.root,
                                    tainted=set(decl.formal_parameters),
                                    stack=[],
                                    environment={},
                                    immune={},
                                    previous_edges=set(),
                                    new_edge='')
        else:
            cflow.taint_propagation(cflow.root,
                                    tainted=set(),
                                    stack=[],
                                    environment={},
                                    immune={},
                                    previous_edges=set(),
                                    new_edge='')

        logger.debug('dominators.frontier is')
        logger.debug(cflow.dominators.frontier)

        # Write the graph as DOT, then render that file to PNG via pgv.
        # NOTE(review): the handle leaks if to_dot() or write() raises.
        f = open('VIEWS' + str(x) + '.dot', 'w')
        f.write(cflow.graph.to_dot())
        f.close()

        G = pgv.AGraph('VIEWS' + str(x) + '.dot',
                       strict=False,
                       overlap=False,
                       splines='spline')
        G.layout()
        G.draw('VIEWS' + str(x) + '.png')
        x = x + 1

    with open("junkhacker.log.py") as log_file:
        logs = log_file.read()

        # NOTE(review): the '*const_user' value below has unbalanced quotes
        # (it looks redacted or otherwise garbled) and will not parse as
        # written -- restore the original literal from version control.
        kwargs = {
            '*const_user': '******'Alice.me\', [], {})"',
            '*const_consumer': 'consumer',
            '*const_fast': 'fast'
        }
        # The expected message embeds str(kwargs), so the dict's contents
        # and ordering must match what the analysis logged.
        key_string = "type(kwargs) is <class 'junkhacker.analysis.basicBlockInterpreter.kdict'> and kwargs is " + str(
            kwargs)

        assert key_string in logs
        assert logs.count(key_string) == 1