Example #1
0
    def PopulateModel(self, trace):
        """
        Fill the item model of the viewer with one row per trace line.

        Every row consists of thread id, address (hex), disassembly, comment and CPU context of the line.
        A NotifyProgress dialog is shown and updated while the rows are created.
        :param trace: sized iterable of Traceline objects
        """
        self.CleanModel()

        w = NotifyProgress()
        w.show()
        total = len(trace)

        try:
            for ctr, line in enumerate(trace, 1):
                assert isinstance(line, Traceline)
                tid = QtGui.QStandardItem('%s' % line.thread_id)
                addr = QtGui.QStandardItem('%x' % line.addr)
                disasm = QtGui.QStandardItem(line.disasm_str())
                # The None checks must happen before iterating: as a filter inside the generator
                # they were only reached after iterating over None had already raised a TypeError.
                comment = QtGui.QStandardItem(
                    ''.join(line.comment) if line.comment is not None else '')
                context = QtGui.QStandardItem(
                    ''.join('%s:%s ' % (c, line.ctx[c]) for c in line.ctx.keys())
                    if line.ctx is not None else '')

                w.pbar_set(int(float(ctr) / float(total) * 100))

                self.sim.appendRow([tid, addr, disasm, comment, context])
        finally:
            # dismiss the progress dialog even if a trace line could not be rendered
            w.close()

        for column in range(5):
            self.treeView.resizeColumnToContents(column)
Example #2
0
    def PopulateModel(self, trace):
        """
        Fill the item model of the viewer with one row per trace line.

        Every row consists of thread id, address (hex), disassembly, comment and CPU context of the line.
        A NotifyProgress dialog is shown and updated while the rows are created.
        :param trace: sized iterable of Traceline objects
        """
        self.CleanModel()

        w = NotifyProgress()
        w.show()
        total = len(trace)

        try:
            for ctr, line in enumerate(trace, 1):
                assert isinstance(line, Traceline)
                tid = QtGui.QStandardItem('%s' % line.thread_id)
                addr = QtGui.QStandardItem('%x' % line.addr)
                disasm = QtGui.QStandardItem(line.disasm_str())
                # The None checks must happen before iterating: as a filter inside the generator
                # they were only reached after iterating over None had already raised a TypeError.
                comment = QtGui.QStandardItem(
                    ''.join(line.comment) if line.comment is not None else '')
                context = QtGui.QStandardItem(
                    ''.join('%s:%s ' % (c, line.ctx[c]) for c in line.ctx.keys())
                    if line.ctx is not None else '')

                w.pbar_set(int(float(ctr) / float(total) * 100))

                self.sim.appendRow([tid, addr, disasm, comment, context])
        finally:
            # dismiss the progress dialog even if a trace line could not be rendered
            w.close()

        for column in range(5):
            self.treeView.resizeColumnToContents(column)
Example #3
0
def manual_analysis(choice):
    """
    Allows the execution of analysis functions with the manual flag. Output will mainly be in the Output window and some instances require user interaction.
    The selected function is called with a deep copy of the prepared trace, manual=True and the progress dialog as update argument.
    :param choice: the manual function to be executed (key into manual_func)
    """
    w = NotifyProgress('Address count')
    w.show()
    try:
        trace = prepare_trace()
        func = manual_func[choice]
        w.pbar_update(10)
        func(deepcopy(trace), manual=True, update=w)
    finally:
        # always dismiss the progress dialog, also if the lookup or the analysis itself raises
        w.close()
Example #4
0
    def PopulateModel(self, threshold):
        """
        Fill the item model of the viewer with the graded lines of self.trace.

        For every trace line that has a predecessor, and only if threshold is greater than 2, two rows are appended:
        a 'previous CPU context:' row carrying the CPU context of the predecessor, followed by the row of the line
        itself (grade, thread id, address, disassembly, comment, CPU context).
        :param threshold: rows are only created if this value is greater than 2
        """
        # NOTE(review): line.grade is never compared against threshold and the first trace line is never
        # displayed -- confirm that this filter is what is intended.
        self.CleanModel()

        # NOTE(review): show() is never called on this dialog -- confirm whether it is meant to stay hidden.
        w = NotifyProgress()
        total = len(self.trace)
        prev = None

        def context_item(traceline):
            # 'reg:value ' pairs of the CPU context; a missing context gives an empty cell instead of a TypeError
            if traceline.ctx is None:
                return QtGui.QStandardItem('')
            return QtGui.QStandardItem(
                ''.join('%s:%s ' % (c, traceline.ctx[c]) for c in traceline.ctx.keys()))

        try:
            for ctr, line in enumerate(self.trace, 1):
                assert isinstance(line, Traceline)

                if prev is not None and threshold > 2:
                    # separator row which shows the CPU context before the current line was executed
                    self.sim.appendRow([QtGui.QStandardItem(' '),
                                        QtGui.QStandardItem(' '),
                                        QtGui.QStandardItem(' '),
                                        QtGui.QStandardItem('previous CPU context:'),
                                        QtGui.QStandardItem(' '),
                                        context_item(prev)])

                    grade = QtGui.QStandardItem('%s' % line.grade)
                    tid = QtGui.QStandardItem('%s' % line.thread_id)
                    addr = QtGui.QStandardItem('%x' % line.addr)
                    disasm = QtGui.QStandardItem(line.disasm_str())
                    comment = QtGui.QStandardItem(
                        ''.join(line.comment) if line.comment is not None else '')
                    self.sim.appendRow([grade, tid, addr, disasm, comment, context_item(line)])

                w.pbar_set(int(float(ctr) / float(total) * 100))
                prev = line
        finally:
            # dismiss the progress dialog even if a trace line could not be rendered
            w.close()

        for column in range(6):
            self.treeView.resizeColumnToContents(column)
Example #5
0
def manual_analysis(choice):
    """
    Allows the execution of analysis functions with the manual flag. Output will mainly be in the Output window and some instances require user interaction.
    The selected function is called with a deep copy of the prepared trace, manual=True and the progress dialog as update argument.
    :param choice: the manual function to be executed (key into manual_func)
    """
    w = NotifyProgress('Address count')
    w.show()
    try:
        trace = prepare_trace()
        func = manual_func[choice]
        w.pbar_update(10)
        func(deepcopy(trace), manual=True, update=w)
    finally:
        # always dismiss the progress dialog, also if the lookup or the analysis itself raises
        w.close()
Example #6
0
    def PopulateModel(self, threshold):
        """
        Fill the item model of the viewer with the graded lines of self.trace.

        For every trace line that has a predecessor, and only if threshold is greater than 2, two rows are appended:
        a 'previous CPU context:' row carrying the CPU context of the predecessor, followed by the row of the line
        itself (grade, thread id, address, disassembly, comment, CPU context).
        :param threshold: rows are only created if this value is greater than 2
        """
        # NOTE(review): line.grade is never compared against threshold and the first trace line is never
        # displayed -- confirm that this filter is what is intended.
        self.CleanModel()

        # NOTE(review): show() is never called on this dialog -- confirm whether it is meant to stay hidden.
        w = NotifyProgress()
        total = len(self.trace)
        prev = None

        def context_item(traceline):
            # 'reg:value ' pairs of the CPU context; a missing context gives an empty cell instead of a TypeError
            if traceline.ctx is None:
                return QtGui.QStandardItem('')
            return QtGui.QStandardItem(
                ''.join('%s:%s ' % (c, traceline.ctx[c]) for c in traceline.ctx.keys()))

        try:
            for ctr, line in enumerate(self.trace, 1):
                assert isinstance(line, Traceline)

                if prev is not None and threshold > 2:
                    # separator row which shows the CPU context before the current line was executed
                    self.sim.appendRow([QtGui.QStandardItem(' '),
                                        QtGui.QStandardItem(' '),
                                        QtGui.QStandardItem(' '),
                                        QtGui.QStandardItem('previous CPU context:'),
                                        QtGui.QStandardItem(' '),
                                        context_item(prev)])

                    grade = QtGui.QStandardItem('%s' % line.grade)
                    tid = QtGui.QStandardItem('%s' % line.thread_id)
                    addr = QtGui.QStandardItem('%x' % line.addr)
                    disasm = QtGui.QStandardItem(line.disasm_str())
                    comment = QtGui.QStandardItem(
                        ''.join(line.comment) if line.comment is not None else '')
                    self.sim.appendRow([grade, tid, addr, disasm, comment, context_item(line)])

                w.pbar_set(int(float(ctr) / float(total) * 100))
                prev = line
        finally:
            # dismiss the progress dialog even if a trace line could not be rendered
            w.close()

        for column in range(6):
            self.treeView.resizeColumnToContents(column)
Example #7
0
    def PopulateModel(self):
        """
        Fill the item model of the viewer with the clustering result stored in self.trace.

        Entries of self.trace which are Traceline objects become plain rows. Every other entry is treated as a cluster
        (a sequence of trace lines) and becomes a 'Cluster <first addr>-<last addr>' node. If vmr.bb is set the cluster
        is subdivided into basic blocks, each represented by a summary node built from self.bb_func with the lines of
        the block as children; otherwise the lines of the cluster are appended to the cluster node directly.
        """
        self.Clean()
        vmr = get_vmr()
        w = NotifyProgress()
        w.show()
        total = len(self.trace)

        def make_row(traceline):
            # items for one trace line: thread id, address, disassembly, comment, CPU context
            # (None is checked before iterating; a missing comment / context gives an empty cell)
            comment = ''.join(traceline.comment) if traceline.comment is not None else ''
            context = ''
            if traceline.ctx is not None:
                context = ''.join('%s:%s ' % (c, traceline.ctx[c]) for c in traceline.ctx)
            return [QtGui.QStandardItem('%s' % traceline.thread_id),
                    QtGui.QStandardItem('%x' % traceline.addr),
                    QtGui.QStandardItem(traceline.disasm_str()),
                    QtGui.QStandardItem(comment),
                    QtGui.QStandardItem(context)]

        # present clustering analysis in viewer
        prev_ctx = defaultdict(lambda: 0)
        try:
            for ctr, entry in enumerate(self.trace, 1):
                w.pbar_set(int(float(ctr) / float(total) * 100))

                if isinstance(entry, Traceline):
                    row = make_row(entry)
                    prev_ctx = entry.ctx
                    self.sim.appendRow(row)
                    continue

                cluster_node = QtGui.QStandardItem(
                    'Cluster %x-%x' % (entry[0].addr, entry[-1].addr))
                self.sim.appendRow(cluster_node)

                if not vmr.bb:
                    for member in entry:
                        cluster_node.appendRow(make_row(member))
                    continue

                # subdivide the clusters by basic blocks
                # NOTE(review): lines after the last basic block end of a cluster never enter bbs and are
                # therefore not displayed -- confirm that this is intended.
                bbs = []
                bb = []
                for member in entry:
                    assert isinstance(member, Traceline)
                    bb.append(member)
                    if is_basic_block_end(member.addr):
                        bbs.append(bb)
                        bb = []

                # enumerate instead of bbs.index(bb): index() returns the first *equal* block, which is the
                # wrong row as soon as a cluster contains two equal basic blocks
                for bb_idx, bb in enumerate(bbs):
                    bb_sum = self.bb_func(bb, self.ctx_reg_size, prev_ctx)

                    disasm_sum = ''.join(
                        '%s ; ' % ''.join('%s, ' % c for c in ins)
                        for ins in bb_sum.disasm)
                    comment_sum = ''
                    if bb_sum.comment is not None:
                        # empty comments are left out of the summary
                        comment_sum = ''.join('%s, ' % c for c in bb_sum.comment if c)
                    ctx_sum = ''
                    if bb_sum.ctx is not None:
                        ctx_sum = ''.join('%s:%s ' % (c, bb_sum.ctx[c]) for c in bb_sum.ctx)

                    bb_node = QtGui.QStandardItem(
                        'BB%s Summary %x-%x: %s\t%s\t%s' %
                        (bb_idx, bb[0].addr, bb[-1].addr, disasm_sum, comment_sum, ctx_sum))
                    for member in bb:
                        bb_node.appendRow(make_row(member))
                    cluster_node.appendRow(bb_node)
                    self.treeView.setFirstColumnSpanned(
                        bb_idx, cluster_node.index(), True)

                    prev_ctx = bb[-1].ctx
        finally:
            # dismiss the progress dialog even if an entry could not be rendered
            w.close()

        for column in range(5):
            self.treeView.resizeColumnToContents(column)
Example #8
0
def grading_automaton(visualization=0):
    """
    Grading System Analysis computes a grade for every trace line. It is basically a combination of all available analysis capabilities and runs them one after another, increases the grade
    for those trace lines which are in the analysis result and then runs the next trace analysis. In between the analysis runs a pattern matching run is started, to increase / decrease
    certain trace lines grades based on known patterns. The patterns are modelled after known shortcomings of the analysis capabilities.
    Every analysis stage is best effort: if a stage fails it is skipped and the remaining stages still run.
    :param visualization: 0 presents the graded trace in the GradingViewer; any other value asks for a threshold and prints every line whose grade reaches it
    """
    vmr = get_vmr()

    w = NotifyProgress('Grading')
    w.show()

    try:
        trace = prepare_trace()
        orig_trace = deepcopy(trace)

        ### INIT THE TRACE GRADES ###
        trace = init_grading(deepcopy(trace))
        w.pbar_update(10) # 10%

        ### REGISTER USAGE BASED: this must be done before optimization
        reg_dict = defaultdict(lambda: 0)
        # defaults, so the later stages have well defined (empty) sets if the classification below fails
        important_regs = set()
        disregard_regs = set()

        # find the register infrastructure and vm addressing scheme -> this tells us which registers are used for addressing and are not important for grading_automaton
        try:
            for line in trace:
                assert isinstance(line, Traceline)
                if line.is_op2_reg and get_reg_class(line.disasm[2]) is not None:  # get reg class will only return != None for the 8-16 standard cpu regs
                    reg_dict[get_reg_class(line.disasm[2])] += 1

            # get the sorted list of regs highest occurrence first
            sorted_keys = sorted(reg_dict.items(), key=operator.itemgetter(1), reverse=True)  # sorted_list = list of (reg_name, frequency)
            w.pbar_update(10) # 20%
            # classify the important and less important registers; for an odd number of registers one more register
            # gets declared unimportant, since it is better to be more careful about raising grades
            half = len(sorted_keys) // 2
            important_regs = set(reg[0] for reg in sorted_keys[:half])
            disregard_regs = set(reg[0] for reg in sorted_keys[half:])
        except Exception:
            pass  # best effort stage

        ### OPTIMIZE TRACE ###
        try:
            if not trace.constant_propagation:
                trace = optimization_const_propagation(trace)
        except Exception:
            pass  # best effort: continue with the trace as it is
        w.pbar_update(10) #30%
        try:
            if not trace.stack_addr_propagation:
                trace = optimization_stack_addr_propagation(trace)
        except Exception:
            pass  # best effort: continue with the trace as it is

        ### REGISTER USAGE AND INPUT OUTPUT BASED ###
        # raise the grade of line containing input and output values
        try:
            values = find_input(deepcopy(trace)).union(find_output(deepcopy(trace)))
            for line in trace:
                for val in values:
                    if val in line.to_str_line():
                        line.raise_grade(vmr.in_out)

            w.pbar_update(10) #40%

            # backtrace regs and raise / lower grade depending on the importance of the register
            virt_regs = find_virtual_regs(deepcopy(trace))
            for key in virt_regs:
                reg_class = get_reg_class(key)
                if reg_class in important_regs:
                    raise_it, verb = True, 'raised'
                elif reg_class in disregard_regs:
                    raise_it, verb = False, 'lowered'
                else:
                    continue
                for line in follow_virt_reg(deepcopy(trace), virt_reg_addr=virt_regs[key]):
                    try:
                        for other in trace:
                            if line == other:
                                if raise_it:
                                    other.raise_grade(vmr.in_out)
                                else:
                                    other.lower_grade(vmr.in_out)
                    except ValueError:
                        print('The line %s was not found in the trace, hence the grade could not be %s properly!' % (line.to_str_line(), verb))
        except Exception:
            pass  # best effort stage
        w.pbar_update(5) #45%

        ### REGISTER USAGE FREQUENCY BASED ###
        try:
            # lower the grades for the most commonly used registers
            for line in trace:
                assert isinstance(line, Traceline)
                if line.is_op1_reg and get_reg_class(line.disasm[1]) is not None:  # get reg class will only return != None for the 8-16 standard cpu regs
                    reg_dict[get_reg_class(line.disasm[1])] += 1

            # get the sorted list of regs highest occurrence first
            sorted_keys = sorted(reg_dict.items(), key=operator.itemgetter(1), reverse=True)  # sorted_list = list of (reg_name, frequency)
            w.pbar_update(5) #50%
            # classify the less important registers: the more frequently used half
            disregard_regs = set(reg[0] for reg in sorted_keys[:len(sorted_keys) // 2])

            for line in trace:
                assert isinstance(line, Traceline)
                if (line.is_jmp or line.is_mov or line.is_pop or line.is_push
                        or line.disasm[0].startswith(('ret', 'inc', 'lea'))):
                    line.lower_grade(vmr.pa_ma)
                elif len(line.disasm) > 1 and get_reg_class(line.disasm[1]) in disregard_regs:
                    line.lower_grade(vmr.pa_ma)
        except Exception:
            pass  # best effort stage
        w.pbar_update(10) #60%

        ### CLUSTERING BASED ###
        try:
            # raise the grades of the unique lines after clustering
            cluster_result = repetition_clustering(deepcopy(trace))
            for line in cluster_result:
                if isinstance(line, Traceline):
                    trace[trace.index(line)].raise_grade(vmr.clu)
        except Exception:
            pass  # best effort stage
        w.pbar_update(10) #70%

        ### PEEPHOLE GRADING ###
        try:
            for line in trace:
                assert isinstance(line, Traceline)
                mnemonic = line.disasm[0]
                if (mnemonic in ('pop', 'push', 'inc', 'dec', 'lea', 'test') or mnemonic.startswith('c')
                        or line.is_jmp or line.is_mov or mnemonic.startswith('r')):
                    line.lower_grade(vmr.pa_ma)
                    continue
                reg_class = get_reg_class(line.disasm[1]) if len(line.disasm) > 1 else None
                if reg_class is not None and reg_class > 4:
                    continue
                line.raise_grade(vmr.pa_ma)
        except Exception:
            pass  # best effort stage
        w.pbar_update(10) #80%

        ### OPTIMIZATION BASED ###
        try:
            opti_trace = optimize(deepcopy(trace))
            w.pbar_update(10) #90%
            for line in opti_trace:
                assert isinstance(line, Traceline)
                try:  # trace is heavily changed after optimization, might not find the trace line in the pre_op_trace
                    graded = trace[trace.index(line)]
                except ValueError:
                    # skip only this line; an unguarded lookup used to abort the whole stage here
                    continue
                graded.raise_grade(vmr.pa_ma)
                # additionally raise grade for every line that uses the memory and is not a mov
                if line.disasm_len == 3 and line.is_op1_mem and not line.is_mov:
                    graded.raise_grade(vmr.mem_use)
                else:
                    graded.lower_grade(vmr.pa_ma)
        except Exception:
            pass  # best effort stage
        w.pbar_update(5)

        ### STATIC OPTIMIZATION BASED ###
        # TODO atm this is a little workaround to include the static analysis results
        try:
            comments = set(v_inst.split(' ')[0] for v_inst in [Comment(ea) for ea in range(vmr.code_start, vmr.code_end)] if v_inst is not None)
            print(comments)
            ins = set(c.lstrip('v').split('_')[0] for c in comments)
            for line in trace:
                if line.disasm[0] in ins:
                    line.raise_grade(vmr.static)
        except Exception:
            pass  # best effort stage
        w.pbar_update(5)

        ### RECURSION ###
        # the result of find_vm_addr is needed below to recognize calls into the vm function
        vm_func = None
        try:
            vm_func = find_vm_addr(orig_trace)
        except Exception:
            pass  # without it only the 'ss:' operands are raised below
        w.close()

        grades = set(line.grade for line in trace)
        max_grade = max(grades)
        # raise the grade of trace lines containing calls to the vm function or 'ss:' operands to maximum grade
        try:
            for line in trace:
                n_parts = len(line.disasm)
                is_vm_call = (vm_func is not None and n_parts > 1
                              and line.disasm[0].startswith('call') and vm_func in line.disasm[1])
                # only inspect operands that exist: a missing operand used to abort the whole loop
                uses_ss = any('ss:' in line.disasm[i] for i in range(1, min(n_parts, 3)))
                if is_vm_call or uses_ss:
                    line.grade = max_grade
        except Exception:
            pass  # best effort stage

        if visualization == 0:
            v = GradingViewer(trace, save=save)
            v.Show()
        else:
            threshold = AskLong(1, 'There are a total of %s grades: %s. Specify a threshold which lines to display:' % (len(grades), ''.join('%s ' % c for c in grades)))
            if threshold is not None and threshold > -1:
                for line in trace:
                    if line.grade >= threshold:
                        print('%s %s' % (line.grade, line.to_str_line()))

    except Exception as e:
        w.close()
        msg(str(e) + '\n')
Example #9
0
def clustering_analysis(visualization=0, grade=False, trace=None):
    """
    Clustering analysis wrapper which clusters the trace into repeating instructions and presents the results in the Clustering Viewer.
    :param visualization: 0 presents the result in the ClusterViewer and the StackChangeViewer, any other value passes it to visualize_cli
    :param grade: grading (not used by this function)
    :param trace: instruction trace; if None the trace from prepare_trace() is used
    """
    if trace is None:
        trace = prepare_trace()

    w = NotifyProgress('Clustering')
    w.show()

    try:
        try:
            if not trace.constant_propagation:
                trace = optimization_const_propagation(trace)
            if not trace.stack_addr_propagation:
                trace = optimization_stack_addr_propagation(trace)
        except Exception:
            # best effort: continue with the trace as far as it could be optimized
            pass
        w.pbar_update(30)
        # cluster
        vr = find_virtual_regs(deepcopy(trace))
        w.pbar_update(20)
        cluster = repetition_clustering(deepcopy(trace))
        w.pbar_update(25)
        if visualization == 0:

            v0 = ClusterViewer(cluster, create_bb_diff, trace.ctx_reg_size, save_func=save)
            w.pbar_update(24)
            v0.Show()

            # collect the stack changes of all clusters; single lines only update the previous context
            prev_ctx = defaultdict(lambda: 0)
            stack_changes = defaultdict(lambda: 0)
            for line in cluster:
                if isinstance(line, Traceline):
                    prev_ctx = line.ctx
                else:
                    stack_changes = create_cluster_gist(line, trace.ctx_reg_size, prev_ctx, stack_changes)
                    prev_ctx = line[-1].ctx
            # sort the stack_changes by address, highest address first
            sorted_result = sorted(stack_changes.keys(), reverse=True)
            w.close()
            v1 = StackChangeViewer(vr, sorted_result, stack_changes)
            v1.Show()
        else:
            w.close()
            visualize_cli(cluster)
    except Exception as e:
        w.close()
        # report the failure instead of silently dropping it
        print('[*] Clustering analysis failed: %s' % e)
Example #10
0
def input_output_analysis(manual=False):
    """
    Input / Output analysis wrapper which computes the components of the output values of the VM function and allows for comparing these with the input arguments to the VM function.
    Afterwards the results are presented in the VMInputOutputViewer.
    :param manual: let user choose Function for input output analysis
    """
    func_addr = None
    if manual:
        func_addr = ChooseFunction('Please select the function for black box analysis')
    w = NotifyProgress('In/Out')
    w.show()

    try:
        trace = prepare_trace()
        vmr = get_vmr()
        # find relevant regs and operands
        ctx = {}
        if func_addr is not None:  # TODO enable input / output analysis of all functions
            # the analyses are run, but their results are not presented for a manually chosen function yet
            find_input(deepcopy(trace))
            find_output(deepcopy(trace))
            w.close()
        else:
            # start the three analyses one after another and collect them with join() below
            vr_job = DynamicAnalyzer(find_virtual_regs, trace)
            w.pbar_update(10)
            vr_job.start()
            in_job = DynamicAnalyzer(find_input, trace)
            w.pbar_update(10)
            in_job.start()
            out_job = DynamicAnalyzer(find_output, trace)
            w.pbar_update(10)
            out_job.start()
            vr_job.join()
            w.pbar_update(20)
            vr = vr_job.get_result()
            # create the trace excerpt for every relevant reg
            for key in vr.keys():
                if get_reg_class(key) is not None:
                    ctx[key] = follow_virt_reg(deepcopy(trace), virt_reg_addr=vr[key], real_reg_name=key)
            vmr.vm_stack_reg_mapping = ctx
            w.pbar_update(20)
            in_job.join()
            w.pbar_update(10)
            out_job.join()
            w.pbar_update(10)

            w.close()
            v = VMInputOuputViewer(in_job.get_result(), out_job.get_result(), ctx)
            v.Show()
    except Exception as e:
        w.close()
        # report the failure instead of silently dropping it
        print('[*] Input / output analysis failed: %s' % e)
Example #11
0
def address_heuristic():
    """
    Compute the occurrence of every address in the instruction trace and print one line per address.
    """
    w = NotifyProgress('Address count')
    w.show()
    try:
        trace = prepare_trace()
        w.pbar_update(40)
        ac = address_count(deepcopy(trace))
        w.pbar_update(60)
        w.close()

        for addr, count in ac:
            print('Address %x (Disasm: %s) was encountered %s times.' % (addr, GetDisasm(addr), count))
    except Exception as e:
        print('[*] An exception occurred! Quitting! ')
        # name the cause, otherwise the failure cannot be diagnosed from the output
        print('[*] %s' % e)
        w.close()
Example #12
0
def grading_automaton(visualization=0):
    """
    Grading System Analysis computes a grade for every trace line. It is basically a combination of all available analysis capabilities and runs them one after another, increases the grade
    for those trace lines which are in the analysis result and then runs the next trace analysis. In between the analysis runs a pattern matching run is started, to increase / decrease
    certain trace lines grades based on known patterns. The patterns are modelled after known shortcomings of the analysis capabilities.
    Every analysis stage is best effort: if a stage fails it is skipped and the remaining stages still run.
    :param visualization: 0 presents the graded trace in the GradingViewer; any other value asks for a threshold and prints every line whose grade reaches it
    """
    vmr = get_vmr()

    w = NotifyProgress('Grading')
    w.show()

    try:
        trace = prepare_trace()
        orig_trace = deepcopy(trace)

        ### INIT THE TRACE GRADES ###
        trace = init_grading(deepcopy(trace))
        w.pbar_update(10) # 10%

        ### REGISTER USAGE BASED: this must be done before optimization
        reg_dict = defaultdict(lambda: 0)
        # defaults, so the later stages have well defined (empty) sets if the classification below fails
        important_regs = set()
        disregard_regs = set()

        # find the register infrastructure and vm addressing scheme -> this tells us which registers are used for addressing and are not important for grading_automaton
        try:
            for line in trace:
                assert isinstance(line, Traceline)
                if line.is_op2_reg and get_reg_class(line.disasm[2]) is not None:  # get reg class will only return != None for the 8-16 standard cpu regs
                    reg_dict[get_reg_class(line.disasm[2])] += 1

            # get the sorted list of regs highest occurrence first
            sorted_keys = sorted(reg_dict.items(), key=operator.itemgetter(1), reverse=True)  # sorted_list = list of (reg_name, frequency)
            w.pbar_update(10) # 20%
            # classify the important and less important registers; for an odd number of registers one more register
            # gets declared unimportant, since it is better to be more careful about raising grades
            half = len(sorted_keys) // 2
            important_regs = set(reg[0] for reg in sorted_keys[:half])
            disregard_regs = set(reg[0] for reg in sorted_keys[half:])
        except Exception:
            pass  # best effort stage

        ### OPTIMIZE TRACE ###
        try:
            if not trace.constant_propagation:
                trace = optimization_const_propagation(trace)
        except Exception:
            pass  # best effort: continue with the trace as it is
        w.pbar_update(10) #30%
        try:
            if not trace.stack_addr_propagation:
                trace = optimization_stack_addr_propagation(trace)
        except Exception:
            pass  # best effort: continue with the trace as it is

        ### REGISTER USAGE AND INPUT OUTPUT BASED ###
        # raise the grade of line containing input and output values
        try:
            values = find_input(deepcopy(trace)).union(find_output(deepcopy(trace)))
            for line in trace:
                for val in values:
                    if val in line.to_str_line():
                        line.raise_grade(vmr.in_out)

            w.pbar_update(10) #40%

            # backtrace regs and raise / lower grade depending on the importance of the register
            virt_regs = find_virtual_regs(deepcopy(trace))
            for key in virt_regs:
                reg_class = get_reg_class(key)
                if reg_class in important_regs:
                    raise_it, verb = True, 'raised'
                elif reg_class in disregard_regs:
                    raise_it, verb = False, 'lowered'
                else:
                    continue
                for line in follow_virt_reg(deepcopy(trace), virt_reg_addr=virt_regs[key]):
                    try:
                        for other in trace:
                            if line == other:
                                if raise_it:
                                    other.raise_grade(vmr.in_out)
                                else:
                                    other.lower_grade(vmr.in_out)
                    except ValueError:
                        print('The line %s was not found in the trace, hence the grade could not be %s properly!' % (line.to_str_line(), verb))
        except Exception:
            pass  # best effort stage
        w.pbar_update(5) #45%

        ### REGISTER USAGE FREQUENCY BASED ###
        try:
            # lower the grades for the most commonly used registers
            for line in trace:
                assert isinstance(line, Traceline)
                if line.is_op1_reg and get_reg_class(line.disasm[1]) is not None:  # get reg class will only return != None for the 8-16 standard cpu regs
                    reg_dict[get_reg_class(line.disasm[1])] += 1

            # get the sorted list of regs highest occurrence first
            sorted_keys = sorted(reg_dict.items(), key=operator.itemgetter(1), reverse=True)  # sorted_list = list of (reg_name, frequency)
            w.pbar_update(5) #50%
            # classify the less important registers: the more frequently used half
            disregard_regs = set(reg[0] for reg in sorted_keys[:len(sorted_keys) // 2])

            for line in trace:
                assert isinstance(line, Traceline)
                if (line.is_jmp or line.is_mov or line.is_pop or line.is_push
                        or line.disasm[0].startswith(('ret', 'inc', 'lea'))):
                    line.lower_grade(vmr.pa_ma)
                elif len(line.disasm) > 1 and get_reg_class(line.disasm[1]) in disregard_regs:
                    line.lower_grade(vmr.pa_ma)
        except Exception:
            pass  # best effort stage
        w.pbar_update(10) #60%

        ### CLUSTERING BASED ###
        try:
            # raise the grades of the unique lines after clustering
            cluster_result = repetition_clustering(deepcopy(trace))
            for line in cluster_result:
                if isinstance(line, Traceline):
                    trace[trace.index(line)].raise_grade(vmr.clu)
        except Exception:
            pass  # best effort stage
        w.pbar_update(10) #70%

        ### PEEPHOLE GRADING ###
        try:
            for line in trace:
                assert isinstance(line, Traceline)
                mnemonic = line.disasm[0]
                if (mnemonic in ('pop', 'push', 'inc', 'dec', 'lea', 'test') or mnemonic.startswith('c')
                        or line.is_jmp or line.is_mov or mnemonic.startswith('r')):
                    line.lower_grade(vmr.pa_ma)
                    continue
                reg_class = get_reg_class(line.disasm[1]) if len(line.disasm) > 1 else None
                if reg_class is not None and reg_class > 4:
                    continue
                line.raise_grade(vmr.pa_ma)
        except Exception:
            pass  # best effort stage
        w.pbar_update(10) #80%

        ### OPTIMIZATION BASED ###
        try:
            opti_trace = optimize(deepcopy(trace))
            w.pbar_update(10) #90%
            for line in opti_trace:
                assert isinstance(line, Traceline)
                try:  # trace is heavily changed after optimization, might not find the trace line in the pre_op_trace
                    graded = trace[trace.index(line)]
                except ValueError:
                    # skip only this line; an unguarded lookup used to abort the whole stage here
                    continue
                graded.raise_grade(vmr.pa_ma)
                # additionally raise grade for every line that uses the memory and is not a mov
                if line.disasm_len == 3 and line.is_op1_mem and not line.is_mov:
                    graded.raise_grade(vmr.mem_use)
                else:
                    graded.lower_grade(vmr.pa_ma)
        except Exception:
            pass  # best effort stage
        w.pbar_update(5)

        ### STATIC OPTIMIZATION BASED ###
        # TODO atm this is a little workaround to include the static analysis results
        try:
            comments = set(v_inst.split(' ')[0] for v_inst in [Comment(ea) for ea in range(vmr.code_start, vmr.code_end)] if v_inst is not None)
            print(comments)
            ins = set(c.lstrip('v').split('_')[0] for c in comments)
            for line in trace:
                if line.disasm[0] in ins:
                    line.raise_grade(vmr.static)
        except Exception:
            pass  # best effort stage
        w.pbar_update(5)

        ### RECURSION ###
        # the result of find_vm_addr is needed below to recognize calls into the vm function
        vm_func = None
        try:
            vm_func = find_vm_addr(orig_trace)
        except Exception:
            pass  # without it only the 'ss:' operands are raised below
        w.close()

        grades = set(line.grade for line in trace)
        max_grade = max(grades)
        # raise the grade of trace lines containing calls to the vm function or 'ss:' operands to maximum grade
        try:
            for line in trace:
                n_parts = len(line.disasm)
                is_vm_call = (vm_func is not None and n_parts > 1
                              and line.disasm[0].startswith('call') and vm_func in line.disasm[1])
                # only inspect operands that exist: a missing operand used to abort the whole loop
                uses_ss = any('ss:' in line.disasm[i] for i in range(1, min(n_parts, 3)))
                if is_vm_call or uses_ss:
                    line.grade = max_grade
        except Exception:
            pass  # best effort stage

        if visualization == 0:
            v = GradingViewer(trace, save=save)
            v.Show()
        else:
            threshold = AskLong(1, 'There are a total of %s grades: %s. Specify a threshold which lines to display:' % (len(grades), ''.join('%s ' % c for c in grades)))
            if threshold is not None and threshold > -1:
                for line in trace:
                    if line.grade >= threshold:
                        print('%s %s' % (line.grade, line.to_str_line()))

    except Exception as e:
        w.close()
        msg(str(e) + '\n')
Example #13
0
def clustering_analysis(visualization=0, grade=False, trace=None):
    """
    Clustering analysis wrapper which clusters the trace into repeating instructions and presents the results.
    :param visualization: 0 shows the result in the ClusterViewer and the StackChangeViewer, any other value
                          hands the clustered trace to visualize_cli
    :param grade: grading (accepted for interface compatibility; not read by this function)
    :param trace: instruction trace; when None the trace is obtained via prepare_trace()
    """
    if trace is None:
        trace = prepare_trace()

    progress = NotifyProgress('Clustering')
    progress.show()

    try:
        # The optimizations are best effort: if the trace lacks the propagation flags or an
        # optimization fails, the analysis continues with the trace as it is.
        try:
            if not trace.constant_propagation:
                trace = optimization_const_propagation(trace)
            if not trace.stack_addr_propagation:
                trace = optimization_stack_addr_propagation(trace)
        except Exception:
            pass
        progress.pbar_update(30)

        # cluster (both analyses work on their own copy of the trace)
        vr = find_virtual_regs(deepcopy(trace))
        progress.pbar_update(20)
        cluster = repetition_clustering(deepcopy(trace))
        progress.pbar_update(25)

        if visualization == 0:
            cluster_viewer = ClusterViewer(cluster, create_bb_diff, trace.ctx_reg_size, save_func=save)
            progress.pbar_update(24)
            cluster_viewer.Show()

            # Walk the clustered trace and accumulate the stack changes of every cluster;
            # prev_ctx always holds the context of the last line seen before the current element.
            prev_ctx = defaultdict(lambda: 0)
            stack_changes = defaultdict(lambda: 0)
            for element in cluster:
                if isinstance(element, Traceline):
                    prev_ctx = element.ctx
                else:
                    stack_changes = create_cluster_gist(element, trace.ctx_reg_size, prev_ctx, stack_changes)
                    prev_ctx = element[-1].ctx

            # sort the stack_changes by address, highest first
            sorted_result = sorted(stack_changes, reverse=True)
            progress.close()
            stack_viewer = StackChangeViewer(vr, sorted_result, stack_changes)
            stack_viewer.Show()
        else:
            progress.close()
            visualize_cli(cluster)
    except Exception as e:
        # Close the progress window and tell the user what went wrong instead of failing silently.
        progress.close()
        print('[*] Clustering analysis failed: %r' % (e,))
Example #14
0
def input_output_analysis(manual=False):
    """
    Input / Output analysis wrapper which computes the components of the output values of the VM function and allows for comparing these with the input arguments to the VM function.
    Afterwards the results are presented in the VMInputOutputViewer.
    :param manual: let user choose Function for input output analysis
    """
    func_addr = None
    if manual:
        func_addr = ChooseFunction('Please select the function for black box analysis')
    w = NotifyProgress('In/Out')
    w.show()

    try:
        # Trace preparation happens inside the try block so that the progress window is
        # closed again if it fails.
        trace = prepare_trace()
        vmr = get_vmr()
        # find relevant regs and operands
        ctx = {}
        if func_addr is not None:  # TODO enable input / output analysis of all functions
            # The results are computed but not presented yet (see TODO above).
            find_input(deepcopy(trace))
            find_output(deepcopy(trace))
            w.close()
        else:
            # Start the three analyses first, then collect their results one after another.
            # NOTE(review): DynamicAnalyzer presumably runs the function in the background
            # (it is started and joined) - confirm against its definition.
            vr_analyzer = DynamicAnalyzer(find_virtual_regs, trace)
            w.pbar_update(10)
            vr_analyzer.start()
            input_analyzer = DynamicAnalyzer(find_input, trace)
            w.pbar_update(10)
            input_analyzer.start()
            output_analyzer = DynamicAnalyzer(find_output, trace)
            w.pbar_update(10)
            output_analyzer.start()

            vr_analyzer.join()
            w.pbar_update(20)
            virt_regs = vr_analyzer.get_result()
            # create the trace excerpt for every relevant reg
            for key in virt_regs.keys():
                if get_reg_class(key) is not None:
                    ctx[key] = follow_virt_reg(deepcopy(trace), virt_reg_addr=virt_regs[key], real_reg_name=key)
            vmr.vm_stack_reg_mapping = ctx
            w.pbar_update(20)
            input_analyzer.join()
            w.pbar_update(10)
            output_analyzer.join()
            w.pbar_update(10)

            w.close()
            v = VMInputOuputViewer(input_analyzer.get_result(), output_analyzer.get_result(), ctx)
            v.Show()
    except Exception as e:
        # Close the progress window and tell the user what went wrong instead of failing silently.
        w.close()
        print('[*] Input / Output analysis failed: %r' % (e,))
Example #15
0
def address_heuristic():
    """
    Count how often every address occurs in the instruction trace and print one line per counted address.
    """
    progress = NotifyProgress('Address count')
    progress.show()
    try:
        recorded_trace = prepare_trace()
        progress.pbar_update(40)
        # the counting works on a copy of the trace
        occurrences = address_count(deepcopy(recorded_trace))
        progress.pbar_update(60)
        progress.close()

        report = 'Address %x (Disasm: %s) was encountered %s times.'
        for address, hits in occurrences:
            print(report % (address, GetDisasm(address), hits))
    except:
        print('[*] An exception occurred! Quitting! ')
        progress.close()
Example #16
0
    def PopulateModel(self):
        """
        Fill the item model with the content of self.trace.

        Single trace lines become one row each. Every other element is treated as a cluster (a sequence of
        trace lines) and gets a 'Cluster <first addr>-<last addr>' node. If vmr.bb is set the cluster is split
        into basic blocks at every instruction whose mnemonic starts with 'j' and each block gets a summary
        node computed by self.bb_func; otherwise the lines of the cluster are appended directly to the
        cluster node.
        """
        def comment_text(comment):
            # a missing comment is shown as an empty cell
            return ''.join(comment) if comment is not None else ''

        def context_text(ctx):
            # a missing context is shown as an empty cell
            if ctx is None:
                return ''
            return ''.join('%s:%s ' % (reg, ctx[reg]) for reg in ctx)

        def make_row(entry):
            # the five columns shown for a single trace line
            return [QtGui.QStandardItem('%s' % entry.thread_id),
                    QtGui.QStandardItem('%x' % entry.addr),
                    QtGui.QStandardItem(entry.disasm_str()),
                    QtGui.QStandardItem(comment_text(entry.comment)),
                    QtGui.QStandardItem(context_text(entry.ctx))]

        self.Clean()
        vmr = get_vmr()
        w = NotifyProgress()
        w.show()

        # the progress window is closed even if populating the model fails
        try:
            total = len(self.trace)

            # present clustering analysis in viewer
            prev_ctx = defaultdict(lambda: 0)
            for position, element in enumerate(self.trace, 1):
                w.pbar_set(int(float(position) / float(total) * 100))

                if isinstance(element, Traceline):
                    row = make_row(element)
                    prev_ctx = element.ctx
                    self.sim.appendRow(row)
                    continue

                cluster_node = QtGui.QStandardItem('Cluster %x-%x' % (element[0].addr, element[-1].addr))
                self.sim.appendRow(cluster_node)

                if not vmr.bb:
                    for entry in element:
                        cluster_node.appendRow(make_row(entry))
                    continue

                # subdivide the clusters by basic blocks; a block ends with a jump instruction
                # NOTE(review): lines following the last jump of a cluster never reach bbs and
                # are therefore not displayed in this mode - confirm that this is intended.
                bbs = []
                bb = []
                for entry in element:
                    assert isinstance(entry, Traceline)
                    bb.append(entry)
                    if entry.disasm[0].startswith('j'):
                        bbs.append(bb)
                        bb = []

                for bb_index, bb in enumerate(bbs):
                    bb_sum = self.bb_func(bb, self.ctx_reg_size, prev_ctx)

                    summary_disasm = ''.join('%s ; ' % ''.join('%s, ' % op for op in ins)
                                             for ins in bb_sum.disasm)
                    if bb_sum.comment is not None:
                        # empty comment entries are skipped
                        summary_comment = ''.join('%s, ' % c for c in filter(None, bb_sum.comment))
                    else:
                        summary_comment = ''

                    bb_node = QtGui.QStandardItem(
                        'BB%s Summary %x-%x: %s\t%s\t%s' % (bb_index, bb[0].addr, bb[-1].addr,
                                                            summary_disasm,
                                                            summary_comment,
                                                            context_text(bb_sum.ctx)))
                    for entry in bb:
                        bb_node.appendRow(make_row(entry))
                    cluster_node.appendRow(bb_node)
                    # bb_node is the bb_index-th child of cluster_node
                    self.treeView.setFirstColumnSpanned(bb_index, cluster_node.index(), True)

                    # the next basic block is summarized relative to the end of this one
                    prev_ctx = bb[-1].ctx
        finally:
            w.close()

        for column in range(5):
            self.treeView.resizeColumnToContents(column)