def test_build(self):
        """Constraining the problem must never yield more solutions than the raw search space.

        The raw search space is the product of the number of entries stored
        under every key of the corrupted program.
        """
        reader = TextDisassembleReader(TestTextDisassembleReader.ASM_PATH)
        instructions = reader.read_instructions()

        # Metadata is gathered before the program is corrupted.
        collector = MetadataCollector()
        collector.collect(instructions)

        corruptor = RandomCorruptor(10.0, 2, True)
        corruptor.save_corrupted_program = False
        corrupted = from_instruction_list_to_dict(instructions)
        instructions = corruptor.corrupt(corrupted)

        problem = ProblemBuilder().build(instructions, collector)

        # Upper bound: every entry of every key combined freely.
        upper_bound = 1
        for candidates in instructions.values():
            upper_bound *= len(candidates)

        print('Initial solutions: {}'.format(upper_bound))

        solutions = problem.getSolutions()
        solution_count = len(solutions)

        print('After constraints solutions: {}'.format(solution_count))

        self.assertGreaterEqual(upper_bound, len(solutions))
Beispiel #2
0
    def test_collect(self):
        """Check the collected condition count and the ordering of the storage distances.

        For every storage index that has a mean distance recorded, the
        minimum must not exceed the mean and the mean must not exceed the
        maximum.
        """
        reader = TextDisassembleReader(self.ASM_PATH)
        instructions = reader.read_instructions()

        # Collect the metadata
        c = MetadataCollector()
        c.collect(instructions)

        # Check the counting
        self.assertEqual(3, len(c.condition_count))

        # Assert min <= mean <= max for the distance between uses of a storage
        for i in range(AReg.STORAGE_COUNT):
            if i not in c.storage_mean_dist:
                continue

            # The same text is printed and used as the failure message.
            summary = "{}: {}, {}, {} ".format(i, c.storage_min_dist[i],
                                               c.storage_mean_dist[i],
                                               c.storage_max_dist[i])
            print(summary)
            ordered = (c.storage_min_dist[i] <= c.storage_mean_dist[i] <=
                       c.storage_max_dist[i])
            self.assertTrue(ordered, summary)
 def get_original_instruction(path, address, needs_collector):
     """Read the program at ``path`` and return the first entry stored at ``address``.

     :param path: location handed to ``TextDisassembleReader``
     :param address: key looked up in the converted program
     :param needs_collector: when true, a ``MetadataCollector`` is filled
         from the instructions that were read
     :return: tuple ``(first entry at address, converted program, collector)``;
         the collector is ``None`` when ``needs_collector`` is false
     """
     instructions = TextDisassembleReader(path).read_instructions()

     collector = None
     if needs_collector:
         # Metadata is collected from the list form, before the conversion.
         collector = MetadataCollector()
         collector.collect(instructions)

     program = from_instruction_list_to_dict(instructions)
     return program[address][0], program, collector
 def obtain_corrupted_program():
     """Load the hello-world disassembly and produce a corrupted copy of it.

     :return: tuple ``(instructions as read, corrupted program)``
     """
     reader = ElfioTextDisassembleReader("data/helloworld_elfiodissasembly.disam")
     # Only the second element of what read() returns is used here.
     instructions = reader.read()[1]

     # NOTE(review): the collector is filled but never used or returned;
     # kept because collect() may have effects that are not visible here.
     collector = MetadataCollector()
     collector.collect(instructions)

     corruptor = RandomCorruptor(30.0, 5, True)
     corruptor.save_corrupted_program = False

     # Corrupt fresh instruction objects so that `instructions` is returned
     # as it was read.
     clones = [
         CAPSInstruction(original.encoding, original.address)
         for original in instructions
     ]
     program = corruptor.corrupt(from_instruction_list_to_dict(clones))
     return instructions, program
 def __init__(self, program, original_program):
     """Store the candidate program and collect metadata from the original one.

     :param program: candidate program, kept by reference
     :param original_program: instruction list used both to collect the
         metadata and, converted to a dict, as the reference program
     """
     # Walk configuration flags.
     self._from_max_to_min = False
     self._forward_update = False

     self._program = program

     # The metadata is collected from the list form, before the conversion.
     metadata = MetadataCollector()
     metadata.collect(original_program)
     self._metadata = metadata
     self._original = from_instruction_list_to_dict(original_program)

     # Solution state starts out empty.
     self._solution_size = 0
     self._solution = BitQueue()

     # Indicates min number of candidates an address is reduced to
     self._candidates_reduced = None
Beispiel #6
0
def collect_and_print(fun_name, instructions, program):
    """Collect metadata for ``instructions``, print it and save bar charts of the counts.

    Three SVG files are written, named ``program`` followed by
    ``_registers.svg``, ``_condition.svg`` and ``_instruction.svg``; the last
    figure is then shown.

    :param fun_name: label printed before the statistics
    :param instructions: instructions handed to ``MetadataCollector.collect``
    :param program: prefix of the generated file names
    """
    c = MetadataCollector()
    print("Function: " + fun_name)
    c.collect(instructions)
    print(c.condition_count)
    print(c.instruction_count)
    print(c.storage_count)

    # One line per empty space: encoding, distance to the previous one, and
    # the instruction itself.
    previous = None
    for current in c.empty_spaces:
        if previous is not None:
            gap = abs(previous.encoding - current.encoding)
            print('{}; {}; {}'.format(current.encoding, gap, current))
        else:
            print('{}; 0; {}'.format(current.encoding, current))
        previous = current

    def save_bar_chart(counts, slots, suffix):
        # Plot counts[0..slots-1] (missing keys count as 0) on a cleared
        # figure and save it next to the other charts.
        heights = [counts[i] if i in counts else 0 for i in range(slots)]
        positions = np.arange(slots)
        plt.clf()
        plt.bar(positions, heights, 0.35)
        plt.tight_layout()
        plt.savefig(program + suffix)

    save_bar_chart(c.storage_count, 18, '_registers.svg')
    save_bar_chart(c.condition_count, 15, '_condition.svg')
    save_bar_chart(c.instruction_count, 200, '_instruction.svg')
    plt.show()
class AbstractForwardConstraintSolutionBuilder(object):
    """Base class for builders that walk a candidate program address by address.

    ``program`` is indexed by address and every entry is a mutable sequence
    of candidate instructions.  Usage counts are collected from
    ``original_program`` with ``MetadataCollector``; when ``_forward_update``
    is enabled they are consumed as addresses are processed, and candidates
    that no longer fit the remaining counts are discarded from the other
    addresses.

    Subclasses customise the walk through ``_on_index_found`` and the
    ``_from_max_to_min`` / ``_forward_update`` flags.  This base class never
    modifies ``_solution`` or ``_solution_size`` itself.
    """

    def __init__(self, program, original_program):
        """Store the candidate program and collect metadata from the original one.

        :param program: candidate program, kept by reference and pruned in
            place by ``build`` when ``_forward_update`` is enabled
        :param original_program: instruction list used to collect the
            metadata and, converted to a dict, as the reference program
        """
        # When True, addresses with the most candidates are processed first.
        self._from_max_to_min = False
        self._program = program
        self._metadata = MetadataCollector()
        self._metadata.collect(original_program)
        self._solution_size = 0
        self._original = from_instruction_list_to_dict(original_program)
        self._solution = BitQueue()
        # When True, build() consumes the counts and prunes candidates.
        self._forward_update = False
        # Indicates min number of candidates an address is reduced to
        self._candidates_reduced = None

    @property
    def solution(self):
        """The solution container created in ``__init__``."""
        return self._solution

    @property
    def solution_size(self):
        """The current value of the solution size counter."""
        return self._solution_size

    @staticmethod
    def _has_remaining(counts, key):
        """Return True when ``key`` is present in ``counts`` with a non-zero count."""
        return key in counts and counts[key] != 0

    def _comply_constraints(self, inst):
        """
        Indicates if an instruction can still be selected given the
        remaining counts.

        :param inst: candidate instruction
        :return: False when the instruction is flagged ``ignore`` or when its
            opcode, its condition or any register it uses is missing from
            the corresponding count table or has a count of zero; True
            otherwise
        """
        if inst.ignore:
            return False

        m = self._metadata
        if not self._has_remaining(m.instruction_count, inst.opcode_field):
            return False

        if not self._has_remaining(m.condition_count, inst.conditional_field):
            return False

        # NOTE(review): this reads registers_used() while _update_constraints
        # decrements per storages_used() -- confirm both are meant to refer
        # to the same set.
        return all(
            self._has_remaining(m.storage_count, r)
            for r in inst.registers_used())

    def _remove_invalid_instructions(self, addresses):
        """
        Remove all instructions not complying with current constraints.

        The first address in ``addresses`` (the one currently being
        processed) is skipped.  The last candidate of every list is never
        tested, so a list is never emptied by this method.
        NOTE(review): confirm that keeping the last candidate untested is
        intended.

        :param addresses: addresses whose candidate lists are pruned in place
        :return: True if some instructions were removed
        """
        removed = False
        for a in addresses[1:]:
            pa = self._program[a]
            i = 0
            # Stop one short of the end: the final candidate always stays.
            while i < len(pa) - 1:
                if self._comply_constraints(pa[i]):
                    i += 1
                else:
                    # Do not advance: the next candidate slid into slot i.
                    pa.pop(i)
                    removed = True
        return removed

    def _update_constraints(self, inst):
        """
        Decrement the counts used by ``inst``.

        Instructions flagged ``ignore`` are skipped, and so are keys that
        are absent from a count table.

        :param inst: instruction whose opcode, condition and storages are
            consumed
        """
        if inst.ignore:
            return

        m = self._metadata
        if inst.opcode_field in m.instruction_count:
            m.instruction_count[inst.opcode_field] -= 1

        if inst.conditional_field in m.condition_count:
            m.condition_count[inst.conditional_field] -= 1

        for r in inst.storages_used():
            if r in m.storage_count:
                m.storage_count[r] -= 1

    def _find_address_correct_index(self, pa, ori, ln):
        """
        Find the position of the original instruction among the candidates.

        Candidates are compared with the original through their ``str()``
        form.

        :param pa: candidate list
        :param ori: original instruction
        :param ln: number of leading candidates to inspect
        :return: index of the first matching candidate
        :raises RuntimeError: when none of the first ``ln`` candidates matches
        """
        target = str(ori)
        for index in range(ln):
            if str(pa[index]) == target:
                return index
        raise RuntimeError('Impossible')

    def build(self):
        """
        Process every address of the program, fewest candidates first
        (most first when ``_from_max_to_min`` is set).

        Addresses without candidates, or whose original instruction is
        flagged ``ignore``, are skipped.  When an address has more than one
        candidate, the index of the original instruction is located and
        passed to ``_on_index_found``.  With ``_forward_update`` enabled the
        counts are then decremented for that candidate and the remaining
        addresses are pruned.

        :raises RuntimeError: when an address has several candidates and
            none of them matches the original instruction
        """

        def candidate_count(address):
            return len(self._program[address])

        addresses = list(self._program.keys())
        addresses.sort(key=candidate_count, reverse=self._from_max_to_min)

        while addresses:
            current = addresses[0]
            pa = self._program[current]
            ln = len(pa)
            ori = self._original[current][0]
            if ln <= 0 or ori.ignore:
                addresses.pop(0)
                continue

            index = 0
            if ln > 1:
                index = self._find_address_correct_index(pa, ori, ln)
                self._on_index_found(index, pa, ori, ln)

            if self._forward_update:
                # Update the constraints now that we have updated the solution
                self._update_constraints(pa[index])
                # With the constraints updated, remove all instructions which
                # are invalid
                self._remove_invalid_instructions(addresses)

            addresses.pop(0)
            # Candidate lists may have shrunk, so restore the processing order.
            addresses.sort(key=candidate_count, reverse=self._from_max_to_min)

    def _on_index_found(self, size, pa, ori, ln):
        """
        Hook called by ``build`` for every address with more than one
        candidate; does nothing in this base class.

        :param size: index of the original instruction within ``pa``
        :param pa: candidate list of the address
        :param ori: original instruction of the address
        :param ln: number of candidates in ``pa`` when the address was read
        """
        pass
def run_recovery(original_program, corruptor, recuperator, passes=1):
    """Corrupt a copy of a program, run the recovery passes and write the reports.

    Files written along the way: ``corrupted_program.txt``,
    ``initial_solution.sol``, one ``instructions<N>.txt`` per reporting step
    and ``final_solution.sol``.

    :param original_program: program as accepted by
        ``from_functions_to_list_and_addr``
    :param corruptor: object whose ``corrupt`` method receives the program
        in dict form and returns the corrupted program
    :param recuperator: callable invoked as
        ``recuperator(collector, program, functions=fns)``; the result must
        expose a ``passes`` attribute and a ``recover()`` method
    :param passes: value assigned to ``passes`` on every recuperator created
    """
    # Separate the instructions from the function addresses
    original_program, fns = from_functions_to_list_and_addr(original_program)

    # Work on fresh instruction objects built from the original ones
    program = [
        CAPSInstruction(source.encoding, position=source.address)
        for source in original_program
    ]

    # Collect the metrics on it
    collector = MetadataCollector()
    collector.collect(program)

    print("[INFO]: Metrics collected")

    # Corrupt it:
    print("[INFO]: Corrupting program")
    program = corruptor.corrupt(from_instruction_list_to_dict(program))
    print("[INFO]: Program corrupted")
    SolutionQuality(program, original_program).report()

    print_report('corrupted_program.txt', original_program,
                 from_instruction_dict_to_list(program))

    SolutionWriter().write_binary('initial_solution.sol', original_program,
                                  program)

    def report_pass(number):
        # Dump the current candidates next to the original program.
        print_report('instructions{}.txt'.format(number), original_program,
                     from_instruction_dict_to_list(program))

    # Repeat the heuristics until a pass removes no candidate anywhere.
    pass_count = 1
    while True:
        r = recuperator(collector, program, functions=fns)
        r.passes = passes
        r.recover()
        print("[INFO]: Heuristics computed  (pass {})".format(pass_count))

        report_pass(pass_count)

        # Every address is visited, even after a removal has been seen.
        removed_any = False
        for candidates in program.values():
            if remove_bad_candidates_at_addr(candidates) > 0:
                removed_any = True

        SolutionQuality(program, original_program).report()
        if not removed_any:
            break
        pass_count += 1

    pass_count += 1

    # Change to continuous scoring
    for candidates in program.values():
        for inst in candidates:
            inst.score_function = probabilistic_rules
    report_pass(pass_count)

    print('[INFO]: Constraining: ')
    enumerator = ForwardConstraintSolutionEnumerator(program, original_program)
    enumerator.build()
    print('[INFO]: Constrained solution size: {}'.format(
        enumerator.solution_size))
    print('[INFO]: Constrained solution: {}'.format(enumerator.solution))
    SolutionQuality(program, original_program).report()

    pass_count += 1
    report_pass(pass_count)

    SolutionWriter().write_binary('final_solution.sol', original_program,
                                  program)