Exemple #1
0
 def add_file(self, filepath, offset):
     '''
     Registers the ELF file at filepath with self and with GDB.

     One mapping is appended to self for every section returned by
     read_elf_sects, with its start and end addresses shifted by offset
     (the address the ELF is loaded at). The file's symbols are then
     loaded into GDB with "add-symbol-file". Nothing is done if a file
     with the same basename was already added.
     '''
     base = os.path.basename(filepath)
     if base in self._files:
         return

     # section name -> relocated start address
     relocated = {}
     for sect_name, (sect_start, sect_size) in read_elf_sects(filepath).items():
         load_addr = sect_start + offset
         relocated[sect_name] = load_addr
         self.append(
             AttrDict(start=load_addr,
                      end=load_addr + sect_size,
                      size=sect_size,
                      offset=0,
                      name=base,
                      sect=sect_name))

     # .text is passed positionally; every other section goes through "-s"
     text_addr = relocated[".text"]
     extra_sections = " ".join(
         "-s {} {:#x}".format(sect_name, load_addr)
         for (sect_name, load_addr) in relocated.items()
         if sect_name != ".text")
     command = "add-symbol-file '{}' {:#x} {}".format(
         filepath, text_addr, extra_sections)
     gdb.execute(command, False, True)
     self._files.add(base)
Exemple #2
0
    def __init__(self, backtrace_text, blacklist=None, major_depth=5):
        '''
        Populates self from the text of a backtrace and computes its hashes.

        A Frame is appended for every line of backtrace_text that starts
        with "#". Frames for which self._in_blacklist() is true are marked
        with blacklisted = True and do not contribute to the hashes. The
        first major_depth non-blacklisted frames are folded into the major
        hash; every non-blacklisted frame is folded into the minor hash.
        Hashing stops at the first frame whose name contains
        "libc_start_main". The result is stored in self.hash as an AttrDict
        with "major" and "minor" keys.
        '''
        list.__init__(self)
        self.blacklist = blacklist
        self.abnormal_termination = False

        for line in backtrace_text.splitlines():
            # startswith() instead of line[0] so that empty lines in the
            # backtrace text are skipped rather than raising IndexError
            if line.startswith("#"):
                self.append(Frame(line))

        hc = 0  # number of frames hashed so far
        major = "0"
        minor = "0"
        for frame in self:

            # The check below is a workaround for a known libc/gdb runaway
            # backtrace issue, see
            # http://sourceware.org/ml/libc-alpha/2012-03/msg00573.html
            name = frame.name()
            if name and "libc_start_main" in name:
                break

            if self._in_blacklist(frame):
                frame.blacklisted = True
                continue

            # each hash chains the previous digest with this frame's hash
            frame_token = str(hash(frame))
            if hc < major_depth:
                major = hashlib.md5((major + frame_token).encode()).hexdigest()
            minor = hashlib.md5((minor + frame_token).encode()).hexdigest()
            hc += 1

        self.hash = AttrDict(major=major, minor=minor)
Exemple #3
0
    def __init__(self, target, blocklist=None, major_depth=5, limit=0):
        '''
        Walks the stack via the GDB API, appending each frame to self.

        Frames for which self._in_blocklist() is true are flagged with
        blocklisted = True and are left out of the hashes. The first
        major_depth frames that are not blocklisted feed the major hash;
        all frames that are not blocklisted feed the minor hash. The walk
        stops at a frame whose name contains "libc_start_main", on an
        unwind error (abnormal_termination is then set), or once limit
        frames have been visited when limit is non-zero.
        '''
        list.__init__(self)
        self.blocklist = blocklist

        current = self._next_frame()
        major_digest, minor_digest = "0", "0"
        hashed = 0
        depth = 0
        self.abnormal_termination = False

        # some versions of the GDB Python API do not expose a frame unwind
        # error sentinel; if it is not available a hardcoded value based on
        # an enum from GDB's gdb/frames.h is used
        unwind_error = getattr(gdb, "FRAME_UNWIND_FIRST_ERROR", 3)

        while current:
            current = self._next_frame(target, current, depth)
            if not current:
                break

            # Workaround for a known libc/gdb runaway backtrace issue, see
            # http://sourceware.org/ml/libc-alpha/2012-03/msg00573.html
            frame_name = current.name()
            if frame_name and "libc_start_main" in frame_name:
                break

            if self._in_blocklist(current):
                current.blocklisted = True
            else:
                # each digest chains the previous digest with this frame
                frame_text = str(current)
                if hashed < major_depth:
                    major_digest = hashlib.md5(
                        (major_digest + frame_text).encode()).hexdigest()
                minor_digest = hashlib.md5(
                    (minor_digest + frame_text).encode()).hexdigest()
                hashed += 1
            self.append(current)

            if current.unwind_stop_reason() >= unwind_error:
                self.abnormal_termination = True
                break

            try:
                current = current.older()
            except RuntimeError:
                self.abnormal_termination = True
                break

            depth += 1
            if limit and depth >= limit:
                break

        self.hash = AttrDict(major=major_digest, minor=minor_digest)
Exemple #4
0
    def _add_by_target(self, num_initial_stacks=0):
        '''
        Parses GDB's "info target" output and appends one range per
        matched section to self.

        The first num_initial_stacks ranges are named "[stack]"; the rest
        are named after their library when one was matched, otherwise
        after the most recently seen file. The file name of each range is
        also recorded in self._files, and self.tgt_img is set to the file
        name of the first range (if any range was found).
        '''
        target_info = str(gdb.execute("info target", False, True))

        # Walk the matches in order, remembering the latest file name so it
        # can be attached to the section ranges that follow it
        entries = []
        current_file = None
        for match in self._re_info_target.finditer(target_info):
            if match.group("file") is not None:
                current_file = os.path.basename(match.group("file"))
                continue
            lib_name = match.group("lib")
            if lib_name:
                lib_name = os.path.basename(lib_name)
            low = int(match.group("start"), 16)
            high = int(match.group("end"), 16)
            entries.append(
                (low, high, current_file, lib_name, match.group("section")))

        for index, (low, high, owner, lib_name, section) in enumerate(entries):
            if index < num_initial_stacks:
                # No attempt is made to correlate thread stack pointers with
                # ranges; the leading num_initial_stacks sections are simply
                # assumed to be the stacks
                label = "[stack]"
            else:
                label = owner if lib_name is None else lib_name
            self.append(
                AttrDict(start=low,
                         end=high,
                         size=(high - low),
                         offset=0,
                         name=label,
                         sect=section))
            self._files.add(os.path.basename(owner))

        if entries:
            self.tgt_img = entries[0][2]
Exemple #5
0
 def __init__(self):
     '''
     Queries the GDB Python API for the process address space ("info proc
     map"), parses it, and appends one AttrDict (start, end, size, offset,
     name) per mapping to self.

     Raises GdbWrapperError if the "Start Addr" table header cannot be
     found in GDB's output.
     '''
     self._common_init()
     mapstr = str(gdb.execute("info proc map", False, True))
     header_pos = mapstr.find("Start Addr")
     if header_pos == -1:
         raise GdbWrapperError("Unable to parse \"info proc map\" string")
     maplines = mapstr[header_pos:].splitlines()

     # Newer GDB releases print a "Perms" column between "Offset" and
     # "objfile"; when it is present the object file name starts one
     # column later, so the permission flags do not leak into the name
     name_col = 5 if "Perms" in maplines[0].split() else 4

     for line in maplines[1:]:
         fields = line.split()
         if not fields:
             # tolerate blank lines in the table
             continue
         start, end, size, offset = (int(i, 16) for i in fields[0:4])
         name = " ".join(fields[name_col:])
         ad = AttrDict(start=start, end=end, size=size,
               offset=offset, name=name)
         self.append(ad)
Exemple #6
0
 def __init__(self, mapstr):
     '''
     Parses mapstr, a textual memory map with one mapping per line, and
     appends one AttrDict per mapping to self.

     Each line is split on whitespace into an address range of the form
     "start-end" (hexadecimal), permissions, a hexadecimal offset, a
     device, an inode and an optional name (which may contain spaces).
     Blank lines are ignored.
     '''
     for line in mapstr.splitlines():
         cols = line.split()
         if not cols:
             # a blank line would otherwise raise IndexError on cols[0]
             continue
         start, end = [int(i, 16) for i in cols[0].split("-")]
         ad = AttrDict(start=start,
                       end=end,
                       size=end - start,
                       offset=int(cols[2], 16),
                       name=" ".join(cols[5:]).strip(),
                       perms=cols[1],
                       dev=cols[3],
                       inode=cols[4])
         self.append(ad)
Exemple #7
0
    def getRules(self, target):
        '''
        Builds the flat, ranked list of classification rules for target.

        rules.rules is iterated as (category, rule list) pairs. Every user
        rule (a dict) becomes an AttrDict with two entries: "matches", the
        method of target.analyzer named by the rule's "match_function",
        and "tag", a Tag built from a copy of the remaining rule data plus
        its category and its ranking as a (position, total rules) tuple.
        Positions start at 1 and follow the order of rules.rules.
        '''
        total = sum(len(entries) for (_, entries) in rules.rules)
        flattened = ((category, entry)
                     for category, entries in rules.rules
                     for entry in entries)

        result = []
        for position, (category, entry) in enumerate(flattened, start=1):
            matcher = partial(
                getattr(target.analyzer, entry["match_function"]))
            # work on a copy so the rule definitions are left untouched
            tag_fields = copy.deepcopy(entry)
            del tag_fields["match_function"]
            tag_fields["ranking"] = (position, total)
            tag_fields["category"] = category
            result.append(AttrDict(matches=matcher, tag=Tag(tag_fields)))

        return result
Exemple #8
0
 def __init__(self, target):
     '''
     Initializes the underlying AttrDict and starts with an empty list of
     tags. The target argument is accepted but not used here.
     '''
     AttrDict.__init__(self)
     self.tags = []
Exemple #9
0
class Target(object):
    '''
    A wrapper for a Linux GDB Inferior. Includes various convenience
    methods used for classification.

    WARNING: Methods in this object may change the state of GDB. For
    example, the disassembly flavor may be left as "intel"
    after this code is executed.
    '''
    _re_info_frame = re.compile(
        r"""^\s*eip\s=\s([^\s;]*)(?:\sin\s)? # addr
                                                ([^\s;]*) # fname
                                                ([^\s;]*) # source_file:line
                                 """, re.VERBOSE)
    _re_gdb_info_sym = re.compile(
        r"""^\s*(?P<sym>.*?)\s+\+\s+(?P<off>[0-9]+)\s+
                                       in\s+section\s+\.text(\s+
                                       of\s+(?P<lib>.*?)\s*)?$""", re.VERBOSE)
    _re_gdb_addr_bit = re.compile(r"^gdbarch_dump: addr_bit = ([0-9]+)$",
                                  re.MULTILINE)
    _re_gdb_osabi = re.compile(r"\(currently \"(.*)\"\)")
    _re_gdb_arch = re.compile(r"\(currently\s+(.+)\)")

    # these functions and libs are not considered to be at fault for a crash
    blocklist = AttrDict(
        functions=("__kernel_vsyscall", "abort", "raise", "malloc", "free",
                   "*__GI_abort", "*__GI_raise", "malloc_printerr",
                   "__libc_message", "_int_malloc", "_int_free"),
        map_regex=re.compile(r".*/libc(\.|-).*|.*/libm(\.|-).*"))

    def __init__(self, bt_limit=0):
        '''
        Verifies that GDB is in a usable state and stores bt_limit, which
        is passed to Backtrace as its frame limit.
        '''
        self._check_inferior_state()
        self.bt_limit = bt_limit

    def _check_inferior_state(self):
        '''
        Raises GdbWrapperError unless GDB has exactly one inferior with at
        least one thread and its first listed thread is stopped.
        '''
        inferiors = gdb.inferiors()
        if len(inferiors) != 1:
            raise GdbWrapperError(
                "Unsupported number of inferiors ({})".format(
                    len(inferiors)))
        threads = inferiors[0].threads()
        if len(threads) == 0:
            raise GdbWrapperError("No threads running")
        if not threads[0].is_stopped:
            raise GdbWrapperError("Inferior's primary thread is not stopped")

    @memoized
    def backtrace(self):
        '''Returns a Backtrace built with this class's blocklist.'''
        return Backtrace(self, self.blocklist, limit=self.bt_limit)

    def hash(self):
        '''Returns the hash attribute of the backtrace.'''
        return self.backtrace().hash

    @memoized
    def procmaps(self):
        '''Returns a ProcMaps instance.'''
        return ProcMaps()

    @memoized
    def faulting_frame(self):
        '''
        Returns the first frame of the backtrace that is not blocklisted,
        or None (after issuing a warning) if every frame is blocklisted.
        '''
        for frame in self.backtrace():
            if not frame.blocklisted:
                return frame
        warnings.warn("All frames blocklisted")
        return None

    @staticmethod
    def sym_addr(sym):
        '''
        Evaluates sym as a GDB expression and returns the result through
        gdb_uint, or None if GDB raises gdb.error.
        '''
        try:
            return gdb_uint(gdb.parse_and_eval(str(sym)))
        except gdb.error:
            return None

    @memoized
    def current_instruction(self):
        '''
        Returns the instruction at the current program counter, or None
        if a RuntimeError is raised while fetching or parsing it.
        '''
        try:
            gdbstr = gdb.execute("x/i 0x%x" % self.pc(), False,
                                 True).splitlines()[0]
            return self._getInstruction(gdbstr)
        except RuntimeError:
            return None

    def _getInstruction(self, gdbstr):
        '''Wraps a line of GDB "x/i" output in an x86Instruction.'''
        return x86Instruction(gdbstr)

    @memoized
    def pc(self):
        '''Returns the value of $pc.'''
        return gdb_uint(gdb.parse_and_eval("$pc"))

    @memoized
    def stack_pointer(self):
        '''Returns the value of $sp.'''
        return gdb_uint(gdb.parse_and_eval("$sp"))

    @memoized
    def pid(self):
        '''Returns the pid of the first inferior.'''
        return gdb.inferiors()[0].pid

    @memoized
    def pointer_size(self):
        '''
        Returns the pointer size in bytes, derived from the addr_bit value
        in the output of "maint print architecture".
        '''
        addr_bits = int(
            self._re_gdb_addr_bit.search(
                gdb.execute("maint print architecture", False,
                            True)).group(1))
        # floor division keeps the result an int on Python 3, where "/"
        # would produce a float
        return addr_bits // 8

    @memoized
    def si_signo(self):
        '''Returns $_siginfo.si_signo as a GDB value.'''
        # This is a workaround to a bug in the GDB Python API:
        # The only reliable way to cause GDB to raise an exception when
        # $_siginfo is not available is to call __str__() -- otherwise
        # (such as when casting the Gdb.Value to another type), GDB may
        # force Python to abruptly exit rather than raising an exception
        signo = gdb.parse_and_eval("$_siginfo.si_signo")
        str(signo)
        return signo

    @memoized
    def si_addr(self):
        '''Returns $_siginfo._sifields._sigfault.si_addr via gdb_uint.'''
        # str() is called first for the same reason as in si_signo(): it
        # makes GDB raise an exception if $_siginfo is not available
        addr = gdb.parse_and_eval("$_siginfo._sifields._sigfault.si_addr")
        str(addr)
        return gdb_uint(addr)
Exemple #10
0
 def __init__(self, target):
     '''
     Initializes the underlying AttrDict and starts with an empty list of
     tags. The target argument is accepted but not used here.
     '''
     AttrDict.__init__(self)
     self.tags = []
Exemple #11
0
    def __init__(self, asan_output, bt_limit=0):
        '''
        Builds the target state from AddressSanitizer output.

        Every frame matched by self._re_asan_bt is symbolized through GDB
        ("info symbol" / "info line"); images named in the log are
        registered with the process map via add_file. The symbolized log
        is written to GDB's output. The frames of the first stack in the
        log (frame numbers strictly increasing) are stored in
        self.asan_stack. The fault description matched by
        self._re_asan_fault seeds the memoized values for si_addr(), pc()
        and stack_pointer(), and sets self.asan_reason (and
        self.asan_operation for reasons other than "double-free" and
        "SEGV").

        bt_limit caps the number of frames read from the log; 0 means no
        limit.

        Raises GdbWrapperError if asan_output is empty, contains no
        frames, or contains no fault description.
        '''
        self.__memo__ = {
            "isPossibleStackCorruption()": False,
            "isStackCorruption()": False,
            "isStackOverflow()": False,
            "si_signo()": 11
        }
        if not asan_output:
            raise GdbWrapperError("no ASan data to analyze")

        # symbolize asan_message
        self.asan_stack = []
        out = []
        last = 0
        all_frames = []
        maps = self.procmaps()
        for count, m in enumerate(self._re_asan_bt.finditer(asan_output),
                                  start=1):
            frame, addr, img, offset = m.group("frame", "addr", "img",
                                               "offset")
            frame = int(frame)
            addr = int(addr, 16)  #+ 1
            if img:
                # offset is the text captured by the regex, so it has to be
                # converted before it can be subtracted from the address
                # NOTE(review): assumes the offset is hexadecimal, as in
                # ASan's "(image+0x...)" notation -- confirm against
                # _re_asan_bt
                maps.add_file(img, addr - int(offset, 16))
            out.append(asan_output[last:m.end("all")])
            # len(out) is the index of the placeholder appended next, which
            # is later replaced by the symbolized frame text
            all_frames.append((frame, addr, offset, img, len(out)))
            out.append(None)
            last = m.end()

            # a bt_limit of 0 means "no limit"
            if bt_limit and count >= bt_limit:
                break

        if not all_frames:
            raise GdbWrapperError("No frames found in address sanitizer log")

        out.append(asan_output[last:])
        # frame tracks the last frame number seen in the first stack: -1
        # before the first frame, None once a later stack has started
        frame = -1
        for num, addr, offset, img, outpos in all_frames:
            region = maps.findByAddr(addr)
            symbol = gdb.execute("info symbol {:#x}".format(addr), False, True)
            symline = gdb.execute("info line *{:#x}".format(addr), False, True)
            if symline and symline.startswith("Line"):
                symline = "\n\t{}".format(
                    self._re_symline_trim.sub("", symline))
            else:
                symline = ""
            symbol_m = self._re_gdb_info_sym.search(symbol)
            if img:
                lib = img
            elif region:
                lib = region.name
            else:
                lib = None
            if symbol_m is None:
                sym = None
                off = offset
            else:
                sym = symbol_m.group("sym")
                off = int(symbol_m.group("off"))
            if frame == -1:
                self.asan_pc_img = lib, offset
            if frame is not None and num > frame:
                frame = num
                if lib:
                    lib = os.path.basename(lib)
                self.asan_stack.append(
                    AttrDict(addr=addr, lib=lib, off=off, name=sym))
            else:
                frame = None
            out[outpos] = "{}){}".format(
                ASanFrame.create(self, addr, sym, off).terse(), symline)
        asan_output = "".join(out)
        gdb.write(asan_output)
        gdb.flush()
        # parse ASAN's analysis
        m = self._re_asan_fault.search(asan_output)
        if m is None:
            raise GdbWrapperError(
                "No fault description found in address sanitizer log")
        self.__memo__["si_addr()"] = int(m.group("fault"), 16)
        self.asan_reason = m.group("desc")
        if self.asan_reason == "double-free":
            self.__memo__["pc()"] = self.asan_stack[1].addr
            self.__memo__["stack_pointer()"] = None  # what to do? ....
        else:
            self.__memo__["pc()"] = int(m.group("pc"), 16)
            if m.group("bspid1") == "sp":
                self.__memo__["stack_pointer()"] = int(m.group("bsp1"), 16)
            else:
                self.__memo__["stack_pointer()"] = int(m.group("bsp2"), 16)
            if self.asan_reason != "SEGV":
                self.asan_operation = m.group("operation")
Exemple #12
0
class Target(object):
    '''
    A crash target backed by text files stored in a bug directory
    (disassembly, stack trace, registers, process map and a
    "vulture.json" metadata file). Includes various convenience
    methods used for classification.

    NOTE: sym_addr(), pid() and si_addr() still query GDB directly.
    '''
    _re_info_frame = re.compile(
        r"""^\s*eip\s=\s([^\s;]*)(?:\sin\s)? # addr
                                                ([^\s;]*) # fname
                                                ([^\s;]*) # source_file:line
                                 """, re.VERBOSE)
    _re_gdb_info_sym = re.compile(
        r"""^\s*(?P<sym>.*?)\s+\+\s+(?P<off>[0-9]+)\s+
                                       in\s+section\s+\.text(\s+
                                       of\s+(?P<lib>.*?)\s*)?$""", re.VERBOSE)
    _re_gdb_addr_bit = re.compile(r"^gdbarch_dump: addr_bit = ([0-9]+)$",
                                  re.MULTILINE)
    _re_gdb_osabi = re.compile(r"\(currently \"(.*)\"\)")
    _re_gdb_arch = re.compile(r"\(currently\s+(.+)\)")

    # these functions and libs are not considered to be at fault for a crash
    blacklist = AttrDict(
        functions=("__kernel_vsyscall", "abort", "raise", "malloc", "free",
                   "*__GI_abort", "*__GI_raise", "malloc_printerr",
                   "__libc_message", "_int_malloc", "_int_free",
                   '__kernel_vsyscall', 'abort', 'raise', 'malloc', 'free',
                   '*__GI_abort', '*__GI_raise', 'malloc_printerr',
                   '__libc_message', 'malloc_consolidate', '_int_malloc',
                   '__libc_calloc', '_dl_new_object', '_dl_map_object_from_fd',
                   '_dl_catch_error', '_dl_open', 'do_dlopen', 'dlerror_run',
                   '*__GI___libc_dlopen_mode', '_dl_map_object',
                   'dl_open_worker', 'munmap_chunk', '*__GI___backtrace',
                   '_dl_addr_inside_object', '_int_free', '*__GI___libc_free',
                   '__malloc_assert', 'sYSMALLOc', '_int_realloc',
                   '*__GI___libc_malloc', '*__GI___libc_realloc',
                   '_int_memalign', '*__GI___libc_memalign',
                   '__posix_memalign', 'malloc_consolidate', '__libc_malloc',
                   '__libc_realloc'),
        map_regex=re.compile(r".*/libc(\.|-).*|.*/libm(\.|-).*"))

    def __init__(self, bug_dirpath):
        '''
        Loads the crash data found in bug_dirpath.

        The contents of Disassembly.txt, Stacktrace.txt, Registers.txt and
        ProcMaps.txt are stored as the attributes disassembly_text,
        stacktrace_text, registers_text and procmap_text; the parsed
        vulture.json is stored as self.metadata.
        '''
        basis = [('disassembly_text', '/Disassembly.txt'),
                 ('stacktrace_text', '/Stacktrace.txt'),
                 ('registers_text', '/Registers.txt'),
                 ('procmap_text', '/ProcMaps.txt')]
        for k, v in basis:
            # "with" closes each file as soon as it has been read
            with open(bug_dirpath + v, "rt") as infile:
                setattr(self, k, infile.read())
        with open("%s/vulture.json" % bug_dirpath, "rt") as infile:
            self.metadata = json.load(infile)

    @memoized
    def arch(self):
        '''
        Returns "x64" if a "rip" register was parsed, "x86" if an "eip"
        register was parsed; raises NotImplementedError otherwise.
        '''
        if "rip" in self.registers():
            return "x64"
        elif "eip" in self.registers():
            return "x86"
        else:
            raise NotImplementedError("Unknown arch: rip/eip not found")

    @memoized
    def registers(self):
        '''
        Parses registers_text into a dict mapping register name to value.

        The first whitespace-separated field of each line is the name and
        the second is parsed as a hexadecimal integer. Lines with fewer
        than two fields (such as blank lines) are skipped.
        '''
        regs = {}
        for line in self.registers_text.splitlines():
            parts = line.split()
            if len(parts) < 2:
                continue
            regs[parts[0]] = int(parts[1], 16)
        return regs

    @memoized
    def backtrace(self):
        '''Returns a Backtrace built from stacktrace_text.'''
        return Backtrace(self.stacktrace_text, self.blacklist)

    def hash(self):
        '''Returns the hash attribute of the backtrace.'''
        return self.backtrace().hash

    @memoized
    def procmaps(self):
        '''Returns a ProcMaps built from procmap_text.'''
        return ProcMaps(self.procmap_text)

    @memoized
    def faulting_frame(self):
        '''
        Returns the first frame of the backtrace that is not blacklisted,
        or None (after issuing a warning) if every frame is blacklisted.
        '''
        for frame in self.backtrace():
            if not frame.blacklisted:
                return frame
        warnings.warn("All frames blacklisted")
        return None

    @staticmethod
    def sym_addr(sym):
        '''
        Evaluates sym as a GDB expression and returns the result through
        gdb_uint, or None if GDB raises gdb.error.
        '''
        try:
            return gdb_uint(gdb.parse_and_eval(str(sym)))
        except gdb.error:
            return None

    @memoized
    def current_instruction(self):
        '''
        Returns an x86Instruction for the current instruction found in
        disassembly_text.

        The current instruction is the line containing "=>", or the first
        line if no line contains "=>". Lines that follow it and do not
        start with a hexadecimal address ("0x...") are treated as
        continuations of a long instruction and are appended to it.
        '''
        re_hex_int = re.compile(r"^(0x[A-Fa-f0-9]+).*$")
        lines = [l.strip() for l in self.disassembly_text.splitlines()]

        # prefer the "=>" line; if there is none, fall back to the first
        # line
        first = 0
        for i, line in enumerate(lines):
            if "=>" in line:
                first = i
                break

        inst = lines[first]
        # handle long instructions that break across lines; slicing keeps
        # this safe when the instruction is the last line of the text
        for continuation in lines[first + 1:]:
            if re_hex_int.match(continuation):
                break
            inst += continuation

        return x86Instruction(inst, self)

    @memoized
    def pc(self):
        '''Returns the value of eip (x86) or rip (otherwise).'''
        if self.arch() == "x86":
            return self.registers()['eip']
        else:
            return self.registers()['rip']

    @memoized
    def stack_pointer(self):
        '''Returns the value of esp (x86) or rsp (otherwise).'''
        if self.arch() == "x86":
            return self.registers()['esp']
        else:
            return self.registers()['rsp']

    @memoized
    def counter(self):
        '''Returns the value of ecx (x86) or rcx (otherwise).'''
        if self.arch() == "x86":
            return self.registers()['ecx']
        else:
            return self.registers()['rcx']

    @memoized
    def pid(self):
        '''Returns the pid of GDB's first inferior.'''
        return gdb.inferiors()[0].pid

    @memoized
    def pointer_size(self):
        '''
        Returns the pointer size in bytes: 4 for x86 and 8 for x64.
        Raises NotImplementedError for any other architecture.
        '''
        if self.arch() == "x86":
            return 4
        elif self.arch() == "x64":
            return 8
        raise NotImplementedError("unsupported arch")

    @memoized
    def signal(self):
        '''Returns the "sigtext" entry of the metadata.'''
        return self.metadata['sigtext']

    @memoized
    def si_addr(self):
        '''Returns $_siginfo._sifields._sigfault.si_addr via gdb_uint.'''
        # NOTE(review): str() is presumably called first so that GDB raises
        # an exception when $_siginfo is unavailable -- confirm
        addr = gdb.parse_and_eval("$_siginfo._sifields._sigfault.si_addr")
        str(addr)
        return gdb_uint(addr)