Ejemplo n.º 1
0
    def _resolve_ctype(self, type_as_string, type_size):
        # type: (types.StringTypes, types.IntType) -> types.TypeType

        """
        Given a string representing a type, and the type size - resolve a ctypes type

        :param type_as_string: str - A string representing a native type
        :param type_size: The size of the underlying native type
        :return: A ctypes type representing the native type
        :rtype: type
        """
        assert_type(types.StringTypes, type_as_string=type_as_string)
        assert_type(types.IntType, type_size=type_size)

        # Qualifiers are irrelevant
        type_as_string = type_as_string.replace('const', '').strip()

        if NativeArgument.is_native_type_a_string(type_as_string):
            self.logger.debug('Spotted a string - will use an array instead')
            return ct.c_char * self.MAX_ARRAY_SIZE
        else:
            return self.C_TYPE_MAPPING.get(
                type_as_string,
                self.C_TYPE_MAPPING.get(type_size, ct.c_int)
            )
Ejemplo n.º 2
0
    def _generate_data_struct_copy_body(self, syscall_args):
        # type: (types.ListType) -> types.StringTypes
        """
        Generates a string representing the copying of the syscall arguments to our shared data-structure

        :param syscall_args: List of syscall args
        :return: A string representing all copying to be made from syscall arguments
                (that are passed to our ebpf handler) back to user mode
        :rtype: str
        """
        assert_type(types.ListType, syscall_args=syscall_args)

        body = '''
    data.current_time_ns = bpf_ktime_get_ns();
    data.process_id = bpf_get_current_pid_tgid() >> 32;
    data.thread_id = (u32) bpf_get_current_pid_tgid();
    data.group_id = bpf_get_current_uid_gid() >> 32;
    data.user_id = (u32) bpf_get_current_uid_gid();
    bpf_get_current_comm(&data.process_name, sizeof(data.process_name));         
'''

        for arg in syscall_args:
            if arg.is_string():
                self.logger.debug('Spotted a string - will use bpf_probe_read to copy it')
                body += 'bpf_probe_read(&data.{arg_name}, sizeof(data.{arg_name}), (void*){arg_name});\n    '.format(
                    arg_name=arg.name)
            else:
                body += 'data.{arg_name} = {arg_name};\n    '.format(arg_name=arg.name)

        return body
Ejemplo n.º 3
0
    def __init__(self, program_text, ctypes_data_class):
        # type: (types.StringTypes, types.TypeType) -> None
        """
        :param program_text: str - BPF-program text
        :param ctypes_data_class: ctypes.Structure - A Data class representing any extra parameters for the ebpf routine
        """
        assert_type(types.StringTypes, program_text=program_text)
        assert_type(types.TypeType, ctypes_data_class=ctypes_data_class)

        self.program_text = program_text
        self.ctypes_data_class = ctypes_data_class
Ejemplo n.º 4
0
    def attach_kprobe(self, event, fn=None, implicitly_add_syscall_args=True, **kwargs):
        # type: (types.StringTypes, types.FunctionType, types.BooleanType, **types.ObjectType) -> None
        """
        Attaches a kernel probe to a given python function

        :param event: Name of the event to attach to
        :param fn: Python function to invoke
        :param implicitly_add_syscall_args: If True, will try to implicitly generate the syscall arguments,
                                            copying them from our ebpf routine back to user-space
        """
        assert_type(types.StringTypes, event=event)
        if fn is not None:
            assert_type(types.FunctionType, fn=fn)

        event = normalize_event(event)
        attached_kprobe_descriptor = None

        try:
            if not fn:
                self.logger.debug('Function was not passed - fallbacking to default implementation')
                attached_kprobe_descriptor = AttachedKProbeDescriptor(event, self)
                return super(EBPFWrapper, self).attach_kprobe(event, **kwargs)

            event_without_syscall_prefix = None

            # noinspection PyUnresolvedReferences
            for syscall_prefix in self._syscall_prefixes:
                syscall_prefix = syscall_prefix.lower()
                if event.startswith(syscall_prefix) or \
                        event.startswith(self.KRPOBE_EVENT_SYSCALL_PREFIX + syscall_prefix):
                    event_without_syscall_prefix = event \
                        .replace(syscall_prefix, '') \
                        .replace(self.KRPOBE_EVENT_SYSCALL_PREFIX, '')
                    break

            if event_without_syscall_prefix is None:
                self.logger.debug('Event is not prefixed with a known syscall prefix - '
                                  'fallbacking to default implementation')
                attached_kprobe_descriptor = AttachedKProbeDescriptor(event, self)
                return super(EBPFWrapper, self).attach_kprobe(event, **kwargs)

            event_without_syscall_prefix = self._replace_event_alias_if_needed(event_without_syscall_prefix)
            attached_kprobe_descriptor = self._attach_kprobe_with_managed_polling_thread(event,
                                                                                         event_without_syscall_prefix,
                                                                                         fn,
                                                                                         implicitly_add_syscall_args)
        finally:
            if attached_kprobe_descriptor is not None:
                self._attached_kprobes.setdefault(event, []).append(attached_kprobe_descriptor)
Ejemplo n.º 5
0
    def _read_buffer_pool(self, bpf, callback, data_class, detach_event):
        # type: (BPF, types.FunctionType, types.TypeType, _Event) -> None

        """
        A routine that will be called from a separate thread, that will call a given python callback, till
        detach_event is set.

        :param bpf: A BPF instance
        :param callback: A callback to call to whenever our kprobe was invoked
        :param data_class: A ctypes data class to pass to the routine as a kwarg
        :param detach_event: An event to check against, once this is set, our thread will stop polling
        """
        assert_type(BPF, bpf=bpf)
        assert_type(types.FunctionType, callback=callback)
        assert_type(types.TypeType, data_class=data_class)
        assert_type(_Event, detach_event=detach_event)

        thread_name = current_thread().getName()

        def call_callback(cpu, data, size):
            # type: (types.IntType, types.TypeType, types.IntType) -> None

            """
            On every BPF map poll, we'll call this wrapper, which will call the passed callback.

            :param cpu: CPU #
            :param data: Shread data class from our EBPF routine
            :param size: Size of data class, in bytes
            """
            # noinspection PyUnresolvedReferences,PyBroadException
            try:
                # noinspection PyUnresolvedReferences
                data = ct.cast(data, ct.POINTER(data_class)).contents
            except Exception:
                # Try cast data best effortly
                pass

            if not detach_event.is_set():
                # noinspection PyBroadException
                try:
                    callback(cpu=cpu, data=data, size=size)
                except Exception:
                    # Best-effort callback
                    self.logger.exception('Failed calling callback')

        # noinspection PyUnresolvedReferences
        bpf['events'].open_perf_buffer(call_callback)
        while not detach_event.is_set():
            # noinspection PyBroadException
            try:
                # noinspection PyUnresolvedReferences
                bpf.perf_buffer_poll()
            except Exception:
                # Best-effort polling, if we fail - we should kill the thread
                self.logger.exception('Failed polling from bpf map')
                break

        self.logger.info('Polling thread is detached name={}'.format(thread_name))
Ejemplo n.º 6
0
    def _replace_event_alias_if_needed(self, event_without_syscall_prefix):
        # type: (types.StringTypes) -> types.StringTypes
        """
        Follows known event aliases (if any) and replaces them, or return the origin event

        :param event_without_syscall_prefix: str - The event without a syscall prefix
        :return: str - The dereferenced event from the alias, or the original event if no alias was found
        """
        assert_type(types.StringTypes, event_without_syscall_prefix=event_without_syscall_prefix)

        event_alias = self.SYSCALL_EVENT_ALIASES.get(event_without_syscall_prefix, None)
        if event_alias is not None:
            self.logger.debug('Event is an alias, will replace "{}" with "{}"'.format(event_without_syscall_prefix,
                                                                                      event_alias))
            event_without_syscall_prefix = event_alias

        return event_without_syscall_prefix
Ejemplo n.º 7
0
    def _generate_program_descriptor(self, event, function_name, implicitly_add_syscall_args):
        # type: (types.StringTypes, types.StringTypes) -> EBPFProgramDescriptor
        """
        :param event: str - An event / syscall name
        :param function_name: A function name (The ebpf function will use this name)
        :param implicitly_add_syscall_args: If True, will try to implicitly generate the syscall arguments
        :return: A program text that copies all of the syscall parameters back to user-space
        :rtype: BPFProgramDescriptor
        """
        assert_type(types.StringTypes, event=event)
        assert_type(types.StringTypes, function_name=function_name)

        syscall_args = []
        if not implicitly_add_syscall_args:
            self.logger.debug('Implicit syscall argument generation is disabled')
        else:
            syscall_args = self._get_syscall_arguments(event)

        program_text = '''
#include <uapi/linux/ptrace.h>
#include <uapi/linux/limits.h>
#include <linux/sched.h>

BPF_PERF_OUTPUT(events);                    
'''

        data_struct = self._generate_data_struct(syscall_args)
        program_text += '{data_struct}'.format(data_struct=data_struct)

        data_struct_copy_body = self._generate_data_struct_copy_body(syscall_args)
        function_signature = self._generate_function_signature(function_name, syscall_args)
        program_text += '''
%(func_signature)s {
    struct data_t data = {};
                
    %(data_struct_copy_body)s           
    events.perf_submit(ctx, &data, sizeof(data));
    return 0;
}
''' % dict(func_signature=function_signature, data_struct_copy_body=data_struct_copy_body)

        data_class = self._generate_data_class(syscall_args)
        return EBPFProgramDescriptor(program_text, data_class)
Ejemplo n.º 8
0
    def _generate_data_struct(self, syscall_args):
        # type: (types.ListType) -> types.StringTypes
        """
        Generates a data structure representing string that is copied from kernel-space to user-space.

        :param syscall_args: List of syscall args
        :return: Data structure string to be shared from kernel space to user space (Could be empty)
        :rtype: str
        """
        assert_type(types.ListType, syscall_args=syscall_args)

        data_struct_template = '''
struct data_t {
    %(syscall_args)s;
};
            
'''

        formatted_args = OrderedDict()
        for arg in (self.DEFAULT_DATA_STRUCTURE_MEMBERS + syscall_args):
            if arg.name in formatted_args:
                self.logger.warn('Duplicate arg found - will take the last match')

            if arg.is_string():
                self.logger.debug('Spotted a string - implicitly converting to char array')
                # noinspection PyBroadException
                try:
                    # noinspection PyUnresolvedReferences, PyProtectedMember
                    array_size = arg.ctypes_type._length_
                except Exception:
                    # Try get array size from ctypes type best-effortly
                    array_size = self.MAX_ARRAY_SIZE

                # noinspection PyTypeChecker
                arg = NativeArgument(
                    'char',
                    '{arg_name}[{array_size}]'.format(arg_name=arg.name, array_size=array_size),
                    ct.c_char * array_size
                )
            formatted_args[arg.name] = str(arg)

        return data_struct_template % dict(syscall_args=';\n    '.join(formatted_args.values()))
Ejemplo n.º 9
0
    def _generate_function_signature(function_name, syscall_args):
        # type: (types.StringTypes, types.ListType) -> types.StringTypes
        """

        :param function_name: The name of the ebpf function
        :param syscall_args: List of syscall arguments
        :return: A string representing the ebpf function signature (Including the syscall arguments, and the mandatory
                registers context struct)
        :rtype: str
        """
        assert_type(types.StringTypes, function_name=function_name)
        assert_type(types.ListType, syscall_args=syscall_args)

        syscall_args = map(lambda x: str(x), syscall_args)
        base_function_signature = 'int {function_name} (struct pt_regs* ctx'.format(function_name=function_name)
        if syscall_args:
            base_function_signature += ', {syscall_args}'.format(syscall_args=', '.join(syscall_args))

        base_function_signature += ')'
        return base_function_signature
Ejemplo n.º 10
0
    def _generate_data_class(self, syscall_args):
        # type: (types.ListType) -> types.TypeType

        """
        Generates a ctypes data-class given the syscall arguments

        :param syscall_args: list - A list of NativeArgument objects, representing syscall arguments
                                    that are passed to our ebpf routine
        :return: ctypes class that the members to copy from our ebpf routine back to user-space
        :rtype: type
        """
        assert_type(types.ListType, syscall_args=syscall_args)
        fields = [(arg.name, arg.ctypes_type) for arg in (self.DEFAULT_DATA_STRUCTURE_MEMBERS + syscall_args)]

        # noinspection PyUnresolvedReferences
        class Data(ct.Structure):
            _fields_ = fields

        self.logger.debug('Generated data class fields: {}'.format(fields))

        return Data
Ejemplo n.º 11
0
    def _attach_kprobe_with_managed_polling_thread(self, event, event_without_syscall_prefix, fn,
                                                   implicitly_add_syscall_args):
        # type: (types.StringTypes, types.StringTypes, types.FunctionType, types.BooleanType) -> AttachedKProbeDescriptor
        """
        Attaches a kprobe with a given event, and spawns a daemon thread that polls on the kprobe map and calls
        the passed function as a callback.

        :param event: str - An event
        :param event_without_syscall_prefix: str - An event without its syscall prefix
        :param fn: function - A function to call once the kprobe was invoked
        :param implicitly_add_syscall_args: If True, will try to implicitly generate the syscall arguments
        :return: An AttachedKProbeDescriptor object that wraps the event, the bpf object and the polling thread
        :rtype: AttachedKProbeDescriptor
        """
        assert_type(types.StringTypes, event=event)
        assert_type(types.StringTypes, event_without_syscall_prefix=event_without_syscall_prefix)
        assert_type(types.FunctionType, fn=fn)

        function_name = fn.__name__
        self.logger.debug('Attaching kprobe to event={}'.format(event))
        program_descriptor = self._generate_program_descriptor(event_without_syscall_prefix, function_name,
                                                               implicitly_add_syscall_args)
        bpf = BPF(text=program_descriptor.program_text)
        bpf.attach_kprobe(event=event, fn_name=function_name)

        detach_event = Event()
        t = Thread(target=self._read_buffer_pool, name='{}::{}_thread'.format(event, function_name),
                   args=(bpf, fn, program_descriptor.ctypes_data_class, detach_event))
        t.setDaemon(True)
        t.start()

        return AttachedKProbeDescriptor(event, bpf, detach_event)
Ejemplo n.º 12
0
    def detach_kprobe(self, event):
        # type: (types.StringTypes) -> None
        """
        Detaches kprobes associated with the event nmae.

        :param event: str - Event name to detach kprobes of
        """
        assert_type(types.StringTypes, event=event)

        event = normalize_event(event)

        if event not in self._attached_kprobes:
            self.logger.info('{} is not an attached kprobe'.format(event))
            return

        kprobes_for_event = self._attached_kprobes.get(event, [])
        kprobes_for_event_copy = list(kprobes_for_event)

        for idx, descriptor in enumerate(kprobes_for_event_copy):
            # noinspection PyBroadException
            try:
                self.logger.info('Detaching kprobe with event={} bpf_idx={}'.format(event, idx))
                if descriptor.bpf == self:
                    # noinspection PyUnresolvedReferences
                    super(EBPFWrapper, self).detach_kprobe(event)
                else:
                    descriptor.bpf.detach_kprobe(event)

                if descriptor.polling_thread_event is not None:
                    descriptor.polling_thread_event.set()

                kprobes_for_event.pop(idx)
            except Exception:
                self.logger.exception('Failed detaching kprobe for event={} bpf_idx={}; Continuing'.format(event, idx))

        if not kprobes_for_event:
            self._attached_kprobes.pop(event)
Ejemplo n.º 13
0
    def __init__(self, native_type_as_string, name, ctypes_type):
        # type: (types.StringTypes, types.StringTypes, types.TypeType) -> None

        """
        :param native_type_as_string: str - A string representing the c-type of the argument, e.g. 'unsigned int'
        :param name: str - The name of the argument
        :param ctypes_type: The type represented as a ctypes type
        """
        assert_type(types.StringTypes, native_type_as_string=native_type_as_string)
        assert_type(types.StringTypes, name=name)
        assert_type(types.TypeType, ctypes_type=ctypes_type)

        self.native_type_as_string = native_type_as_string
        self.name = name
        self.ctypes_type = ctypes_type
Ejemplo n.º 14
0
    def __init__(self, event, bpf, polling_thread_event=None):
        # type: (types.StringTypes, BPF, [Event]) -> None

        """
        :param event: str - An event name
        :param bpf: BPF - A BPF object with a compiled module
        :param polling_thread_event: Event - An event to set once the kprobe is detached
        """

        assert_type(types.StringTypes, event=event)
        assert_type(BPF, bpf=bpf)

        if polling_thread_event is not None:
            assert_type(_Event, polling_thread_event=polling_thread_event)

        self.event = normalize_event(event)
        self.bpf = bpf
        self.polling_thread_event = polling_thread_event
Ejemplo n.º 15
0
    def _get_syscall_arguments(self, syscall_name, skip_till_offset=SKIP_SYSCALL_ARGS_TILL_OFFSET):
        # type: (types.StringTypes, types.IntType) -> types.ListType
        """

        :param syscall_name: str - The name of the syscall to get the arguments for
        :param skip_till_offset: An offset that all arguments (inclusive) till it, are skipped
        :return: A list of NativeArgument objects, representing the syscall arguments, or None, if parsing fails
        :rtype: list
        """

        assert_type(types.StringTypes, syscall_name=syscall_name)
        assert_type(types.IntType, skip_till_offset=skip_till_offset)

        format_file_path = self.SYSCALL_FORMAT_FILE_TEMPLATE.format(syscall_name=syscall_name)
        self.logger.debug('Will try to read syscall format file={}'.format(format_file_path))

        # noinspection PyBroadException
        try:
            args = []

            with open(format_file_path) as f:
                for line in f:
                    stripped = line.strip()
                    if not stripped.startswith('field:'):
                        continue

                    format_parts = [x.strip() for x in stripped.replace('field:', '').split(';') if x.strip()]
                    assert len(format_parts) == 4, 'Failed parsing syscall field format; ' \
                                                   'Expected a 4 parts separated by ";"'

                    self.logger.debug('Parsing field line={}'.format(line))

                    offset = format_parts[1]
                    assert offset.startswith('offset:'), 'Expected offset part to start with "offset:"'

                    offset_match = re.findall('\d+', offset)
                    assert offset_match and len(offset_match) == 1, 'Expected offset part to contain a single number'

                    offset_num = int(offset_match[0])
                    if offset_num < skip_till_offset:
                        self.logger.debug('Skipping field with offset={}'.format(offset_num))
                        continue

                    size = format_parts[2]
                    assert size.startswith('size:'), 'Expected size part to start with "size:"'

                    size_match = re.findall('\d+', size)
                    assert size_match and len(size_match) == 1, 'Expected size part to contain a single number'

                    type_size = int(size_match[0])

                    type_and_name = format_parts[0]
                    splat = type_and_name.split(' ')
                    assert len(splat) >= 2, 'Expected type and name part to contain at least two spaces'

                    c_type = ' '.join(splat[:-1])
                    name = splat[-1]

                    native_arg = NativeArgument(c_type, name, self._resolve_ctype(c_type, type_size))
                    self.logger.debug('Parsed native arg={}'.format(native_arg))
                    args.append(native_arg)

                    if len(args) > self.MAX_PASSED_ARGS:
                        self.logger.warn("We've populated the maximum number of arguments ({}); "
                                         "Will return a partial argument list".format(self.MAX_PASSED_ARGS))
                        break
        except Exception as e:
            self.logger.error('Failed parsing syscall format file from path={} err={}; '
                              'will return None'.format(format_file_path, e.message))
            return []

        return args