Beispiel #1
0
    def acquire_semaphore(self):
        """Wait for the semaphore to become available, or return immediately if this module does not use a semaphore."""
        if self.semaphore_name is None:
            logging.warning("semaphore name is None for {}".format(self))
            return False

        if self.cancel_analysis_flag:
            logging.info("called acquire_semaphore() for module {} but cancel_analysis_flag is set".format(self))
            return False

        # semaphores can be globally disabled
        if not saq.SEMAPHORES_ENABLED:
            logging.debug("semaphores are disabled - not using semaphore {}".format(self.semaphore))
            return False

        # create a new semaphore client to use
        self.semaphore = NetworkSemaphoreClient()

        #logging.debug("analysis module {0} acquiring semaphore {1}".format(self, self.semaphore_name))
        try:
            if not self.semaphore.acquire(self.semaphore_name):
                raise RuntimeError("acquire returned False")
            #logging.debug("analysis module {0} acquired semaphore {1}".format(self, self.semaphore_name))
        except Exception as e:
            logging.error("unable to acquire semaphore {} : {}".format(self.semaphore_name, e))
            report_exception()

            self.semaphore = None

            # TODO fall back to something else we can use
            return False

        return True
Beispiel #2
0
    def acquire_concurrency_lock(self):
        """Acquires a concurrency lock for this type of hunt if specified in the configuration for the hunt.
           Returns a NetworkSemaphoreClient object if the concurrency_type is CONCURRENCY_TYPE_NETWORK_SEMAPHORE
           or a reference to the threading.Semaphore object if concurrency_type is CONCURRENCY_TYPE_LOCAL_SEMAPHORE.
           Immediately returns None if non concurrency limits are in place for this type of hunt."""

        if self.concurrency_type is None:
            return None

        result = None
        start_time = local_time()
        if self.concurrency_type == CONCURRENCY_TYPE_NETWORK_SEMAPHORE:
            logging.debug(f"acquiring network concurrency semaphore {self.concurrency_semaphore} "
                          f"for hunt type {self.hunt_type}")
            result = NetworkSemaphoreClient(cancel_request_callback=self.manager_control_event.is_set)
                                                                # make sure we cancel outstanding request 
                                                                # when shutting down
            result.acquire(self.concurrency_semaphore)
        else:
            logging.debug(f"acquiring local concurrency semaphore for hunt type {self.hunt_type}")
            while not self.manager_control_event.is_set():
                if self.concurrency_semaphore.acquire(blocking=True, timeout=0.1):
                    result = self.concurrency_semaphore
                    break

        if result is not None:
            total_seconds = (local_time() - start_time).total_seconds()
            logging.debug(f"acquired concurrency semaphore for hunt type {self.hunt_type} in {total_seconds} seconds")

        return result
Beispiel #3
0
class AnalysisModule(object):
    """The base class of all analysis logic.  All your custom analysis modules extend this class."""
    def __init__(self, config_section, *args, **kwargs):
        super().__init__(*args, **kwargs)
        assert isinstance(config_section, str)

        # the section in the configuration that applies to this analysis module
        self.config_section = config_section
        self.config = None
        self._load_config()

        # a refernce to the RootAnalysis object we're analyzing
        self.root = None

        # a reference to the engine this module is running out of
        self.engine = None

        # the actual semaphore to use
        self.semaphore = None

        # we'll keep track of the Analysis and Observable objects we've generated
        # this is useful for cleanup routines
        self.generated_analysis = []
        self.generated_observables = []

        # observables that are excluded from being analyzed by this module
        self.observable_exclusions = {}  # key = o_type, value = [] of o_value
        self.load_exclusions()

        # observables that are excluded from being generated by this module
        self.expected_observables = {}  # key = o_type, value = set(o_value)
        self.load_expected_observables()

        # something might try to cancel an analysis execution
        self.cancel_analysis_flag = False

        # sometimes a module can depend on another service that is failing
        # when that happens we can trigger "cooldown periods" where we skip executing this module until some time
        # has elapsed

        # the time at which the cooldown expires (None if no cooldown is in effect)
        self.cooldown_timeout = None

        # a list (set) of files that are currently being watched
        self.watched_files = {}  # key = path, value = WatchedFile

        # set to true if this is a threaded module
        self.is_threaded = False
        if 'threaded' in self.config:
            self.is_threaded = self.config.getboolean('threaded')

        # how often self.execute_threaded is called from the self.execute_threaded_loop
        self.threaded_execution_frequency = 1
        if 'threaded_execution_frequency' in self.config:
            self.threaded_execution_frequency = self.config.getint(
                'threaded_execution_frequency')

        # the actual thread performing the work
        self.threaded_execution_thread = None

        # event to signal the thread can stop
        self.threaded_execution_stop_event = None

        # the priority of the analysis module
        # lower priority scores go first
        # higher priority scores go last
        self.priority = self.config.getint('priority', fallback=10)

        # the next time we check to see if any files we are watching have changed
        # this is automatically checked and updated every time this module is used to analyze something
        self.next_check_watched_files = None

    def start_threaded_execution(self):
        if not self.is_threaded:
            return

        self.threaded_execution_stop_event = threading.Event()
        self.threaded_execution_thread = threading.Thread(
            target=self.execute_threaded_loop_wrapper,
            name="Threaded Module {}".format(self))
        self.threaded_execution_thread.start()
        logging.info("started thread {}".format(
            self.threaded_execution_thread))

    def stop_threaded_execution(self):
        if not self.is_threaded:
            return

        logging.info("stopping threaded execution for {}".format(self))

        self.threaded_execution_stop_event.set()
        start = datetime.datetime.now()
        while True:
            self.threaded_execution_thread.join(5)
            if not self.threaded_execution_thread.is_alive():
                break

            logging.error("thread {} is not stopping".format(
                self.threaded_execution_thread))

            # have we been waiting for a really long time?
            if (datetime.datetime.now() - start
                ).total_seconds() >= saq.EXECUTION_THREAD_LONG_TIMEOUT:
                logging.critical(
                    "execution thread {} is failing to stop - process dying".
                    format(self.threaded_execution_thread))
                # suicide
                os._exit(1)

        logging.debug("threaded execution module {} has stopped ({})".format(
            self, self.threaded_execution_thread))

    def reset(self):
        """Resets the module to initialized state."""
        self.root = None
        self.semaphore = None
        self.generated_analysis = []
        self.generated_observables = []
        self.cancel_analysis_flag = False
        self.cooldown_timeout = None

    @property
    def cooldown_period(self):
        """Number of seconds this module stays in cooldown state.  Defaults to 60."""
        if 'cooldown_period' in self.config:
            return self.config.getint('cooldown_period')

        # if not defined then we default to 60 second cooldown
        return 60

    @property
    def semaphore_name(self):
        """The semaphore this module uses.  Defaults to None (no semaphore is used.)"""
        if 'semaphore' in self.config:
            return self.config['semaphore']

        return None

    def _load_config(self):
        # has the configuration not changed?
        if self.config is saq.CONFIG[self.config_section]:
            return

        # reference to the configuration object itself
        self.config = saq.CONFIG[self.config_section]
        self.load_config()

    def load_config(self):
        """Called when saq.CONFIG changes."""
        pass

    def watch_file(self, path, callback):
        """Watches the given file and executes callback when it detects that the file has been modified."""
        if path in self.watched_files:
            logging.warning("replacing callback {} for {} with {}".format(
                self.watched_files[path], path, callback))

        logging.debug("watching file {}".format(path))
        self.watched_files[path] = WatchedFile(path, callback)
        # go ahead and load it up
        self.check_watched_files()

    def check_watched_files(self):
        # is it time to check the files this module is watching?
        if self.next_check_watched_files is not None and datetime.datetime.now(
        ) < self.next_check_watched_files:
            return

        # check every 5 seconds
        self.next_check_watched_files = datetime.datetime.now() + \
                                        datetime.timedelta(seconds=saq.CONFIG['global'].getint(
                                                           'check_watched_files_frequency'))

        for watched_file in self.watched_files.values():
            try:
                current_mtime = os.stat(watched_file.path).st_mtime
                if watched_file.last_mtime != current_mtime:
                    logging.info("detected change to {}".format(
                        watched_file.path))
                    watched_file.last_mtime = current_mtime

                    try:
                        watched_file.callback()
                    except Exception as e:
                        logging.error(
                            "callback failed for {} file {}: {}".format(
                                self, watched_file.path, e))
                        report_exception()

            except Exception as e:
                logging.error("unable to check file {}: {}".format(
                    watched_file.path, e))
                report_exception()

    def verify_environment(self):
        """Called after module is loaded to verify that everything it needs exists.."""
        pass

    def verify_config_exists(self, config_name):
        """Verifies the given configuration exists for this module.  Use this from verify_environment."""
        try:
            _ = self.config[config_name]
        except KeyError:
            raise KeyError("module {} missing configuration item {}".format(
                self, config_name))

    def verify_path_exists(self, path):
        """Verifies the given path exists.  If the path is relative then it is relative to SAQ_HOME."""
        _path = path
        if not os.path.isabs(path):
            _path = os.path.join(saq.SAQ_HOME, path)

        if not os.path.exists(_path):
            raise RuntimeError("missing {} used by {}".format(path, self))

    def verify_program_exists(self, path):
        """Verifies the given program exists on the system.  If relative then $PATH is checked using which."""
        from subprocess import Popen, DEVNULL

        if os.path.isabs(path):
            if not os.path.exists(path):
                raise RuntimeError("missing {} used by {}".format(path, self))
        else:
            p = Popen(['which', path], stdout=DEVNULL, stderr=DEVNULL)
            p.wait()

            if p.returncode:
                raise RuntimeError("cannot find {} used by {}".format(
                    path, self))

    def create_required_directory(self, dir):
        """Creates the given directory if it does not already exist.  Relative paths are relative to DATA_DIR."""

        if not os.path.isabs(dir):
            dir = os.path.join(saq.DATA_DIR, dir)

        if os.path.isdir(dir):
            return

        try:
            logging.debug("creating required directory {}".format(dir))
            os.makedirs(dir)
        except Exception as e:
            if not os.path.isdir(dir):
                logging.error(
                    "unable to create required directory {} for {}: {}".format(
                        dir, self, e))
                raise e

    @property
    def name(self):
        return self.config_section[len('analysis_module_'):]

    @property
    def shutdown(self):
        """Returns True if the current analysis engine is shutting down, False otherwise."""
        return self.engine.shutdown

    def sleep(self, seconds):
        """Utility function to sleep for N seconds without blocking shutdown."""
        while not self.shutdown and not self.cancel_analysis_flag and seconds > 0:
            # we also want to support sleeping for less than a second
            time.sleep(1 if seconds > 0 else seconds)
            seconds -= 1

    @property
    def state(self):
        """Returns the dict object you can use to maintain state over time."""
        try:
            return self.root.state[self.name]
        except KeyError:
            return None

    @state.setter
    def state(self, value):
        self.root.state[self.name] = value

    def initialize_state(self, value={}):
        """Sets the state for this module to the given value (defaults to empty dict.)
           If the state is already set this function does nothing."""
        if self.name not in self.root.state:
            self.root.state[self.name] = value

    def wait_for_analysis(self, observable, analysis_type):
        """Waits for the given Analysis (by type) be available for the given Observable."""
        from saq.engine import WaitForAnalysisException

        assert isinstance(observable, Observable)
        assert isinstance(analysis_type, type)

        # do we already have a dependency here?
        dep = observable.get_dependency(str(analysis_type))

        # have we already analyzed this observable for this anaysis type?
        analysis = observable.get_analysis(analysis_type)

        # if the dependency has been completed or resolved then we just return whatever we got
        # even if it was nothing
        if dep and (dep.completed or dep.resolved or dep.failed):
            return analysis

        # if we haven't analyzed this yet or we have and it hasn't completed yet (delayed) then we wait
        if analysis is None or isinstance(analysis,
                                          Analysis) and not analysis.completed:
            raise WaitForAnalysisException(observable, analysis_type)

        # otherwise we return the analysis
        return analysis

    def enter_cooldown(self):
        """Puts this module into cooldown mode which will cause it to get skipped for self.cooldown_period seconds."""
        self.cooldown_timeout = datetime.datetime.now() + datetime.timedelta(
            seconds=self.cooldown_period)
        logging.warning("{} entered cooldown period until {}".format(
            self, self.cooldown_timeout))

    def add_observable_exclusion(self, o_type, o_value):
        if o_type not in self.observable_exclusions:
            self.observable_exclusions[o_type] = []

        if o_value not in self.observable_exclusions[o_type]:
            self.observable_exclusions[o_type].append(o_value)
            #logging.debug("loaded observable exclusion type {} value {} for {}".format(
            #o_type, o_value, self))

    def is_excluded(self, observable):
        """Returns True if the given observable is excluded from analysis for this module."""
        if observable.type not in self.observable_exclusions:
            return False

        for exclusion_value in self.observable_exclusions[observable.type]:
            if observable.matches(exclusion_value):
                return True

        return False

    def load_exclusions(self):
        # load any observable exclusions for this module
        self.observable_exclusions = {}
        for key in self.config.keys():
            if key.startswith("exclude_"):
                o_type, o_value = self.config[key].split(':', 1)
                if o_type == 'observable_group':
                    # load exclusion from observable group
                    logging.debug(
                        "loading exclusion list from observable group {} for {}"
                        .format(o_value, self))
                    config_key = 'observable_group_{}'.format(o_value)
                    for key in saq.CONFIG[config_key].keys():
                        if key.startswith('define_'):
                            o_type, o_value = saq.CONFIG[config_key][
                                key].split(':', 1)
                            self.add_observable_exclusion(o_type, o_value)
                else:
                    self.add_observable_exclusion(o_type, o_value)

        # append global exclusions to this list
        for option_name in saq.CONFIG['observable_exclusions'].keys():
            o_type, o_value = saq.CONFIG['observable_exclusions'][
                option_name].split(':', 1)
            self.add_observable_exclusion(o_type, o_value)

    def add_expected_observable(self, o_type, o_value):
        """Adds the given observable as an expected observable of this module.
           Expected observables are never generated during analysis."""
        if o_type not in self.expected_observables:
            self.expected_observables[o_type] = set()

        self.expected_observables[o_type].add(o_value)
        logging.debug(
            "loaded expected observable type {} value {} for {}".format(
                o_type, o_value, self))

    def is_expected_observable(self, o_type, o_value):
        """Returns True if the given observable is an expected observable for this module."""
        try:
            return o_value in self.expected_observables[o_type]
        except KeyError:
            logging.debug("o_type {} not in {}".format(o_type, self))
            return False

    def load_expected_observables(self):
        self.expected_observables = {}
        for key in self.config.keys():
            if key.startswith("expect_"):
                o_type, o_value = self.config[key].split(':', 1)
                self.add_expected_observable(o_type, o_value)

    @property
    def generated_analysis_type(self):
        """Returns the type of the Analysis-based class this AnalysisModule generates.  
           Returns None if this AnalysisModule does not generate an Analysis object."""
        return None

    def create_analysis(self, observable):
        """Initializes and adds the generated Analysis for this module to the given Observable. 
           Returns the generated Analysis."""
        # have we already created analysis for this observable?
        if self.generated_analysis_type is None:
            logging.critical(
                "called create_analysis on {} which does not actually create Analysis"
                .format(self))
            return None

        analysis = observable.get_analysis(self.generated_analysis_type)
        if analysis:
            logging.debug(
                "returning existing analysis {} in call to create analysis from {} for {}"
                .format(analysis, self, observable))
            return analysis

        # otherwise we create and initialize a new one
        analysis = self.generated_analysis_type()
        analysis.initialize_details()
        observable.add_analysis(analysis)
        return analysis

    # XXX this is not supported at all
    @property
    def valid_analysis_target_type(self):
        """Returns a valid analysis target type for this module.  
           Defaults to Observable.  Return None to disable the check."""
        return Observable

    @property
    def valid_observable_types(self):
        """Returns a single (or list of) Observable type that are valid for this module.  
           Defaults to None (all types are valid.)  Return None to disable the check."""
        return None

    @property
    def required_directives(self):
        """Returns a list of required directives for the analysis to occur.  Defaults to empty list."""
        return []

    def accepts(self, obj):
        """Returns True if this object should be analyzed by this module, False otherwise."""

        # we still call execution on the module in cooldown mode
        # there may be things it can (or should) do while on cooldown

        # if this analysis module does not generate analysis then we can skip this
        # these are typically analysis modules that only do pre or post analysis work
        if self.generated_analysis_type is None:
            return False

        if self.valid_analysis_target_type is not None:
            if not isinstance(obj, self.valid_analysis_target_type):
                logging.debug("{} is not a valid target type for {}".format(
                    obj, self))
                return False

        if isinstance(obj,
                      Observable) and self.valid_observable_types is not None:
            # a little hack to allow valid_observable_types to return a single value
            valid_types = self.valid_observable_types
            if isinstance(valid_types, str):
                valid_types = [valid_types]

            try:
                if obj.type not in valid_types:
                    logging.debug("{} is not a valid type for {}".format(
                        obj.type, self))
                    return False
            except Exception as e:
                logging.error(
                    "valid_observable_types returned invalid data type {} for {}"
                    .format(type(valid_types), self))
                return False

        if isinstance(obj, Observable):
            # does this analysis module exclude this observable from analysis?
            if self.is_excluded(obj):
                logging.debug(
                    "observable {} is excluded from analysis by {}".format(
                        obj, self))
                return False

            # does this observable exclude itself from this kind of analysis?
            if obj.is_excluded(self):
                logging.debug(
                    "analysis module {} excluded from analyzing {}".format(
                        self, obj))
                return False

            # does this analysis module require directives?
            for directive in self.required_directives:
                if not obj.has_directive(directive):
                    logging.debug(
                        "{} does not have required directive {} for {}".format(
                            obj, directive, self))
                    return False

            # have we already generated analysis for this target?
            current_analysis = obj.get_analysis(self.generated_analysis_type)
            if current_analysis is not None:
                # did it return nothing?
                if isinstance(current_analysis, bool) and not current_analysis:
                    logging.debug(
                        "already analyzed {} with {} and returned nothing".
                        format(obj, self))
                    return False

                # has this analysis completed?
                if current_analysis.completed:
                    logging.debug("already analyzed {} with {}".format(
                        obj, self))
                    return False

        # are we in cooldown mode?
        # XXX side effect!
        if self.cooldown_timeout:
            # are we still in cooldown mode?
            if datetime.datetime.now() < self.cooldown_timeout:
                logging.debug("{} in cooldown mode".format(self))
            else:
                self.cooldown_timeout = None
                logging.info("{} exited cooldown mode".format(self))

        # end with custom logic, which defaults to True if not implemented
        return self.should_analyze(obj)

    def __str__(self):
        return type(self).__name__

    def cancel_analysis(self):
        """Try to cancel the analysis loop."""
        self.cancel_analysis_flag = True

        # also try to cancel a semaphore acquire request, if there is one
        # XXX there's certainly a race condition here
        if self.semaphore is not None:
            self.semaphore.cancel_request()

        # execute any custom handlers defined by the engine
        self.cancel_analysis_handler()

    def cancel_analysis_handler(self):
        """Override this function to implement custom cancel code."""
        pass

    def acquire_semaphore(self):
        """Wait for the semaphore to become available, or return immediately if this module does not use a semaphore."""
        if self.semaphore_name is None:
            logging.warning("semaphore name is None for {}".format(self))
            return False

        if self.cancel_analysis_flag:
            logging.info(
                "called acquire_semaphore() for module {} but cancel_analysis_flag is set"
                .format(self))
            return False

        # semaphores can be globally disabled
        if not saq.SEMAPHORES_ENABLED:
            logging.debug(
                "semaphores are disabled - not using semaphore {}".format(
                    self.semaphore))
            return False

        # create a new semaphore client to use
        self.semaphore = NetworkSemaphoreClient()

        #logging.debug("analysis module {0} acquiring semaphore {1}".format(self, self.semaphore_name))
        try:
            if not self.semaphore.acquire(self.semaphore_name):
                raise RuntimeError("acquire returned False")
            #logging.debug("analysis module {0} acquired semaphore {1}".format(self, self.semaphore_name))
        except Exception as e:
            logging.error("unable to acquire semaphore {} : {}".format(
                self.semaphore_name, e))
            report_exception()

            self.semaphore = None

            # TODO fall back to something else we can use
            return False

        return True

    def release_semaphore(self):
        """Release the acquired semaphore, or do nothing if this module does not use a semaphore."""
        if self.semaphore is None:
            return

        try:
            self.semaphore.release()
            #logging.debug("analysis module {0} released semaphore {1}".format(self, self.semaphore_name))
        except Exception as e:
            logging.error("unable to release semaphore: {0}".format(str(e)))
            report_exception()

    def analyze(self, obj, final_analysis=False):
        """Called by an analysis engine to analyze a given Analysis or Observable object."""

        assert isinstance(obj, Analysis) or isinstance(obj, Observable)

        # if we're watching any files, see if they've changed and need to be reloaded
        self.check_watched_files()

        # if we are executing in "final analysis mode" then we call this function instead
        if final_analysis:
            return self.execute_final_analysis(obj)
        else:
            return self.execute_analysis(obj)

    def cleanup(self):
        """Called after all analysis has completed. Override this if you need to clean up something after analysis."""
        pass

    def delay_analysis(self,
                       observable,
                       analysis,
                       hours=None,
                       minutes=None,
                       seconds=None,
                       timeout_hours=None,
                       timeout_minutes=None,
                       timeout_seconds=None):
        """Called to delay this analysis until the specified amount of time has expired."""
        if hours is None and minutes is None and seconds is None:
            hours = 0
            minutes = 0
            seconds = 10

        if hours is None:
            hours = 0

        if minutes is None:
            minutes = 0

        if seconds is None:
            seconds = 0

        logging.info(
            "adding delayed analysis for "
            "{} by {} on {} analysis {} hours {} minutes {} seconds {}".format(
                self.root, self, observable, analysis, hours, minutes,
                seconds))

        if self.engine.delay_analysis(self.root,
                                      observable,
                                      analysis,
                                      self,
                                      hours=hours,
                                      minutes=minutes,
                                      seconds=seconds,
                                      timeout_hours=timeout_hours,
                                      timeout_minutes=timeout_minutes,
                                      timeout_seconds=timeout_seconds):
            analysis.completed = False
            analysis.delayed = True
            return True

        analysis.completed = True
        analysis.delayed = False
        return False

    # THIS is what you override in your subclass!
    def execute_analysis(self, analysis):
        """Called to analyze Analysis or Observable objects. Override this in your subclass.
           Return True if analysis executed successfully (even if nothing was done.)
           Return False if analysis should not occur for this target.
           Returning False will cause the engine to skip any further analysis by this module for this target."""
        raise NotImplemented()

    def execute_final_analysis(self, analysis):
        """Called to analyze Analysis or Observable objects after all other analysis has completed."""
        return False

    def execute_pre_analysis(self):
        """This is called once at the very beginning of analysis."""
        pass

    def execute_post_analysis(self):
        """This is called after all analysis work has been performed and no outstanding work is left.
           If the function returns False then the function can possibly get called again if the analysis mode changes.
           If the function returns True then the function will not get called again."""
        return True

    def execute_threaded(self):
        """This is called on a thread if the module is configured as threaded."""
        pass

    def execute_threaded_loop_wrapper(self):
        try:
            self.execute_threaded_loop()
        finally:
            # make sure we remove the session if we created one
            saq.db.remove()

    def execute_threaded_loop(self):
        # continue to execute until analysis has completed
        while True:
            try:
                self.execute_threaded()
            except Exception as e:
                logging.error("{} failed threaded execution on {}: {}".format(
                    self, self.root, e))
                report_exception()
                return

            # wait for self.threaded_execution_frequency seconds before we execute again
            # make sure we exit when we're asked to
            timeout = self.threaded_execution_frequency
            while not self.engine.cancel_analysis_flag and \
                  not self.threaded_execution_stop_event.is_set() and \
                  timeout > 0:

                time.sleep(1)
                timeout -= 1

            if self.engine.cancel_analysis_flag:
                return

            if self.threaded_execution_stop_event.is_set():
                return

    def auto_reload(self):
        """Called every N seconds (see auto_reload_frequency in abstract
engine) in the main process to allow the module to update or change
configuration."""
        return

    def should_analyze(self, obj):
        """Put your custom "should I analyze this?" logic in this function."""
        return True

    def execute_maintenance(self):
        """Override this function to provide some kind of maintenance routine that is called every
           maintenance_frequency seconds."""
        pass

    @property
    def maintenance_frequency(self):
        """Returns how often to execute the maintenance function, in seconds, or None to disable (the default.)"""
        return None