예제 #1
    def __init__(self, *args, **kwargs):
        ''' Set up the apk list, the progress view and the task bookkeeping.

        See :py:meth:`.BaseAnalyzer.__init__` for details.

        Parameters
        ----------
        serialize_apks : bool, optional (default is True)
            If true, serialize .apk .
            Otherwise id (hash) of the apk will be send and fetched by the worker from the result db.
            Be sure to import the apks to the result db first!
        '''
        # read the flag before delegating; `kwargs` is still passed on unchanged
        serialize_apks = kwargs.get("serialize_apks", True)

        super(DistributedAnalyzer, self).__init__(*args, **kwargs)

        # list(apk_path, _apk, is_apk)
        self.__apks = list(AnalyzeUtil.apk_gen(self.apks_or_paths))

        # result group
        self.group_result = None

        # serialize .apk data
        self.__serialize_apks = serialize_apks
        # exactly one of the two messages applies, depending on the flag
        if serialize_apks:
            clilog.info("Will serialize .apk data!")
        else:
            clilog.info("Will send id of apks!")

        # stats view for cli
        self.analyze_stats_view = AnalysisStatsView(self._cnt_apks)
        self.analyze_stats_view.daemon = True

        # the `TaskCollection` for the analysis tasks
        self.task_collection = TaskCollection(self._cnt_apks)

        # register celery signals
        # NOTE(review): no registration call is present in this snippet -- confirm
        # whether the class provides one that has to be invoked here.

        self.lock = Lock()
예제 #2
    def __init__(self, *args, **kwargs):
        ''' Set up the apk list, the progress view and the task bookkeeping.

        See :py:meth:`.BaseAnalyzer.__init__` for details.

        Parameters
        ----------
        serialize_apks : bool, optional (default is True)
            If true, serialize .apk .
            Otherwise id (hash) of the apk will be send and fetched by the worker from the result db.
            Be sure to import the apks to the result db first!
        '''
        # read the flag before delegating; `kwargs` is still passed on unchanged
        serialize_apks = kwargs.get("serialize_apks", True)

        super(DistributedAnalyzer, self).__init__(*args, **kwargs)

        # list(apk_path, _apk, is_apk)
        self.__apks = list(AnalyzeUtil.apk_gen(self.apks_or_paths))

        # result group
        self.group_result = None

        # serialize .apk data
        self.__serialize_apks = serialize_apks
        # exactly one of the two messages applies, depending on the flag
        if serialize_apks:
            clilog.info("Will serialize .apk data!")
        else:
            clilog.info("Will send id of apks!")

        # stats view for cli
        self.analyze_stats_view = AnalysisStatsView(self._cnt_apks)
        self.analyze_stats_view.daemon = True

        # the `TaskCollection` for the analysis tasks
        self.task_collection = TaskCollection(self._cnt_apks)

        # register celery signals
        # NOTE(review): no registration call is present in this snippet -- confirm
        # whether the class provides one that has to be invoked here.

        self.lock = Lock()
예제 #3
class DistributedAnalyzer(BaseAnalyzer):
    ''' Distributed analyzer which uses celery.
    The analysis of each apk is seen as a single task and gets done by a worker which reads from an asynchronous message queue.
    '''
    def __init__(self, *args, **kwargs):
        ''' Set up the apk list, the progress view and the task bookkeeping.

        See :py:meth:`.BaseAnalyzer.__init__` for details.

        Parameters
        ----------
        serialize_apks : bool, optional (default is True)
            If true, serialize .apk .
            Otherwise id (hash) of the apk will be send and fetched by the worker from the result db.
            Be sure to import the apks to the result db first!
        '''
        # read the flag before delegating; `kwargs` is still passed on unchanged
        serialize_apks = kwargs.get("serialize_apks", True)

        super(DistributedAnalyzer, self).__init__(*args, **kwargs)

        # list(apk_path, _apk, is_apk)
        self.__apks = list(AnalyzeUtil.apk_gen(self.apks_or_paths))

        # result group
        self.group_result = None

        # serialize .apk data
        self.__serialize_apks = serialize_apks
        # exactly one of the two messages applies, depending on the flag
        if serialize_apks:
            clilog.info("Will serialize .apk data!")
        else:
            clilog.info("Will send id of apks!")

        # stats view for cli
        self.analyze_stats_view = AnalysisStatsView(self._cnt_apks)
        self.analyze_stats_view.daemon = True

        # the `TaskCollection` for the analysis tasks
        self.task_collection = TaskCollection(self._cnt_apks)

        # register celery signals
        self.register_signals()

        self.lock = Lock()

    def get_lock(self):
        ''' Return the lock object stored in the private `__lock` attribute. '''
        return self.__lock

    def set_lock(self, value):
        ''' Store `value` as the lock in the private `__lock` attribute. '''
        self.__lock = value

    def del_lock(self):
        ''' Delete the private `__lock` attribute. '''
        del self.__lock

    def get_analyze_stats_view(self):
        ''' Return the progress view stored in the private `__analyze_stats_view` attribute. '''
        return self.__analyze_stats_view

    def set_analyze_stats_view(self, value):
        ''' Store `value` as the progress view in the private `__analyze_stats_view` attribute. '''
        self.__analyze_stats_view = value

    def del_analyze_stats_view(self):
        ''' Delete the private `__analyze_stats_view` attribute. '''
        del self.__analyze_stats_view

    def get_serialize_apks(self):
        ''' Return the flag stored in the private `__serialize_apks` attribute. '''
        return self.__serialize_apks

    def get_apks(self):
        ''' Return the apk list stored in the private `__apks` attribute. '''
        return self.__apks

    def set_apks(self, value):
        ''' Store `value` as the apk list in the private `__apks` attribute. '''
        self.__apks = value

    def set_cnt_apks(self, value):
        ''' Store `value` in the private `__cnt_apks` attribute.

        NOTE(review): other methods of this class read `self._cnt_apks` (single
        underscore), which is a different attribute -- confirm which one is meant.
        '''
        self.__cnt_apks = value

    def del_apks(self):
        ''' Delete the private `__apks` attribute. '''
        del self.__apks

    def del_cnt_apks(self):
        ''' Delete the private `__cnt_apks` attribute (the one written by `set_cnt_apks`). '''
        del self.__cnt_apks

    def get_group_result(self):
        ''' Return the result collection stored in the private `__group_result` attribute. '''
        return self.__group_result

    def set_group_result(self, value):
        ''' Store `value` as the result collection in the private `__group_result` attribute. '''
        self.__group_result = value

    def del_group_result(self):
        ''' Delete the private `__group_result` attribute. '''
        del self.__group_result

    # Public attributes, each wired as property(fget, fset, fdel, doc).
    # The doc string has to be the fourth argument: in third position it would
    # be installed as the deleter.
    group_result = property(
        get_group_result, set_group_result, del_group_result,
        "GroupResult : The result collection object.")
    apks = property(
        get_apks, set_apks, del_apks,
        "list<tuple<str, Apk, bool>> : Path to .apk, instance of `Apk`, bool what determines if current element of apks_or_paths is an `Apk`")
    # read-only: no setter and no deleter
    serialize_apks = property(
        get_serialize_apks, None, None,
        "bool : If true, serialize .apk. Otherwise id (hash) of the apk will be send and fetched by the worker from the result db.")
    analyze_stats_view = property(
        get_analyze_stats_view, set_analyze_stats_view, del_analyze_stats_view,
        "AnalysisStatsView : Thread showing current analysis progress.")
    lock = property(get_lock, set_lock, del_lock, "Lock")


    def register_signals(self):
        ''' Register celery signals for task publishing '''
        # register signals
        # NOTE(review): no signal is connected here -- the body consists of this
        # comment only, so calling the method currently does nothing.
        # Presumably `before_task_publish_action` / `after_task_publish_action`
        # are the intended receivers; TODO confirm and restore the connect calls.

    #---Analysis progress display

    def stop_analysis_view(self):
        ''' Return the number of successful + failed tasks reported by the progress view.

        Returns
        -------
        int
            0 if no progress view is set.
            Otherwise `get_total_run_tasks()` of the view, if the view is alive.
        None
            If a view is set but it is not alive.

        NOTE(review): the comments below mention terminating the view and waiting
        for it, but no such calls are present in this method -- confirm.
        '''
        view = self.analyze_stats_view
        if view is None:
            return 0

        # terminate by using event
        # `isAlive()` was removed in Python 3.9, `is_alive()` is available since 2.6
        if view.is_alive():
            # wait for analysis view
            return view.get_total_run_tasks()
        return None

    #---Shared memory stats

    def get_cnt_analyzed_apks(self):
        ''' Return what the progress view reports via `get_total_run_tasks_sm()`,
        i.e. the number of analyzed apks. '''
        stats_view = self.analyze_stats_view
        return stats_view.get_total_run_tasks_sm()

    def get_published_tasks_sm(self):
        ''' Return the `send_tasks` counter of the task collection,
        i.e. the number of published tasks. '''
        collection = self.task_collection
        return collection.send_tasks

    # The setter receives (instance, value) and has to *call* its target, and so
    # does the deleter; merely returning the bound method does nothing.
    # NOTE(review): `set_cnt_analyzed_apks` / `del_cnt_analyzed_apks` are not
    # defined in this class -- presumably inherited from `BaseAnalyzer`; confirm.
    cnt_analyzed_apks = property(
        get_cnt_analyzed_apks,
        lambda s, value: s.set_cnt_analyzed_apks(value),
        lambda s: s.del_cnt_analyzed_apks(),
        "Value<int> : Shared memory integer showing the count of already analyzed apks")

    #---Task arguments generators

    def send_id_args_generator(self, apk_gen):
        ''' Generator over arguments for sending of apk id.

        Parameters
        ----------
        apk_gen : generator<tuple<object, bool, object>>
            Generator over zip files or ids.
            Second component of tuples indicates that the generator is over the ids
            rather than over the zip files.
            The third component is passed through unchanged.
            See :py:meth:`.AnalyzeUtil.apk_id_or_raw_data_gen` to get such a generator.

        Returns
        -------
        generator<tuple>
            One tuple per element of `apk_gen`:
            (script packages, `min_script_needs`, `script_hashes`, zip file or id, is_id, fastapk)
        '''
        # get package names from initialized scripts
        script_packages = Util.module_names_from_class(self.script_list)

        for apk_zipfile_or_hash, is_id, fastapk in apk_gen:
            yield script_packages, self.min_script_needs, self.script_hashes, apk_zipfile_or_hash, is_id, fastapk

    def send_apk_args_generator(self, apk_gen):
        ''' Generator over arguments for sending APKs.

        Parameters
        ----------
        apk_gen : generator<tuple<object, bool, object>>
            Generator over zip files or ids.
            Second component of tuples indicates that the generator is over the ids
            rather than over the zip files.
            The third component is passed through unchanged.
            See :py:meth:`.AnalyzeUtil.apk_id_or_raw_data_gen` to get such a generator.

        Returns
        -------
        generator<tuple>
            One tuple per element of `apk_gen`:
            (script packages, `min_script_needs`, `script_hashes`, zip file or id, is_id, fast_apk)
        '''
        # get package names from initialized scripts
        script_packages = Util.module_names_from_class(self.script_list)

        for apk_zipfile_or_hash, is_id, fast_apk in apk_gen:
            yield script_packages, self.min_script_needs, self.script_hashes, apk_zipfile_or_hash, is_id, fast_apk


    def _analyze(self):
        ''' See doc of :py:meth:`.BaseAnalyzer.analyze`.

        Publishes the analysis tasks, waits for their results and returns the value
        of `stop_analysis_view()`. Returns 0 from the `NetworkError` and
        `DatabaseOpenError` handlers.

        NOTE(review): this copy of the method is incomplete -- the `try:` lines that
        belong to the `except` clauses, the `else:` of the publishing branch and
        several statements are missing. See the notes below; restore before use.
        '''

        # try to get registered workers
        # if the network fails at this point -> stop analysis
        # NOTE(review): the `try:` block that looks up the workers is missing here
        except NetworkError as e:
            return 0

        # storage objects
        storage = self.storage

        clilog.info("Number of apks to analyze: %d", self._cnt_apks)

        # NOTE(review): a `try:` is missing here (the block below is indented one level
        # deeper), and `tasks` is not defined anywhere in the visible code
            # get analyze task
            analyze_task = tasks[CeleryConstants.get_analyze_task_name()]

            # create storage
            # NOTE(review): the statement announced by the comment above is missing

            # send tasks
            start = time()

            # apk generator over .apk or apk hashes
            apk_gen = AnalyzeUtil.apk_id_or_raw_data_gen(
                self.apks, force_raw_data=self.serialize_apks)

            clilog.info("Task publishing progress:")

            # send and serialize .apks
            # if analysis via path serialize them!
            if self.serialize_apks:
                log.info("sending .apks to message broker")
                self.group_result = group_result = GroupResult(results=[])

                # one task per argument tuple
                # NOTE(review): `task` is never added to `group_result` in the visible code
                for args in self.send_apk_args_generator(apk_gen):
                    task = analyze_task.delay(*args)

            # send only apk id and let fetch via mongodb
            # NOTE(review): the `else:` introducing this branch is missing
                log.info("sending ids of apks")

                # NOTE(review): the generator expression is truncated (its element is missing)
                task_group = group(
                     for args in self.send_id_args_generator(apk_gen)))

                # publish tasks
                self.group_result = task_group()

            log.info("sending took %ss", (time() - start))
            sys.stderr.write("\nAnalysis progress:\n")

            # start showing analysis progress
            # NOTE(review): the statement announced by the comment above is missing

            # wait for results
            log.debug("joining on ResultGroup ... ")

            # setup callback
            # NOTE(review): the call below is truncated and `callback_func` is never used afterwards
            callback_func = self.get_callback_func(self.success_handler,

            clilog.info("\nanalysis done ... ")
            log.info("distributed analysis took %ss", (time() - start))

            return self.stop_analysis_view()
        except DatabaseOpenError as e:
            return 0

        # any other error (and Ctrl-C) ends up here: revoke what has been published
        except (KeyboardInterrupt, Exception) as e:
            # NOTE(review): the body of this `if` is truncated; only the tail of a call is left
            if not isinstance(e, KeyboardInterrupt):
                "Interrupting distributed analysis ... Please wait a moment!")
            log.warn("revoking tasks on all workers ...")

            if celerysettings.CELERY_TASK_REVOCATION_ENABLED:
                # revoke tasks
                if self.group_result is None:
                    # revoke via task ids
                    log.debug("revoking while publishing tasks ...")

                    # revoke via GroupResult if yet available/created
                    # first available after all tasks have been send
                    # NOTE(review): as written this runs only when `group_result` is None;
                    # an `else:` (and the revocation via task ids) appears to be missing
                    self.group_result.revoke(terminate=True, signal='SIGKILL')
                log.warn("revoked tasks and killed workers ...")

            #return number of analyzed apks
            return self.stop_analysis_view()


    def error_handler(self, task_id, error_msg, state, traceback=None):
        ''' Handler for a failed task.

        Parameters
        ----------
        task_id : str
            UUID of task.
        error_msg : str
            Error message.
        state : See `state` module
            State of the task.
        traceback : str
            The traceback if error occurred.
        '''
        # progress view does not print a newline, so do it here
        # (the progress header of the analysis is written to stderr as well)
        sys.stderr.write("\n")

        # log error msg
        log.warn("Task %s failed with state %s: %s", task_id, state, error_msg)

        # print traceback if available
        if traceback:
            log.warn("%s", traceback)

    def success_handler(self, task_id, result):
        ''' Handler for a successful task.
        Fetches the result from the result database and stores it in the file system.

        `task_id` is not used by the visible code; nothing is done if `result` is None.
        '''
        if result is not None:
            # keep ids of mongodb entries
            # result may hold multiple results
            # NOTE(review): the body of this loop is missing in this copy of the file;
            # restore it before use.
            for res in result:

            # store analysis results
            # doesn't raise a DatabaseLoadException due to wait_for_db
            self.storage.fetch_results_from_mongodb(result, wait_for_db=True)

    def get_callback_func(self, handle_success, handle_error=None):
        '''
        Callback function for task finish.
        Store results and display progress

        Parameters
        ----------
        handle_success : func
            Function for the success of a task.
            E.g. handle_success(task_id, result) with type:
            handle_success: str -> object -> object
        handle_error : func, optional (default is None)
            Function that gets the task id and error message for a task that failed.
            E.g. handle_error(task_id, error_msg, state, traceback = None)
            with type handle_error: str -> str -> str -> str -> object.

        Returns
        -------
        function<str, object>
        '''
        def callback(task_id, result_dict):
            '''
            Parameters
            ----------
            task_id : str
                UUID of task.
            result_dict : dict
                Dictionary holding the meta infos about the task as well as the result.
                See `CeleryConstants.CELERY_RESULT_BACKEND_*` for some available keys.
            '''
            log.debug("Task %s finished", task_id)

            result = result_dict[CeleryConstants.CELERY_RESULT_BACKEND_KEY_RESULT]
            traceback = result_dict[CeleryConstants.CELERY_RESULT_BACKEND_KEY_TRACEBACK]
            state = result_dict[CeleryConstants.CELERY_RESULT_BACKEND_KEY_STATUS]

            task_failed = state in states.EXCEPTION_STATES

            # a task is counted exactly once: either as failed or as successful
            if task_failed:
                # handle error
                if handle_error is not None:
                    handle_error(task_id, result, state, traceback)

                # we need locking here because operation is not atomic
                with self.lock:
                    self.analyze_stats_view.failed_tasks += 1
            else:
                if handle_success is not None:
                    handle_success(task_id, result)

                # we need locking here because operation is not atomic
                with self.lock:
                    self.analyze_stats_view.successful_tasks += 1

        return callback


    def before_task_publish_action(self, *args, **kwargs):
        ''' Log the id of a task that is about to be published.

        The id is read from `kwargs["body"]["id"]`.
        NOTE(review): the id is only logged here, it is not stored anywhere.
        '''
        message_body = kwargs["body"]
        log.debug("will publish task %s", message_body["id"])

    def after_task_publish_action(
            self, exchange=None, body=None, routing_key=None, signal=None,
            sender=None,
            # take unknown keywords for newer APIs
            **kwargs):
        '''
        Inform user about published tasks.

        Function will be executed on the task sender after the task has been published.

        Parameters
        ----------
        exchange : str
        body : dict, optional (default is None)
            The task message body, see Task Messages for a reference of possible fields that can be defined.
            The task id is read from `body["id"]`, so a body has to be supplied.
        routing_key : str
        signal : signal.Signal
        sender : str

        See Also
        --------
        The celery documentation of the `after_task_publish` signal.
        '''
        task_id = body["id"]
        # NOTE(review): nothing in this method updates `send_tasks` -- confirm where
        # the counter is incremented.
        Util.print_dyn_progress(
            "Send tasks: %d, current task id: %s, queue: %s" %
            (self.task_collection.send_tasks.value, task_id, routing_key))


    def all_tasks_published(self):
        ''' Return what the task collection reports via its own `all_tasks_published()`. '''
        collection = self.task_collection
        return collection.all_tasks_published()
예제 #4
class DistributedAnalyzer(BaseAnalyzer):
    ''' Distributed analyzer which uses celery.
    The analysis of each apk is seen as a single task and gets done by a worker which reads from an asynchronous message queue.
    '''

    def __init__(self, *args, **kwargs):
        ''' Set up the apk list, the progress view and the task bookkeeping.

        See :py:meth:`.BaseAnalyzer.__init__` for details.

        Parameters
        ----------
        serialize_apks : bool, optional (default is True)
            If true, serialize .apk .
            Otherwise id (hash) of the apk will be send and fetched by the worker from the result db.
            Be sure to import the apks to the result db first!
        '''
        # read the flag before delegating; `kwargs` is still passed on unchanged
        serialize_apks = kwargs.get("serialize_apks", True)

        super(DistributedAnalyzer, self).__init__(*args, **kwargs)

        # list(apk_path, _apk, is_apk)
        self.__apks = list(AnalyzeUtil.apk_gen(self.apks_or_paths))

        # result group
        self.group_result = None

        # serialize .apk data
        self.__serialize_apks = serialize_apks
        # exactly one of the two messages applies, depending on the flag
        if serialize_apks:
            clilog.info("Will serialize .apk data!")
        else:
            clilog.info("Will send id of apks!")

        # stats view for cli
        self.analyze_stats_view = AnalysisStatsView(self._cnt_apks)
        self.analyze_stats_view.daemon = True

        # the `TaskCollection` for the analysis tasks
        self.task_collection = TaskCollection(self._cnt_apks)

        # register celery signals
        self.register_signals()

        self.lock = Lock()

    def get_lock(self):
        ''' Return the lock object stored in the private `__lock` attribute. '''
        return self.__lock

    def set_lock(self, value):
        ''' Store `value` as the lock in the private `__lock` attribute. '''
        self.__lock = value

    def del_lock(self):
        ''' Delete the private `__lock` attribute. '''
        del self.__lock

    def get_analyze_stats_view(self):
        ''' Return the progress view stored in the private `__analyze_stats_view` attribute. '''
        return self.__analyze_stats_view

    def set_analyze_stats_view(self, value):
        ''' Store `value` as the progress view in the private `__analyze_stats_view` attribute. '''
        self.__analyze_stats_view = value

    def del_analyze_stats_view(self):
        ''' Delete the private `__analyze_stats_view` attribute. '''
        del self.__analyze_stats_view

    def get_serialize_apks(self):
        ''' Return the flag stored in the private `__serialize_apks` attribute. '''
        return self.__serialize_apks

    def get_apks(self):
        ''' Return the apk list stored in the private `__apks` attribute. '''
        return self.__apks

    def set_apks(self, value):
        ''' Store `value` as the apk list in the private `__apks` attribute. '''
        self.__apks = value

    def set_cnt_apks(self, value):
        ''' Store `value` in the private `__cnt_apks` attribute.

        NOTE(review): other methods of this class read `self._cnt_apks` (single
        underscore), which is a different attribute -- confirm which one is meant.
        '''
        self.__cnt_apks = value

    def del_apks(self):
        ''' Delete the private `__apks` attribute. '''
        del self.__apks

    def del_cnt_apks(self):
        ''' Delete the private `__cnt_apks` attribute (the one written by `set_cnt_apks`). '''
        del self.__cnt_apks

    def get_group_result(self):
        ''' Return the result collection stored in the private `__group_result` attribute. '''
        return self.__group_result

    def set_group_result(self, value):
        ''' Store `value` as the result collection in the private `__group_result` attribute. '''
        self.__group_result = value

    def del_group_result(self):
        ''' Delete the private `__group_result` attribute. '''
        del self.__group_result

    # Public attributes, each wired as property(fget, fset, fdel, doc).
    group_result = property(
        get_group_result,
        set_group_result,
        del_group_result,
        "GroupResult : The result collection object.",
    )
    apks = property(
        get_apks,
        set_apks,
        del_apks,
        "list<tuple<str, Apk, bool>> : Path to .apk, instance of `Apk`, bool what determines if current element of apks_or_paths is an `Apk`",
    )
    # read-only: no setter and no deleter
    serialize_apks = property(
        get_serialize_apks,
        None,
        None,
        "bool : If true, serialize .apk. Otherwise id (hash) of the apk will be send and fetched by the worker from the result db.",
    )
    analyze_stats_view = property(
        get_analyze_stats_view,
        set_analyze_stats_view,
        del_analyze_stats_view,
        "AnalysisStatsView : Thread showing current analysis progress.",
    )
    lock = property(
        get_lock,
        set_lock,
        del_lock,
        "Lock",
    )


    def register_signals(self):
        ''' Connect the publish handlers of this instance to the celery signals.

        Both handlers are connected with the analyze task name as `sender`.
        '''
        # (signal, receiver) pairs, connected in this order
        receivers = (
            (before_task_publish, self.before_task_publish_action),
            (after_task_publish, self.after_task_publish_action),
        )
        for publish_signal, receiver in receivers:
            publish_signal.connect(receiver, sender = CeleryConstants.get_analyze_task_name())

    #---Analysis progress display

    def stop_analysis_view(self):
        ''' Return the number of successful + failed tasks reported by the progress view.

        Returns
        -------
        int
            0 if no progress view is set.
            Otherwise `get_total_run_tasks()` of the view, if the view is alive.
        None
            If a view is set but it is not alive.

        NOTE(review): the comments below mention terminating the view and waiting
        for it, but no such calls are present in this method -- confirm.
        '''
        view = self.analyze_stats_view
        if view is None:
            return 0

        # terminate by using event
        # `isAlive()` was removed in Python 3.9, `is_alive()` is available since 2.6
        if view.is_alive():
            # wait for analysis view
            return view.get_total_run_tasks()
        return None

    #---Shared memory stats

    def get_cnt_analyzed_apks(self):
        ''' Return what the progress view reports via `get_total_run_tasks_sm()`,
        i.e. the number of analyzed apks. '''
        stats_view = self.analyze_stats_view
        return stats_view.get_total_run_tasks_sm()

    def get_published_tasks_sm(self):
        ''' Return the `send_tasks` counter of the task collection,
        i.e. the number of published tasks. '''
        collection = self.task_collection
        return collection.send_tasks

    # The setter receives (instance, value) and has to *call* its target, and so
    # does the deleter; merely returning the bound method does nothing.
    # NOTE(review): `set_cnt_analyzed_apks` / `del_cnt_analyzed_apks` are not
    # defined in this class -- presumably inherited from `BaseAnalyzer`; confirm.
    cnt_analyzed_apks = property(
        get_cnt_analyzed_apks,
        lambda s, value: s.set_cnt_analyzed_apks(value),
        lambda s: s.del_cnt_analyzed_apks(),
        "Value<int> : Shared memory integer showing the count of already analyzed apks")

    #---Task arguments generators

    def send_id_args_generator(self, apk_gen):
        ''' Generator over arguments for sending of apk id.

        Parameters
        ----------
        apk_gen : generator<tuple<object, bool, object>>
            Generator over zip files or ids.
            Second component of tuples indicates that the generator is over the ids
            rather than over the zip files.
            The third component is passed through unchanged.
            See :py:meth:`.AnalyzeUtil.apk_id_or_raw_data_gen` to get such a generator.

        Returns
        -------
        generator<tuple>
            One tuple per element of `apk_gen`:
            (script packages, `min_script_needs`, `script_hashes`, zip file or id, is_id, fastapk)
        '''
        # get package names from initialized scripts
        script_packages = Util.module_names_from_class(self.script_list)

        for apk_zipfile_or_hash, is_id, fastapk in apk_gen:
            yield script_packages, self.min_script_needs, self.script_hashes, apk_zipfile_or_hash, is_id, fastapk

    def send_apk_args_generator(self, apk_gen):
        ''' Generator over arguments for sending APKs.

        Parameters
        ----------
        apk_gen : generator<tuple<object, bool, object>>
            Generator over zip files or ids.
            Second component of tuples indicates that the generator is over the ids
            rather than over the zip files.
            The third component is passed through unchanged.
            See :py:meth:`.AnalyzeUtil.apk_id_or_raw_data_gen` to get such a generator.

        Returns
        -------
        generator<tuple>
            One tuple per element of `apk_gen`:
            (script packages, `min_script_needs`, `script_hashes`, zip file or id, is_id, fast_apk)
        '''
        # get package names from initialized scripts
        script_packages = Util.module_names_from_class(self.script_list)

        for apk_zipfile_or_hash, is_id, fast_apk in apk_gen:
            yield script_packages, self.min_script_needs, self.script_hashes, apk_zipfile_or_hash, is_id, fast_apk


    def _analyze(self):
        ''' See doc of :py:meth:`.BaseAnalyzer.analyze`.

        Publishes the analysis tasks, waits for their results via
        `CeleryUtil.join_native` and returns the value of `stop_analysis_view()`.
        Returns 0 from the `NetworkError` and `DatabaseOpenError` handlers.

        NOTE(review): this copy of the method is incomplete -- the `try:` lines that
        belong to the `except` clauses, the `else:` of the publishing branch and
        several statements are missing. See the notes below; restore before use.
        '''

        # try to get registered workers
        # if the network fails at this point -> stop analysis
        # NOTE(review): the `try:` block that looks up the workers is missing here
        except NetworkError as e:
            return 0

        # storage objects
        storage = self.storage

        clilog.info("Number of apks to analyze: %d", self._cnt_apks)

        # NOTE(review): a `try:` is missing here (the block below is indented one level
        # deeper), and `tasks` is not defined anywhere in the visible code
            # get analyze task
            analyze_task = tasks[CeleryConstants.get_analyze_task_name()]

            # create storage
            # NOTE(review): the statement announced by the comment above is missing

            # send tasks
            start = time()

            # apk generator over .apk or apk hashes
            apk_gen = AnalyzeUtil.apk_id_or_raw_data_gen(self.apks, force_raw_data = self.serialize_apks)

            clilog.info("Task publishing progress:")

            # send and serialize .apks
            # if analysis via path serialize them!
            if self.serialize_apks:
                log.info("sending .apks to message broker")
                self.group_result = group_result = GroupResult(results = [])

                # one task per argument tuple
                # NOTE(review): `task` is never added to `group_result` in the visible code
                for args in self.send_apk_args_generator(apk_gen):
                    task = analyze_task.delay(*args)

            # send only apk id and let fetch via mongodb
            # NOTE(review): the `else:` introducing this branch is missing
                log.info("sending ids of apks")

                # one task signature per argument tuple, bundled into a celery group
                task_group = group((analyze_task.s(*args) for args in self.send_id_args_generator(apk_gen)))

                # publish tasks
                self.group_result = task_group()

            log.info("sending took %ss", (time() - start))
            sys.stderr.write("\nAnalysis progress:\n")

            # start showing analysis progress
            # NOTE(review): the statement announced by the comment above is missing

            # wait for results
            log.debug("joining on ResultGroup ... ")

            # setup callback
            callback_func = self.get_callback_func(self.success_handler, self.error_handler)
            # `callback_func` is handed each finished task; errors are not propagated
            CeleryUtil.join_native(self.group_result, propagate = False, callback = callback_func)

            clilog.info("\nanalysis done ... ")
            log.info("distributed analysis took %ss", (time() - start))

            return self.stop_analysis_view()
        except DatabaseOpenError as e:
            return 0

        # any other error (and Ctrl-C) ends up here: revoke what has been published
        except (KeyboardInterrupt, Exception) as e:
            # NOTE(review): the body of this `if` is missing (the next line is not indented under it)
            if not isinstance(e, KeyboardInterrupt):
            log.warn("Interrupting distributed analysis ... Please wait a moment!")
            log.warn("revoking tasks on all workers ...")

            if celerysettings.CELERY_TASK_REVOCATION_ENABLED:
                # revoke tasks
                if self.group_result is None:
                    # revoke via task ids
                    log.debug("revoking while publishing tasks ...")

                    self.task_collection.revoke_all(terminate = True, signal = 'SIGKILL')
                    # revoke via GroupResult if yet available/created
                    # first available after all tasks have been send
                    # NOTE(review): as written this runs only when `group_result` is None;
                    # an `else:` before this call appears to be missing
                    self.group_result.revoke(terminate = True, signal = 'SIGKILL')
                log.warn("revoked tasks and killed workers ...")

            #return number of analyzed apks
            return self.stop_analysis_view()


    def error_handler(self, task_id, error_msg, state, traceback = None):
        ''' Handler for a failed task.

        Parameters
        ----------
        task_id : str
            UUID of task.
        error_msg : str
            Error message.
        state : See `state` module
            State of the task.
        traceback : str
            The traceback if error occurred.
        '''
        # progress view does not print a newline, so do it here
        # (the progress header of the analysis is written to stderr as well)
        sys.stderr.write("\n")

        # log error msg
        log.warn("Task %s failed with state %s: %s", task_id, state, error_msg)

        # print traceback if available
        if traceback:
            log.warn("%s", traceback)

    def success_handler(self, task_id, result):
        ''' Handler for a successful task.
        Fetches the result from the result database and stores it in the file system.

        `task_id` is not used by the visible code; nothing is done if `result` is None.
        '''
        if result is not None:
            # keep ids of mongodb entries
            # result may hold multiple results
            # NOTE(review): the body of this loop is missing in this copy of the file;
            # restore it before use.
            for res in result:

            # store analysis results
            # doesn't raise a DatabaseLoadException due to wait_for_db
            self.storage.fetch_results_from_mongodb(result, wait_for_db = True)

    def get_callback_func(self, handle_success, handle_error = None):
        '''
        Callback function for task finish.
        Store results and display progress

        Parameters
        ----------
        handle_success : func
            Function for the success of a task.
            E.g. handle_success(task_id, result) with type:
            handle_success: str -> object -> object
        handle_error : func, optional (default is None)
            Function that gets the task id and error message for a task that failed.
            E.g. handle_error(task_id, error_msg, state, traceback = None)
            with type handle_error: str -> str -> str -> str -> object.

        Returns
        -------
        function<str, object>
        '''

        def callback(task_id, result_dict):
            '''
            Parameters
            ----------
            task_id : str
                UUID of task.
            result_dict : dict
                Dictionary holding the meta infos about the task as well as the result.
                See `CeleryConstants.CELERY_RESULT_BACKEND_*` for some available keys.
            '''
            log.debug("Task %s finished", task_id)

            result = result_dict[CeleryConstants.CELERY_RESULT_BACKEND_KEY_RESULT]
            traceback = result_dict[CeleryConstants.CELERY_RESULT_BACKEND_KEY_TRACEBACK]
            state = result_dict[CeleryConstants.CELERY_RESULT_BACKEND_KEY_STATUS]

            task_failed = state in states.EXCEPTION_STATES

            # a task is counted exactly once: either as failed or as successful
            if task_failed:
                # handle error
                if handle_error is not None:
                    handle_error(task_id, result, state, traceback)

                # we need locking here because operation is not atomic
                with self.lock:
                    self.analyze_stats_view.failed_tasks += 1
            else:
                if handle_success is not None:
                    handle_success(task_id, result)

                # we need locking here because operation is not atomic
                with self.lock:
                    self.analyze_stats_view.successful_tasks += 1

        return callback


    def before_task_publish_action(self, *args, **kwargs):
        ''' Log the id of a task that is about to be published.

        The id is read from `kwargs["body"]["id"]`.
        NOTE(review): the id is only logged here, it is not stored anywhere.
        '''
        message_body = kwargs["body"]
        log.debug("will publish task %s", message_body["id"])

    def after_task_publish_action(self, exchange=None, body=None, routing_key = None, signal = None, sender = None,
                                  # take unknown keywords for newer APIs
                                  **kwargs):
        '''
        Inform user about published tasks.

        Function will be executed on the task sender after the task has been published.

        Parameters
        ----------
        exchange : str
        body : dict, optional (default is None)
            The task message body, see Task Messages for a reference of possible fields that can be defined.
            The task id is read from `body["id"]`, so a body has to be supplied.
        routing_key : str
        signal : signal.Signal
        sender : str

        See Also
        --------
        The celery documentation of the `after_task_publish` signal.
        '''
        task_id = body["id"]
        # NOTE(review): nothing in this method updates `send_tasks` -- confirm where
        # the counter is incremented.
        Util.print_dyn_progress("Send tasks: %d, current task id: %s, queue: %s" % (self.task_collection.send_tasks.value, task_id, routing_key))


    def all_tasks_published(self):
        ''' Return what the task collection reports via its own `all_tasks_published()`. '''
        collection = self.task_collection
        return collection.all_tasks_published()