Beispiel #1
0
    def analyze_apk(self, eandro_apk):
        ''' Run the configured scripts on `eandro_apk` and hand back the outcome.

        Parameters
        ----------
        eandro_apk : EAndroApk
            The apk to analyze. If None, nothing is analyzed.

        Returns
        -------
        list<FastApk, AndroScript>
            The analysis result; its second entry is replaced by a deep copy
            of the scripts.
        None
            If no apk was given or the analysis delivered no result.
        '''
        # nothing to do without an apk
        if eandro_apk is None:
            return None

        outcome = AnalyzeUtil.analyze_apk(eandro_apk, self.androscripts, self.min_script_needs, reset_scripts = True)
        if outcome is None:
            return None

        analyzed_apk, used_scripts = outcome

        # the script objects get reused for the next analysis,
        # so hand out a snapshot instead of the live objects
        outcome[1] = deepcopy(used_scripts)

        clilog.debug("analyzed %s", analyzed_apk.short_description())

        return outcome
Beispiel #2
0
    def get_apk(self, _hash, apk = None, **kwargs):
        '''
        Get the `EAndroApk` from `_hash`.

        Parameters
        ----------
        _hash : str
            Hash of the .apk (sha256)
        apk : Apk
            Carries metainformation needed to build the whole path to the element in S3.

        Raises
        ------
        S3StorageLoadException

        Returns
        -------
        EAndroApk
            Apk constructed from raw data and meta infos.
        '''
        # use to hold apk meta infos
        #fast_apk = FastApk(package_name, version_name, path, _hash, import_date, tag)
        try:
            apk_raw = BytesIO()
            if apk is None:
                raise S3StorageLoadException(self, content = "Apk:%s" % apk.short_description(), caused_by = RuntimeError("No APK metainformation given!")), None, sys.exc_info()[2]
            _id = Util.get_apk_path_incl_filename(apk)
            self.bucket_get(_id).get_contents_to_file(apk_raw)
            apk_raw.seek(0)
            eandro_apk = AnalyzeUtil.open_apk(apk_raw.read(), None, raw = True)
            return eandro_apk 
        except (BotoClientError, S3ResponseError) as e:
            raise S3StorageLoadException(self, content = "Apk:%s" % apk.short_description(), caused_by = e), None, sys.exc_info()[2]
Beispiel #3
0
    def analyze_apk(self, eandro_apk):
        ''' Run the configured scripts on `eandro_apk` and hand back the outcome.

        Parameters
        ----------
        eandro_apk : EAndroApk
            The apk to analyze. If None, nothing is analyzed.

        Returns
        -------
        list<FastApk, AndroScript>
            The analysis result; its second entry is replaced by a deep copy
            of the scripts.
        None
            If no apk was given or the analysis delivered no result.
        '''
        # nothing to do without an apk
        if eandro_apk is None:
            return None

        outcome = AnalyzeUtil.analyze_apk(eandro_apk,
                                          self.androscripts,
                                          self.min_script_needs,
                                          reset_scripts=True)
        if outcome is None:
            return None

        analyzed_apk, used_scripts = outcome

        # the script objects get reused for the next analysis,
        # so hand out a snapshot instead of the live objects
        outcome[1] = deepcopy(used_scripts)

        clilog.debug("analyzed %s", analyzed_apk.short_description())

        return outcome
Beispiel #4
0
    def __store_results(self, results):
        ''' Persist every script result and count each processed apk.

        A `StorageException` for a single script is logged as warning and
        does not stop the remaining scripts from being stored.

        Parameters
        ----------
        results : list<FastApk, AndroScript>
            Pairs of apk meta information and the scripts that ran on it.
        '''
        for fastapk, script_results in results:
            for androscript in script_results:
                try:
                    stored = AnalyzeUtil.store_script_res(self.storage, androscript, fastapk)
                    self.add_storage_result(stored)
                except StorageException as e:
                    log.warn(e)

            # one more apk done, regardless of storage problems
            self.add_analyzed_apks_sm(1)
    def get_apk(self, _hash, **kwargs):
        '''
        Get the `EAndroApk` from `_hash`.

        Parameters
        ----------
        _hash : str
            Hash of the .apk (sha256)

        Raises
        ------
        DatabaseLoadException
        NoFile
            If the file is not present.

        Returns
        -------
        EAndroApk
            Apk constructed from raw data and meta infos.
        '''
        try:
            gridfs = self.__apk_coll
            log.info("getting apk: %s from mongodb ...", _hash)
            gridfs_obj = gridfs.get(_hash)
            # get raw .apk
            apk_zipfile = gridfs_obj.read()

            # get apk meta infos
            apk_meta = gridfs_obj.metadata
            package_name, version_name, path, _hash, import_date, tag = apk_meta[RESOBJ_APK_META_PACKAGE_NAME], apk_meta[RESOBJ_APK_META_VERSION_NAME], apk_meta[RESOBJ_APK_META_PATH], apk_meta[RESOBJ_APK_META_HASH], apk_meta[RESOBJ_APK_META_IMPORT_DATE], apk_meta[RESOBJ_APK_META_TAG]

            # use to hold apk meta infos
            fast_apk = FastApk(package_name, version_name, path, _hash, import_date, tag)
            eandro_apk = AnalyzeUtil.open_apk(apk_zipfile, fast_apk, raw = True)

            log.info("got apk")
            return eandro_apk
        except NoFile:
            raise
        except PyMongoError as e:
            raise DatabaseLoadException(self, content = "Apk (hash=%s)" % _hash, caused_by = e), None, sys.exc_info()[2]
    def __init__(self, *args, **kwargs):
        '''
        See :py:method`.BaseAnalyzer.__init__` for details.

        Parameters
        ----------
        serialize_apks : bool, optional (default is True)
            If true, the .apk data itself is serialized.
            Otherwise only the id (hash) of the apk is sent and the worker
            fetches the apk from the result db.
            Be sure to import the apks to the result db first!
        '''
        # only read here, the keyword is still forwarded to the base class
        do_serialize = kwargs.get("serialize_apks", True)

        super(DistributedAnalyzer, self).__init__(*args, **kwargs)

        # materialized list(apk_path, _apk, is_apk)
        self.__apks = list(AnalyzeUtil.apk_gen(self.apks_or_paths))

        # no result group yet
        self.group_result = None

        # remember how apks are handed to the workers
        self.__serialize_apks = do_serialize
        clilog.info("Will serialize .apk data!" if do_serialize else "Will send id of apks!")

        self.analyze_stats_view = None

        # cli stats view, flagged as daemon
        stats_view = AnalysisStatsView(self._cnt_apks)
        stats_view.daemon = True
        self.analyze_stats_view = stats_view

        # holds the analysis tasks
        self.task_collection = TaskCollection(self._cnt_apks)

        # hook up the celery signals
        self.register_signals()

        self.lock = Lock()
    def __init__(self, *args, **kwargs):
        '''
        See :py:method`.BaseAnalyzer.__init__` for details.

        Parameters
        ----------
        serialize_apks : bool, optional (default is True)
            If true, the .apk data itself is serialized.
            Otherwise only the id (hash) of the apk is sent and the worker
            fetches the apk from the result db.
            Be sure to import the apks to the result db first!
        '''
        # only read here, the keyword is still forwarded to the base class
        do_serialize = kwargs.get("serialize_apks", True)

        super(DistributedAnalyzer, self).__init__(*args, **kwargs)

        # materialized list(apk_path, _apk, is_apk)
        self.__apks = list(AnalyzeUtil.apk_gen(self.apks_or_paths))

        # no result group yet
        self.group_result = None

        # remember how apks are handed to the workers
        self.__serialize_apks = do_serialize
        clilog.info("Will serialize .apk data!" if do_serialize else "Will send id of apks!")

        self.analyze_stats_view = None

        # cli stats view, flagged as daemon
        stats_view = AnalysisStatsView(self._cnt_apks)
        stats_view.daemon = True
        self.analyze_stats_view = stats_view

        # holds the analysis tasks
        self.task_collection = TaskCollection(self._cnt_apks)

        # hook up the celery signals
        self.register_signals()

        self.lock = Lock()
Beispiel #8
0
    def __store_results(self, results):
        ''' Persist every script result and count each processed apk.

        A `StorageException` for a single script is logged as warning and
        does not stop the remaining scripts from being stored.

        Parameters
        ----------
        results : list<FastApk, AndroScript>
            Pairs of apk meta information and the scripts that ran on it.
        '''
        for fastapk, script_results in results:
            for androscript in script_results:
                try:
                    stored = AnalyzeUtil.store_script_res(
                        self.storage, androscript, fastapk)
                    self.add_storage_result(stored)
                except StorageException as e:
                    log.warn(e)

            # one more apk done, regardless of storage problems
            self.add_analyzed_apks_sm(1)
Beispiel #9
0
    def get_apk(self, _hash, apk=None, **kwargs):
        '''
        Get the `EAndroApk` from `_hash`.

        Parameters
        ----------
        _hash : str
            Hash of the .apk (sha256)
        apk : Apk
            Carries metainformation needed to build the whole path to the element in S3.

        Raises
        ------
        S3StorageLoadException

        Returns
        -------
        EAndroApk
            Apk constructed from raw data and meta infos.
        '''
        # use to hold apk meta infos
        #fast_apk = FastApk(package_name, version_name, path, _hash, import_date, tag)
        try:
            apk_raw = BytesIO()
            if apk is None:
                raise S3StorageLoadException(
                    self,
                    content="Apk:%s" % apk.short_description(),
                    caused_by=RuntimeError("No APK metainformation given!"
                                           )), None, sys.exc_info()[2]
            _id = Util.get_apk_path_incl_filename(apk)
            self.bucket_get(_id).get_contents_to_file(apk_raw)
            apk_raw.seek(0)
            eandro_apk = AnalyzeUtil.open_apk(apk_raw.read(), None, raw=True)
            return eandro_apk
        except (BotoClientError, S3ResponseError) as e:
            raise S3StorageLoadException(self,
                                         content="Apk:%s" %
                                         apk.short_description(),
                                         caused_by=e), None, sys.exc_info()[2]
Beispiel #10
0
    def run(self):
        ''' Consume work items from `self.work_queue` until `STOP_SENTINEL` is read.

        Each item is opened and analyzed, and its results are stored.
        Exceptions of a single item are logged and the loop continues;
        a `KeyboardInterrupt` ends the loop silently.
        '''
        queue = self.work_queue

        try:
            for job in iter(queue.get, STOP_SENTINEL):
                try:
                    apk_path, apk_meta, _ = job

                    opened_apk = AnalyzeUtil.open_apk(apk_path, apk=apk_meta)

                    # run the scripts
                    analysis = self.analyze_apk(opened_apk)

                    # keep track of the apks that have been analyzed so far
                    if opened_apk:
                        self.analyzed_apks.put(
                            FastApk.load_from_eandroapk(opened_apk))

                    if analysis is None:
                        # nothing to store, only count the apk
                        self.add_analyzed_apks_sm(1)
                    else:
                        self.__store_results([analysis])

                except KeyboardInterrupt as e:
                    raise e
                except Exception as e:
                    log.exception(e)
                finally:
                    # every fetched item has to be acknowledged
                    queue.task_done()

            # acknowledge the sentinel too
            queue.task_done()

            queue.close()
        except KeyboardInterrupt:
            # be silent
            pass
Beispiel #11
0
    def run(self):
        ''' Consume work items from `self.work_queue` until `STOP_SENTINEL` is read.

        Each item is opened and analyzed, and its results are stored.
        Exceptions of a single item are logged and the loop continues;
        a `KeyboardInterrupt` ends the loop silently.
        '''
        queue = self.work_queue

        try:
            for job in iter(queue.get, STOP_SENTINEL):
                try:
                    apk_path, apk_meta, _ = job

                    opened_apk = AnalyzeUtil.open_apk(apk_path, apk=apk_meta)

                    # run the scripts
                    analysis = self.analyze_apk(opened_apk)

                    # keep track of the apks that have been analyzed so far
                    if opened_apk:
                        self.analyzed_apks.put(FastApk.load_from_eandroapk(opened_apk))

                    if analysis is None:
                        # nothing to store, only count the apk
                        self.add_analyzed_apks_sm(1)
                    else:
                        self.__store_results([analysis])

                except KeyboardInterrupt as e:
                    raise e
                except Exception as e:
                    log.exception(e)
                finally:
                    # every fetched item has to be acknowledged
                    queue.task_done()

            # acknowledge the sentinel too
            queue.task_done()

            queue.close()
        except KeyboardInterrupt:
            # be silent
            pass
Beispiel #12
0
    def _analyze(self, test = False):
        '''
        Analyze all apks one after another and store the results.

        Parameters
        ----------
        test : bool, optional (default is False)
            Use for testing. Will not store any result !

        Returns
        -------
        int
            Number of analyzed apks
        list<ResultObject>
            List of the results (only if `test`)
        '''
        scripts = self.script_list

        # only filled in test mode
        collected = []

        for apk_path, _apk, _ in apk_gen(self.apks_or_paths):

            eandro_apk = open_apk(apk_path, apk=_apk)

            # None means the apk could not be opened (already logged) -> skip it
            if eandro_apk is None:
                continue

            # tuple<FastApk, AndroScript>
            res = AnalyzeUtil.analyze_apk(eandro_apk, scripts, self.min_script_needs, reset_scripts = True)

            if res:
                fastapk, script_results = res

                if test:
                    # test mode: hand out the result objects instead of storing them
                    collected.extend([s.res for s in script_results])
                else:
                    for script in script_results:
                        try:
                            stored = AnalyzeUtil.store_script_res(self.storage, script, fastapk)
                            # keep storage results
                            self.add_storage_result(stored)
                        except StorageException as e:
                            log.warn(e)

                clilog.info("analyzed %s", fastapk.short_description())

            # no lock needed, nobody else is writing to this value
            self.cnt_analyzed_apks.value += 1

        return collected if test else self.cnt_analyzed_apks.value
Beispiel #13
0
    def run(self, androscripts, min_script_needs, script_hashes, apk_zipfile_or_hash, is_hash = True, fast_apk = None):
        '''
        Analyze one apk with the given scripts and store the results.

        Parameters
        ----------
        androscripts : list<str>
            List of package names.
        script_hashes : list<str>
            If given, set the hash for the `AndroScript`s
        min_script_needs : tuple<bool>
            See :py:method:`ScriptUtil.get_maximal_script_options`.
        apk_zipfile_or_hash : str
            The raw content of the .apk file (zipfile) or the hash of it (sha256, id in db).
        is_hash : bool, optional (default is True)
            Determines if `apk_zipfile_or_hash` is a hash (id).
        fast_apk : FastApk, optional (default is None)
            Holds the meta infos for the apk.

        Returns
        -------
        tuple<tuple<str, bool>>
            First component is the id of the entry
            and the second a boolean indication if the result has been stored in gridfs.
        ()
            If the apk could not be opened, the analysis gave no result
            or nothing has been stored.
        '''
        try:
            # remembered so that a retry can be issued with the same arguments
            self.__retry_arguments = (androscripts, min_script_needs, script_hashes,
                                      apk_zipfile_or_hash, is_hash, fast_apk)
            validate_hashes = settings.script_hash_validation_enabled()

            # open database/apk storage if not already done
            # reschedule job if connection/open error
            self.__open_db()
            self.__open_apk_storage()

            # setup scripts
            if validate_hashes:
                # validate sent hashes with local script hashes
                self.__setup_scripts_hash_validation(androscripts, script_hashes)
            else:
                # reuse if possible
                self.__setup_scripts_reuse(androscripts, script_hashes)

            # open apk
            if is_hash:
                # prefer the prefetched apk, fall back to the storage
                eandro_apk = apk_prefetch_pool.get(apk_zipfile_or_hash, None)
                if eandro_apk is None:
                    eandro_apk = self.__get_apk_from_storage_retry(apk_zipfile_or_hash, apk = fast_apk)
            else:
                log.info("opening apk via raw data ... ")
                eandro_apk = AnalyzeUtil.open_apk(apk_or_path = apk_zipfile_or_hash, apk = fast_apk, raw = True)

            # None: could not be opened and the error has been logged
            if eandro_apk is None:
                return ()

            result = AnalyzeUtil.analyze_apk(eandro_apk, self.androscripts, min_script_needs, propagate_error = False, reset_scripts = not validate_hashes)
            if result is None:
                return ()

            fastapk, script_results = result
            log.info("analyzed %s", fastapk.short_description())

            # can be None if an error occurred
            storage_results = self.__store_results(fastapk, script_results)
            return tuple(storage_results) if storage_results else ()
        except SoftTimeLimitExceeded:
            log.warn("Task %s exceeded it's soft time limit!", self)
            raise
        except ScriptHashValidationError:
            raise
        finally:
            # the apk is not needed in the prefetch pool anymore
            if is_hash and apk_zipfile_or_hash in apk_prefetch_pool:
                del apk_prefetch_pool[apk_zipfile_or_hash]
    def _analyze(self):
        ''' See doc of :py:method:`.BaseAnalyzer.analyze`.

        Publishes one analysis task per apk, waits for the results via
        `CeleryUtil.join_native` and revokes the tasks if the analysis
        gets interrupted or fails.

        Returns
        -------
        int
            0 if the network check raised `NetworkError` or a
            `DatabaseOpenError` occurred.
            Otherwise the return value of `self.stop_analysis_view()`
            (per the inline comment below: the number of analyzed apks).
        '''

        # try to get registered workers
        # if the network fails at this point -> stop analysis
        try:
            clilog.info(CeleryUtil.get_workers_and_check_network())
        except NetworkError as e:
            log.critical(e)
            return 0

        # storage objects
        storage = self.storage

        clilog.info("Number of apks to analyze: %d", self._cnt_apks)

        try:
            # get analyze task
            analyze_task = tasks[CeleryConstants.get_analyze_task_name()]

            # create storage
            storage.create_or_open_sub_storages()

            # send tasks
            # `start` is the reference for both duration log messages below
            start = time()

            # apk generator over .apk or apk hashes
            apk_gen = AnalyzeUtil.apk_id_or_raw_data_gen(self.apks, force_raw_data = self.serialize_apks)

            clilog.info("Task publishing progress:")

            # send and serialize .apks
            # if analysis via path serialize them!
            if self.serialize_apks:
                log.info("sending .apks to message broker")
                # the (still empty) GroupResult is assigned before publishing,
                # so it is already available if publishing gets interrupted
                self.group_result = group_result = GroupResult(results = [])

                for args in self.send_apk_args_generator(apk_gen):
                    task = analyze_task.delay(*args)
                    group_result.add(task)

            # send only apk id and let fetch via mongodb
            else:
                log.info("sending ids of apks")

                task_group = group((analyze_task.s(*args) for args in self.send_id_args_generator(apk_gen)))

                # publish tasks
                # in this branch `self.group_result` is only set once this call returned
                self.group_result = task_group()

            log.info("sending took %ss", (time() - start))
            sys.stderr.write("\nAnalysis progress:\n")

            # start showing analysis progress
            self.analyze_stats_view.start()

            # wait for results
            log.debug("joining on ResultGroup ... ")

            # setup callback
            callback_func = self.get_callback_func(self.success_handler, self.error_handler)
            # propagate = False: errors are not re-raised here but handed to the callback
            # NOTE(review): presumably routed to `self.error_handler` -- confirm in `get_callback_func`
            CeleryUtil.join_native(self.group_result, propagate = False, callback = callback_func)

            clilog.info("\nanalysis done ... ")
            log.info("distributed analysis took %ss", (time() - start))

            return self.stop_analysis_view()
        except DatabaseOpenError as e:
            log.critical(e)
            return 0

        # KeyboardInterrupt is listed explicitly because it is not a subclass of `Exception`
        except (KeyboardInterrupt, Exception) as e:
            # a user interrupt is expected, everything else gets logged with traceback
            if not isinstance(e, KeyboardInterrupt):
                log.exception(e)
            log.warn("Interrupting distributed analysis ... Please wait a moment!")
            log.warn("revoking tasks on all workers ...")

            if celerysettings.CELERY_TASK_REVOCATION_ENABLED:
                # revoke tasks
                if self.group_result is None:
                    # revoke via task ids
                    log.debug("revoking while publishing tasks ...")

                    self.task_collection.revoke_all(terminate = True, signal = 'SIGKILL')
                else:
                    # revoke via GroupResult if already available/created
                    # in the id branch it is first available after all tasks have been sent
                    self.group_result.revoke(terminate = True, signal = 'SIGKILL')
                log.warn("revoked tasks and killed workers ...")

            #return number of analyzed apks
            return self.stop_analysis_view()
Beispiel #15
0
    def _analyze(self):
        ''' See doc of :py:method:`.BaseAnalyzer.analyze`.

        Publishes one analysis task per apk, waits for the results via
        `CeleryUtil.join_native` and revokes the tasks if the analysis
        gets interrupted or fails.

        Returns
        -------
        int
            0 if the network check raised `NetworkError` or a
            `DatabaseOpenError` occurred.
            Otherwise the return value of `self.stop_analysis_view()`
            (per the inline comment below: the number of analyzed apks).
        '''

        # try to get registered workers
        # if the network fails at this point -> stop analysis
        try:
            clilog.info(CeleryUtil.get_workers_and_check_network())
        except NetworkError as e:
            log.critical(e)
            return 0

        # storage objects
        storage = self.storage

        clilog.info("Number of apks to analyze: %d", self._cnt_apks)

        try:
            # get analyze task
            analyze_task = tasks[CeleryConstants.get_analyze_task_name()]

            # create storage
            storage.create_or_open_sub_storages()

            # send tasks
            # `start` is the reference for both duration log messages below
            start = time()

            # apk generator over .apk or apk hashes
            apk_gen = AnalyzeUtil.apk_id_or_raw_data_gen(
                self.apks, force_raw_data=self.serialize_apks)

            clilog.info("Task publishing progress:")

            # send and serialize .apks
            # if analysis via path serialize them!
            if self.serialize_apks:
                log.info("sending .apks to message broker")
                # the (still empty) GroupResult is assigned before publishing,
                # so it is already available if publishing gets interrupted
                self.group_result = group_result = GroupResult(results=[])

                for args in self.send_apk_args_generator(apk_gen):
                    task = analyze_task.delay(*args)
                    group_result.add(task)

            # send only apk id and let fetch via mongodb
            else:
                log.info("sending ids of apks")

                task_group = group(
                    (analyze_task.s(*args)
                     for args in self.send_id_args_generator(apk_gen)))

                # publish tasks
                # in this branch `self.group_result` is only set once this call returned
                self.group_result = task_group()

            log.info("sending took %ss", (time() - start))
            sys.stderr.write("\nAnalysis progress:\n")

            # start showing analysis progress
            self.analyze_stats_view.start()

            # wait for results
            log.debug("joining on ResultGroup ... ")

            # setup callback
            callback_func = self.get_callback_func(self.success_handler,
                                                   self.error_handler)
            # propagate=False: errors are not re-raised here but handed to the callback
            # NOTE(review): presumably routed to `self.error_handler` -- confirm in `get_callback_func`
            CeleryUtil.join_native(self.group_result,
                                   propagate=False,
                                   callback=callback_func)

            clilog.info("\nanalysis done ... ")
            log.info("distributed analysis took %ss", (time() - start))

            return self.stop_analysis_view()
        except DatabaseOpenError as e:
            log.critical(e)
            return 0

        # KeyboardInterrupt is listed explicitly because it is not a subclass of `Exception`
        except (KeyboardInterrupt, Exception) as e:
            # a user interrupt is expected, everything else gets logged with traceback
            if not isinstance(e, KeyboardInterrupt):
                log.exception(e)
            log.warn(
                "Interrupting distributed analysis ... Please wait a moment!")
            log.warn("revoking tasks on all workers ...")

            if celerysettings.CELERY_TASK_REVOCATION_ENABLED:
                # revoke tasks
                if self.group_result is None:
                    # revoke via task ids
                    log.debug("revoking while publishing tasks ...")

                    self.task_collection.revoke_all(terminate=True,
                                                    signal='SIGKILL')
                else:
                    # revoke via GroupResult if already available/created
                    # in the id branch it is first available after all tasks have been sent
                    self.group_result.revoke(terminate=True, signal='SIGKILL')
                log.warn("revoked tasks and killed workers ...")

            #return number of analyzed apks
            return self.stop_analysis_view()
Beispiel #16
0
    def run(self,
            androscripts,
            min_script_needs,
            script_hashes,
            apk_zipfile_or_hash,
            is_hash=True,
            fast_apk=None):
        '''
        Analyze one apk with the given scripts and store the results.

        Parameters
        ----------
        androscripts : list<str>
            List of package names.
        script_hashes : list<str>
            If given, set the hash for the `AndroScript`s
        min_script_needs : tuple<bool>
            See :py:method:`ScriptUtil.get_maximal_script_options`.
        apk_zipfile_or_hash : str
            The raw content of the .apk file (zipfile) or the hash of it (sha256, id in db).
        is_hash : bool, optional (default is True)
            Determines if `apk_zipfile_or_hash` is a hash (id).
        fast_apk : FastApk, optional (default is None)
            Holds the meta infos for the apk.

        Returns
        -------
        tuple<tuple<str, bool>>
            First component is the id of the entry
            and the second a boolean indication if the result has been stored in gridfs.
        ()
            If the apk could not be opened, the analysis gave no result
            or nothing has been stored.
        '''
        try:
            # remembered so that a retry can be issued with the same arguments
            self.__retry_arguments = (androscripts, min_script_needs,
                                      script_hashes, apk_zipfile_or_hash,
                                      is_hash, fast_apk)
            validate_hashes = settings.script_hash_validation_enabled()

            # open database/apk storage if not already done
            # reschedule job if connection/open error
            self.__open_db()
            self.__open_apk_storage()

            # setup scripts
            if validate_hashes:
                # validate sent hashes with local script hashes
                self.__setup_scripts_hash_validation(androscripts,
                                                     script_hashes)
            else:
                # reuse if possible
                self.__setup_scripts_reuse(androscripts, script_hashes)

            # open apk
            if is_hash:
                # prefer the prefetched apk, fall back to the storage
                eandro_apk = apk_prefetch_pool.get(apk_zipfile_or_hash, None)
                if eandro_apk is None:
                    eandro_apk = self.__get_apk_from_storage_retry(
                        apk_zipfile_or_hash, apk=fast_apk)
            else:
                log.info("opening apk via raw data ... ")
                eandro_apk = AnalyzeUtil.open_apk(
                    apk_or_path=apk_zipfile_or_hash, apk=fast_apk, raw=True)

            # None: could not be opened and the error has been logged
            if eandro_apk is None:
                return ()

            result = AnalyzeUtil.analyze_apk(
                eandro_apk,
                self.androscripts,
                min_script_needs,
                propagate_error=False,
                reset_scripts=not validate_hashes)
            if result is None:
                return ()

            fastapk, script_results = result
            log.info("analyzed %s", fastapk.short_description())

            # can be None if an error occurred
            storage_results = self.__store_results(fastapk, script_results)
            return tuple(storage_results) if storage_results else ()
        except SoftTimeLimitExceeded:
            log.warn("Task %s exceeded it's soft time limit!", self)
            raise
        except ScriptHashValidationError:
            raise
        finally:
            # the apk is not needed in the prefetch pool anymore
            if is_hash and apk_zipfile_or_hash in apk_prefetch_pool:
                del apk_prefetch_pool[apk_zipfile_or_hash]
Beispiel #17
0
    def androguard_load_from_io(file_like_object=None,
                                apk_file_path=None,
                                calculate_hash=True):
        ''' Load a FastApk from file-like object or path by using `androgaurd`.
        Parameters
        ----------
        file_like_object : file-like-object, optional (default is None)
            A file-like obj that points to the apk.
            If non given, try to open a file_like_object from the given `apk_file_path`.
        apk_file_path: str, optional (default is "not set")
            Path of apk
        calculate_hash : bool
            If true calculate the hash.
            This means the file has be to loaded completely into memory.
            If False, the hash will be calculated the first time it gets retrieved.

        Returns
        -------
        apk: FastApk

        Raises
        ------
        CouldNotOpenApk
            If the apk file could not be opened
        '''
        # prevent circular import
        from androlyze.analyze import AnalyzeUtil

        # apk will be loaded from `flo` variable
        flo = file_like_object

        # indicates if file has been opened from path
        file_open_from_path = False
        # no file_like_object given, open file from path
        if file_like_object is None and isinstance(apk_file_path, str):
            try:
                flo = open(apk_file_path, "rb")
                file_open_from_path = True
            except IOError as e:
                flo.close()
                raise CouldNotOpenApk(apk_file_path,
                                      e), None, sys.exc_info()[2]

        # if file path not set, show at least that it's not seen in the exceptions
        if apk_file_path is None:
            apk_file_path = "not set"

        # load apk into memory and calculate hash if option set
        flo.seek(0)
        _hash = None
        data = flo.read()
        if calculate_hash:
            # calculate hash
            _hash = Util.sha256(data)
            flo.seek(0)

        apk = AnalyzeUtil.open_apk(data, raw=True, path=apk_file_path)

        if file_open_from_path:
            flo.close()

        if apk is not None:
            try:
                return FastApk.load_from_eandroapk(apk)
            except KeyError:
                pass

        # could not open apk -> raise error
        raise CouldNotOpenApk(file_path=apk_file_path)
Beispiel #18
0
    def _analyze(self, test=False):
        '''
        Analyze every apk delivered by `apk_gen` with all scripts of
        `self.script_list` and either store or collect the results.

        Parameters
        ----------
        test : bool, optional (default is False)
            Testing mode: nothing is stored, the script results
            are collected and returned instead.

        Returns
        -------
        int
            Value of the `cnt_analyzed_apks` counter (if not `test`).
        list<ResultObject>
            The `res` attribute of every script result (only if `test`).
        '''
        scripts = self.script_list

        # only filled in test mode
        collected = []

        for path, apk_meta, _ in apk_gen(self.apks_or_paths):

            opened_apk = open_apk(path, apk=apk_meta)

            # None means the apk could not be opened -> go on with the next one
            if opened_apk is None:
                continue

            # tuple<FastApk, AndroScript> or something falsy
            outcome = AnalyzeUtil.analyze_apk(opened_apk, scripts,
                                              self.min_script_needs,
                                              reset_scripts=True)

            if outcome:
                fastapk, script_results = outcome

                if test:
                    # testing mode: hand out the result objects only
                    collected += [s.res for s in script_results]
                else:
                    # normal mode: persist each script result
                    for script in script_results:
                        try:
                            stored = AnalyzeUtil.store_script_res(
                                self.storage, script, fastapk)
                            # remember what has been stored
                            self.add_storage_result(stored)
                        except StorageException as e:
                            log.warn(e)

                clilog.info("analyzed %s", fastapk.short_description())

            # counted for every apk that could be opened, even without results
            self.cnt_analyzed_apks.value += 1

        if test:
            return collected

        return self.cnt_analyzed_apks.value
Beispiel #19
0
    def androguard_load_from_io(file_like_object = None, apk_file_path = None, calculate_hash = True):
        ''' Load a FastApk from file-like object or path by using `androgaurd`.
        Parameters
        ----------
        file_like_object : file-like-object, optional (default is None)
            A file-like obj that points to the apk.
            If non given, try to open a file_like_object from the given `apk_file_path`.
        apk_file_path: str, optional (default is "not set")
            Path of apk
        calculate_hash : bool
            If true calculate the hash.
            This means the file has be to loaded completely into memory.
            If False, the hash will be calculated the first time it gets retrieved.

        Returns
        -------
        apk: FastApk

        Raises
        ------
        CouldNotOpenApk
            If the apk file could not be opened
        '''
        # prevent circular import
        from androlyze.analyze import AnalyzeUtil

        # apk will be loaded from `flo` variable
        flo = file_like_object

        # indicates if file has been opened from path
        file_open_from_path = False
        # no file_like_object given, open file from path
        if file_like_object is None and isinstance(apk_file_path, str):
            try:
                flo = open(apk_file_path, "rb")
                file_open_from_path = True
            except IOError as e:
                flo.close()
                raise CouldNotOpenApk(apk_file_path, e), None, sys.exc_info()[2]

        # if file path not set, show at least that it's not seen in the exceptions
        if apk_file_path is None:
            apk_file_path = "not set"

        # load apk into memory and calculate hash if option set
        flo.seek(0)
        _hash = None
        data = flo.read()
        if calculate_hash:
            # calculate hash
            _hash = Util.sha256(data)
            flo.seek(0)

        apk = AnalyzeUtil.open_apk(data, raw = True, path = apk_file_path)

        if file_open_from_path:
            flo.close()

        if apk is not None:
            try:
                return FastApk.load_from_eandroapk(apk)
            except KeyError:
                pass

        # could not open apk -> raise error
        raise CouldNotOpenApk(file_path = apk_file_path)
    def _analyze(self):
        ''' Run the analysis with `self.concurrency` workers fed through the work queue.

        See doc of :py:method:`BaseAnalyzer.analyze`.

        The first `KeyboardInterrupt` clears the queue and waits for the workers,
        a second one sends SIGINT to every worker.

        Returns
        -------
        int
            Value of the `cnt_analyzed_apks` counter (in every case).
        '''
        def signal_end_of_work(queue):
            # one stop sentinel per worker
            for _ in range(self.concurrency):
                queue.put(STOP_SENTINEL)

        try:
            work_queue = self.work_queue

            # set up the pool, nothing is started yet
            log.debug("starting %s workers ...", self.concurrency)
            for _ in range(self.concurrency):
                worker_proc = Worker(self.script_list, self.script_hashes, self.min_script_needs,
                                     work_queue, self.storage,
                                     self.cnt_analyzed_apks, self.analyzed_apks, self.storage_results)
                worker_proc.daemon = True
                self.workers.append(worker_proc)

            # now launch all of them
            for worker_proc in self.workers:
                worker_proc.start()

            # the queue is size limited, hence the workers have to run before we fill it
            log.info("Loading apk paths into work queue ...")
            for task in AnalyzeUtil.apk_gen(self.apks_or_paths):
                # one task = one apk that gets analyzed with all scripts
                work_queue.put(task)

            signal_end_of_work(work_queue)

            # cli progress display
            stats_view = AnalysisStatsView(self.cnt_analyzed_apks, self._cnt_apks, self.analyzed_apks)
            stats_view.daemon = True
            stats_view.start()

            # wait on the work queue, then stop the progress display
            work_queue.join()
            stats_view.terminate()
            log.debug("joined on work queue ...")

            return self.cnt_analyzed_apks.value

        # first CTRL-C -> hot shutdown
        except KeyboardInterrupt:
            log.warn("Hot shutdown ... ")
            try:
                log.warn("clearing work queue ... ")
                Util.clear_queue(work_queue)
                log.warn("cleared work queue ... ")

                signal_end_of_work(work_queue)

                for running in self.workers:
                    running.join()
                log.warn("waited for all workers ... ")

                return self.cnt_analyzed_apks.value

            # second CTRL-C -> cold shutdown, the workers get killed
            except KeyboardInterrupt:
                log.warn("Cold shutdown ... ")
                log.warn("Hard shutdown wanted! Killing all workers!")

                # SIGINT equals sending CTRL-C to the process; failures are ignored
                for proc in self.workers:
                    try:
                        os.kill(proc.pid, signal.SIGINT)
                    except:
                        pass

                return self.cnt_analyzed_apks.value
Beispiel #21
0
    def _analyze(self):
        ''' Run the analysis with `self.concurrency` workers fed through the work queue.

        See doc of :py:method:`BaseAnalyzer.analyze`.

        The first `KeyboardInterrupt` clears the queue and waits for the workers,
        a second one sends SIGINT to every worker.

        Returns
        -------
        int
            Value of the `cnt_analyzed_apks` counter (in every case).
        '''
        def put_stop_sentinels(queue):
            # every worker needs its own end-of-work marker
            for _ in range(self.concurrency):
                queue.put(STOP_SENTINEL)

        try:
            work_queue = self.work_queue

            # build the worker pool first, starting happens afterwards
            log.debug("starting %s workers ...", self.concurrency)
            for _ in range(self.concurrency):
                new_worker = Worker(self.script_list, self.script_hashes,
                                    self.min_script_needs, work_queue,
                                    self.storage, self.cnt_analyzed_apks,
                                    self.analyzed_apks, self.storage_results)
                new_worker.daemon = True
                self.workers.append(new_worker)

            # launch the whole pool
            for new_worker in self.workers:
                new_worker.start()

            # size limited queue -> workers must already run while it gets filled
            log.info("Loading apk paths into work queue ...")
            for job in AnalyzeUtil.apk_gen(self.apks_or_paths):
                # a job is an apk that will be analyzed with all scripts
                work_queue.put(job)

            put_stop_sentinels(work_queue)

            # show the progress on the cli
            progress_view = AnalysisStatsView(self.cnt_analyzed_apks,
                                              self._cnt_apks,
                                              self.analyzed_apks)
            progress_view.daemon = True
            progress_view.start()

            # wait on the work queue, afterwards the view is not needed any more
            work_queue.join()
            progress_view.terminate()
            log.debug("joined on work queue ...")

            return self.cnt_analyzed_apks.value

        # hot shutdown on the first interrupt
        except KeyboardInterrupt:
            log.warn("Hot shutdown ... ")
            try:
                log.warn("clearing work queue ... ")
                Util.clear_queue(work_queue)
                log.warn("cleared work queue ... ")

                put_stop_sentinels(work_queue)

                for active in self.workers:
                    active.join()
                log.warn("waited for all workers ... ")

                return self.cnt_analyzed_apks.value

            # cold shutdown on the second interrupt -> kill the workers
            except KeyboardInterrupt:
                log.warn("Cold shutdown ... ")
                log.warn("Hard shutdown wanted! Killing all workers!")

                # deliver SIGINT (like CTRL-C) to each worker, errors are ignored
                for victim in self.workers:
                    try:
                        os.kill(victim.pid, signal.SIGINT)
                    except:
                        pass

                return self.cnt_analyzed_apks.value