コード例 #1
0
ファイル: AndroScript.py プロジェクト: umr-ds/androlyze
 def gen_unique_id(self):
     ''' Generate a unique id = sha256(apk hash + script name).

     Raises
     ------
     AttributeError
         Logged as warning and re-raised if an attribute needed
         to build the id cannot be resolved.
     '''
     try:
         id_source = self.res.apk.hash + self.name
         return sha256(id_source)
     except AttributeError:
         log.warn('Could not calculate unique id for %s', self)
         raise
コード例 #2
0
ファイル: AnalyzeUtil.py プロジェクト: faheemfayyaz/androlyze
def apk_id_or_raw_data_gen(apk_gen, force_raw_data = False):
    ''' Generator over the raw .apk data if only a path is given (or `force_raw_data`).
    Otherwise generator over the apk ids.

    Files that cannot be read are logged as warning and skipped.

    Parameters
    ----------
    apk_gen : iterable<tuple<str, Apk, bool>>
        See :py:method:`.AnalyzeUtil.apk_gen`
    force_raw_data : bool, optional (default is False)
        If true, force to yield the raw zip file rather than the hash.

    Returns
    -------
    generator<tuple<object, bool, Apk>>
        First component is the raw zip file content or the id (hash).
        Second component is True if the first component is an id
        rather than raw zip data.
        Last is the `Apk` object, passed through unchanged from `apk_gen`.
    '''
    for apk_path, _apk, is_apk in apk_gen:
        if is_apk and not force_raw_data:
            yield _apk.hash, True, _apk
            continue

        try:
            with open(apk_path, mode = "rb") as f:
                apk_zipfile = f.read()
        except IOError as e:
            log.warn(e)
            continue

        # yield outside of `with`/`try`: the file is already closed while the
        # consumer works on the data and only read errors are handled above
        yield apk_zipfile, False, _apk
コード例 #3
0
ファイル: AnalyzeUtil.py プロジェクト: umr-ds/androlyze
def apk_id_or_raw_data_gen(apk_gen, force_raw_data=False):
    ''' Generator over the raw .apk data if only a path is given (or `force_raw_data`).
    Otherwise generator over the apk ids.

    Files that cannot be read are logged as warning and skipped.

    Parameters
    ----------
    apk_gen : iterable<tuple<str, Apk, bool>>
        See :py:method:`.AnalyzeUtil.apk_gen`
    force_raw_data : bool, optional (default is False)
        If true, force to yield the raw zip file rather than the hash.

    Returns
    -------
    generator<tuple<object, bool, Apk>>
        First component is the raw zip file content or the id (hash).
        Second component is True if the first component is an id
        rather than raw zip data.
        Last is the `Apk` object, passed through unchanged from `apk_gen`.
    '''
    for apk_path, _apk, is_apk in apk_gen:
        if is_apk and not force_raw_data:
            yield _apk.hash, True, _apk
            continue

        try:
            with open(apk_path, mode="rb") as f:
                apk_zipfile = f.read()
        except IOError as e:
            log.warn(e)
            continue

        # yield outside of `with`/`try`: the file is already closed while the
        # consumer works on the data and only read errors are handled above
        yield apk_zipfile, False, _apk
コード例 #4
0
ファイル: AndroScript.py プロジェクト: faheemfayyaz/androlyze
 def gen_unique_id(self):
     ''' Generate a unique id = sha256(apk hash + script name).

     Raises
     ------
     AttributeError
         Logged as warning and re-raised if an attribute needed
         to build the id cannot be resolved.
     '''
     try:
         id_source = self.res.apk.hash + self.name
         return sha256(id_source)
     except AttributeError:
         log.warn('Could not calculate unique id for %s', self)
         raise
コード例 #5
0
 def __del__(self):
     ''' Finalizer: log the database name and close the connection if there is one.

     An `sqlite3.Error` raised while doing so is logged as warning and not propagated.
     '''
     try:
         log.info("Closing database %s", self.__db_name)
         connection = self.conn
         if connection is None:
             # nothing to close
             return
         connection.close()
     except sqlite3.Error as db_error:
         log.warn(db_error)
コード例 #6
0
 def __del__(self):
     ''' Finalizer: log the database name and close the connection if there is one.

     An `sqlite3.Error` raised while doing so is logged as warning and not propagated.
     '''
     try:
         log.info("Closing database %s", self.__db_name)
         connection = self.conn
         if connection is None:
             # nothing to close
             return
         connection.close()
     except sqlite3.Error as db_error:
         log.warn(db_error)
コード例 #7
0
ファイル: AnalyzeUtil.py プロジェクト: umr-ds/androlyze
def open_apk(apk_or_path=None, apk=None, raw=False, path=None):
    '''
    Open an apk as `EAndroApk` and optionally copy the meta information from `apk`.

    All errors are logged and swallowed, in which case None is returned.

    Parameters
    ----------
    apk_or_path : str, optional (default is None).
        Path to the apk, or the raw .apk data if `raw` is set.
    apk : Apk, optional (default is None)
        If given, take the meta infos from `apk` (via `set_meta`),
        so the hash does not need to be recomputed.
    raw : bool, optional (default is False)
        If set, `apk_or_path` is handed to `EAndroApk` as raw .apk data.
    path : str, optional (default is None)
        Only used together with `raw`: assigned to the `path` attribute
        of the `EAndroApk` (also if it is None).

    Returns
    -------
    EAndroApk
    None
        If apk could not be opened.
    '''
    # description used in the log messages only
    apk_descr = str(apk_or_path)
    if raw:
        apk_descr = "raw data"

    try:
        if raw:
            opened_apk = EAndroApk(apk_or_path, raw=True)
            opened_apk.path = path
        else:
            opened_apk = EAndroApk(apk_or_path)

        # we don't want to lose meta infos
        # use the hash from db so we don't need to recompute
        if apk is not None:
            opened_apk.set_meta(apk)

        return opened_apk
    except BadZipfile:
        log.warn("Apk %s is not a valid zip file!" % apk_descr)
    except (struct.error, IOError) as open_error:
        log.warn(CouldNotOpenApk(apk_descr, open_error))
    except Exception as unexpected_error:
        log.exception(unexpected_error)

    return None
コード例 #8
0
ファイル: AnalyzeUtil.py プロジェクト: faheemfayyaz/androlyze
def open_apk(apk_or_path = None, apk = None, raw = False, path = None):
    '''
    Open an apk as `EAndroApk` and optionally copy the meta information from `apk`.

    All errors are logged and swallowed, in which case None is returned.

    Parameters
    ----------
    apk_or_path : str, optional (default is None).
        Path to the apk, or the raw .apk data if `raw` is set.
    apk : Apk, optional (default is None)
        If given, take the meta infos from `apk` (via `set_meta`),
        so the hash does not need to be recomputed.
    raw : bool, optional (default is False)
        If set, `apk_or_path` is handed to `EAndroApk` as raw .apk data.
    path : str, optional (default is None)
        Only used together with `raw`: assigned to the `path` attribute
        of the `EAndroApk` (also if it is None).

    Returns
    -------
    EAndroApk
    None
        If apk could not be opened.
    '''
    # description used in the log messages only
    apk_descr = str(apk_or_path)
    if raw:
        apk_descr = "raw data"

    try:
        if raw:
            opened_apk = EAndroApk(apk_or_path, raw = True)
            opened_apk.path = path
        else:
            opened_apk = EAndroApk(apk_or_path)

        # we don't want to lose meta infos
        # use the hash from db so we don't need to recompute
        if apk is not None:
            opened_apk.set_meta(apk)

        return opened_apk
    except BadZipfile:
        log.warn("Apk %s is not a valid zip file!" % apk_descr)
    except (struct.error, IOError) as open_error:
        log.warn(CouldNotOpenApk(apk_descr, open_error))
    except Exception as unexpected_error:
        log.exception(unexpected_error)

    return None
コード例 #9
0
    def get_eandro_apk(self, _id):
        '''
        Load the `EAndroApk` identified by `_id` through `get_apk`.

        Parameters
        ----------
        _id
            Passed unchanged to `get_apk`.

        Returns
        -------
        EAndroApk
        None
            If Apk could not be loaded (the error is logged as warning).
        '''
        try:
            loaded_apk = self.get_apk(_id)
        except (DatabaseLoadException, NoFile) as load_error:
            log.warn(load_error)
            return None
        return loaded_apk
コード例 #10
0
ファイル: Util.py プロジェクト: umr-ds/androlyze
def log_will_retry(secs, exc = None, what = ''):
    '''
    Log that `what` will be tried again in `secs` seconds.

    Parameters
    ----------
    secs : int
        Retry in `secs` seconds.
    exc: Exception, optional (default is None)
        If given, it is logged (via `log.exception`) before the retry message.
    what : str, optional (default is '')
        What to try again.
    '''
    exception_given = exc is not None
    if exception_given:
        log.exception(exc)

    log.warn("Trying %s again in %ss", what, secs)
コード例 #11
0
ファイル: AnalyzeUtil.py プロジェクト: faheemfayyaz/androlyze
def apk_zipfile_gen(apk_gen):
    ''' Generator over the .apk files (raw data). Errors will be logged!.

    Entries whose path is not a string are skipped silently.
    Files that cannot be read are logged as warning and skipped.

    Parameters
    ----------
    apk_gen : iterable<tuple<str, Apk, bool>>
        See :py:method:`.AnalyzeUtil.apk_gen`

    Returns
    -------
    generator<str>
        The raw content of each readable .apk file.
    '''
    try:
        string_types = (str, unicode)
    except NameError:
        # Python 3: there is no `unicode`, `str` already is the text type
        string_types = (str, )

    for apk_path, _apk, _ in apk_gen:
        if not isinstance(apk_path, string_types):
            continue

        try:
            with open(apk_path, mode = "rb") as f:
                apk_zipfile = f.read()
        except IOError as e:
            log.warn(e)
            continue

        # yield outside of `with`/`try`: the file is already closed while the
        # consumer works on the data and only read errors are handled above
        yield apk_zipfile
コード例 #12
0
    def create_entry_for_apks(self, apks, update, tag=None):
        ''' Create an entry for each of the `apks`.

        An apk whose entry cannot be created (`StorageException`) is logged
        as warning and skipped, the remaining apks are still processed.

        Parameters
        ----------
        apks : iterable<Apk>
        update : bool
            Update apks that have already been imported.
        tag : str, optional (default is None)
            Tag the apk with some text.
        '''
        for current_apk in apks:
            try:
                self.create_entry_for_apk(current_apk, update, tag)
            except StorageException as storage_error:
                log.warn(storage_error)
コード例 #13
0
ファイル: AnalyzeUtil.py プロジェクト: umr-ds/androlyze
def apk_zipfile_gen(apk_gen):
    ''' Generator over the .apk files (raw data). Errors will be logged!.

    Entries whose path is not a string are skipped silently.
    Files that cannot be read are logged as warning and skipped.

    Parameters
    ----------
    apk_gen : iterable<tuple<str, Apk, bool>>
        See :py:method:`.AnalyzeUtil.apk_gen`

    Returns
    -------
    generator<str>
        The raw content of each readable .apk file.
    '''
    try:
        string_types = (str, unicode)
    except NameError:
        # Python 3: there is no `unicode`, `str` already is the text type
        string_types = (str, )

    for apk_path, _apk, _ in apk_gen:
        if not isinstance(apk_path, string_types):
            continue

        try:
            with open(apk_path, mode="rb") as f:
                apk_zipfile = f.read()
        except IOError as e:
            log.warn(e)
            continue

        # yield outside of `with`/`try`: the file is already closed while the
        # consumer works on the data and only read errors are handled above
        yield apk_zipfile
コード例 #14
0
    def create_entry_for_apks(self, apks, update, tag = None):
        ''' Create an entry for each of the `apks`.

        An apk whose entry cannot be created (`StorageException`) is logged
        as warning and skipped, the remaining apks are still processed.

        Parameters
        ----------
        apks : iterable<Apk>
        update : bool
            Update apks that have already been imported.
        tag : str, optional (default is None)
            Tag the apk with some text.
        '''
        for current_apk in apks:
            try:
                self.create_entry_for_apk(current_apk, update, tag)
            except StorageException as storage_error:
                log.warn(storage_error)
コード例 #15
0
ファイル: ApkStorageFactory.py プロジェクト: umr-ds/androlyze
def get_apk_storage(settings):
    ''' Get an object implementing the `ApkCopyInterface`.

    The storage engine name is read from `settings` and logged.

    Parameters
    ----------
    settings : Settings

    Returns
    -------
    The storage built by `S3Storage.factory_from_config` or
    `ResultDatabaseStorage.factory_from_config`, depending on the engine.

    Raises
    ------
    RuntimeError
        If the configured engine is neither the S3 nor the result db section.
    '''
    import androlyze.settings as s

    engine = settings.get_apk_storage_engine()
    log.warn("Using APK storage: %s" % engine)

    if engine == s.SECTION_S3_STORAGE:
        return S3Storage.factory_from_config(settings)

    if engine == s.SECTION_RESULT_DB:
        return ResultDatabaseStorage.factory_from_config(settings)

    raise RuntimeError("No Storage engine defined! But requested!")
コード例 #16
0
ファイル: ApkStorageFactory.py プロジェクト: helit2/androlyze
def get_apk_storage(settings):
    """ Get an object implementing the `ApkCopyInterface`.

    The storage engine name is read from `settings` and logged.

    Parameters
    ----------
    settings : Settings

    Returns
    -------
    The storage built by `S3Storage.factory_from_config` or
    `ResultDatabaseStorage.factory_from_config`, depending on the engine.

    Raises
    ------
    RuntimeError
        If the configured engine is neither the S3 nor the result db section.
    """
    import androlyze.settings as s

    engine = settings.get_apk_storage_engine()
    log.warn("Using APK storage: %s" % engine)

    if engine == s.SECTION_S3_STORAGE:
        return S3Storage.factory_from_config(settings)

    if engine == s.SECTION_RESULT_DB:
        return ResultDatabaseStorage.factory_from_config(settings)

    raise RuntimeError("No Storage engine defined! But requested!")
コード例 #17
0
    def store_result_dict(self, res_dict):
        '''
        Store the analysis results described by `res_dict`.

        The apk and the script are both rebuilt from `res_dict`; afterwards
        the apk entry is created (with `update = True`) and the result stored.
        A `FileSysStoreException` raised while storing is logged as warning
        and not propagated.

        Parameters
        ----------
        res_dict : dict
            See `ResultObject.description_dict`
        '''
        apk_meta = FastApk.load_from_result_dict(res_dict)
        androscript = AndroScript.load_from_result_dict(res_dict, apk_meta)

        try:
            self.create_entry_for_apk(apk_meta, update = True)
            self.store_result_for_apk(apk_meta, androscript)
        except FileSysStoreException as store_error:
            log.warn(store_error)
コード例 #18
0
ファイル: Worker.py プロジェクト: faheemfayyaz/androlyze
    def __store_results(self, results):
        ''' Store the results and increase the analyzed apks counter.

        A `StorageException` for a single script result is logged as warning;
        the remaining results are still stored. The counter is increased by
        one per entry of `results`, independent of storage errors.

        Parameters
        ----------
        results : iterable of 2-tuples
            Each entry unpacks to the apk (`FastApk`) and an iterable
            of the script results belonging to it.
        '''
        for fastapk, script_results in results:

            for script in script_results:
                try:
                    stored = AnalyzeUtil.store_script_res(self.storage, script, fastapk)
                    self.add_storage_result(stored)
                except StorageException as storage_error:
                    log.warn(storage_error)

            # one more apk has been processed
            self.add_analyzed_apks_sm(1)
コード例 #19
0
    def __store_results(self, results):
        ''' Store the results and increase the analyzed apks counter.

        A `StorageException` for a single script result is logged as warning;
        the remaining results are still stored. The counter is increased by
        one per entry of `results`, independent of storage errors.

        Parameters
        ----------
        results : iterable of 2-tuples
            Each entry unpacks to the apk (`FastApk`) and an iterable
            of the script results belonging to it.
        '''
        for fastapk, script_results in results:

            for script in script_results:
                try:
                    stored = AnalyzeUtil.store_script_res(
                        self.storage, script, fastapk)
                    self.add_storage_result(stored)
                except StorageException as storage_error:
                    log.warn(storage_error)

            # one more apk has been processed
            self.add_analyzed_apks_sm(1)
コード例 #20
0
def get_default_scripts():
    ''' Get the default scripts that shall be loaded according to `SCRIPT_SETTINGS_PATH`.

    The settings file is JSON. Below `SCRIPT_SETTINGS_KEY_SCRIPTS` the entry
    `SCRIPT_SETTINGS_SCRIPTS_LOAD` names the key whose value is returned.

    Returns
    -------
    list<str>
        List of paths to the scripts.
        Empty list if the settings file cannot be read (silently),
        is not valid JSON or lacks the expected entries (both logged).
    '''
    import json
    try:
        config_path = expanduser(SCRIPT_SETTINGS_PATH)
        with open(config_path, "r") as f:
            script_templates = json.load(f)

        scripts_section = script_templates[SCRIPT_SETTINGS_KEY_SCRIPTS]
        # get key that selects the key for script loading
        script_key = scripts_section[SCRIPT_SETTINGS_SCRIPTS_LOAD]
        return scripts_section[script_key]
    except IOError:
        # unreadable/missing settings file: deliberately ignored
        pass
    except ValueError as e:
        log.warn("Error loading %s due to: %s", config_path, e)
    except (KeyError, TypeError) as e:
        # structure of the settings is not as expected, honor the
        # documented contract instead of failing in the caller
        log.warn("Error loading %s due to missing or invalid entry: %s", config_path, e)
    return []
コード例 #21
0
ファイル: __init__.py プロジェクト: helit2/androlyze
def get_default_scripts():
    """ Get the default scripts that shall be loaded according to `SCRIPT_SETTINGS_PATH`.

    The settings file is JSON. Below `SCRIPT_SETTINGS_KEY_SCRIPTS` the entry
    `SCRIPT_SETTINGS_SCRIPTS_LOAD` names the key whose value is returned.

    Returns
    -------
    list<str>
        List of paths to the scripts.
        Empty list if the settings file cannot be read (silently),
        is not valid JSON or lacks the expected entries (both logged).
    """
    import json

    try:
        config_path = expanduser(SCRIPT_SETTINGS_PATH)
        with open(config_path, "r") as f:
            script_templates = json.load(f)

        scripts_section = script_templates[SCRIPT_SETTINGS_KEY_SCRIPTS]
        # get key that selects the key for script loading
        script_key = scripts_section[SCRIPT_SETTINGS_SCRIPTS_LOAD]
        return scripts_section[script_key]
    except IOError:
        # unreadable/missing settings file: deliberately ignored
        pass
    except ValueError as e:
        log.warn("Error loading %s due to: %s", config_path, e)
    except (KeyError, TypeError) as e:
        # structure of the settings is not as expected, honor the
        # documented contract instead of failing in the caller
        log.warn("Error loading %s due to missing or invalid entry: %s", config_path, e)
    return []
コード例 #22
0
def action_delete_apks_import(storage, delete_apk = False, hashes = None, package_names = None, tags = None, select_whole_db = False):
    ''' Delete from the import storage (database and/or filesys).

    The apks to delete are determined with `action_query_import_db`.
    A `StorageException` is logged as warning and not propagated.

    Parameters
    ----------
    storage : RedundantStorage
        The store to use.
    delete_apk : boolean, optional (default is False)
        Forwarded to `storage.delete_entry_for_apk`; if set, the log message
        announces deletion from database and file system.
    hashes : iterable<str>, optional (default is None)
    package_names : iterable<str>, optional (default is None)
    tags : iterable<str>, optional (default is None)
    select_whole_db : boolean, optional (default is False)
        If true, the check that at least one filter is given is skipped. Be careful!
        NOTE(review): `hashes`, `package_names` and `tags` are still forwarded
        to the query in this case -- confirm that this is intended.

    Raises
    ------
    ValueError
        If `select_whole_db` is false and none of the filters is given.
    '''
    try:
        # never touch the whole database unless explicitly requested
        selection_given = select_whole_db or len(Util.filter_not_none((hashes, package_names, tags))) > 0
        if not selection_given:
            raise ValueError('''Neither hashes nor package names nor tags specified!
             If you wan't do use the whole database, set `select_whole_db` to true.
             ''')

        matching_apks = action_query_import_db(storage, COMMAND_QUERY_APKS, hashes, package_names, tags)

        # use list, otherwise we have duplicates due to the generator
        for apk in list(matching_apks):
            if delete_apk:
                clilog.info("Will delete from database and file system: \n%s ", apk.detailed_description())
            else:
                clilog.info("Will delete %s from database: %s ", apk.short_description(), storage.import_db_storage)
            storage.delete_entry_for_apk(apk, delete_apk)
    except StorageException as storage_error:
        log.warn(storage_error)
コード例 #23
0
def run_analysis(analyzer):
    ''' Run the analysis with the `analyzer` and log how many apks have been analyzed.

    Parameters
    ----------
    analyzer : BaseAnalyzer

    Returns
    -------
    int
        Number of analyzed apks.
    None
        If an `AndroScriptError` occurred (it is logged).
    '''
    from androlyze.analyze.exception import AndroScriptError

    try:
        analyzed_count = analyzer.analyze()

        if analyzed_count == 0:
            log.warn("No apk file has been analyzed !")
            return analyzed_count

        log.warn("Analyzed %s apks", analyzed_count)
        return analyzed_count
    except AndroScriptError as script_error:
        log.exception(script_error)

    return None
コード例 #24
0
def action_import_apks(storage, apk_paths,
                       copy_apk = False, copy_to_mongodb = False,
                       update = False, tag = None,
                       # shared memory
                       cnt_imported_apks = None, total_apk_count = None, import_finished = None,
                       # concurrent settings
                       concurrency = None
                       ):
    ''' Import the apks from the `apk_paths` and create the file system structure
    where the results will be kept, specified by `storage`.

    The paths are split into `concurrency` parts and each part is imported
    by its own process. The function returns after all processes have been joined.

    Parameters
    ----------
    storage : RedundantStorage
        The store to use.
    apk_paths : iterable<str>
        The apk files and/or directories.
    copy_apk : bool
        Import the apk file to the `import_dir` (copy it).
    copy_to_mongodb : bool, optional (default is False)
        Also import into MongoDB. Useful for the distributed analysis.
    update : bool
        Update apks that have already been imported.
    tag : str, optional (default is None)
        Some tag
    cnt_imported_apks : multiprocessing.Value<int>, optional (default is None)
        If given, use for progress updating.
        Otherwise a counter is created internally.
    total_apk_count : multiprocessing.Value<int>, optional (default is None)
        If given, use for total count of apks.
    import_finished : multiprocessing.Value<byte>, optional (default is None)
        If given, set to 1 to signal that import has been completed.
    concurrency : int, optional (default is number of cpus)
        Number of processes to use for the import.
    '''
    from androlyze.loader.ApkImporter import ApkImporter

    # get single paths to apks so we get the correct total count of apks
    clilog.info("looking for apks in given paths ... ")
    apk_paths = ApkImporter.get_apks_from_list_or_dir(apk_paths)

    if total_apk_count is not None:
        # may be time consuming for recursive lookup
        apk_paths, total_apk_count.value = Util.count_iterable_n_clone(apk_paths)

    # create count if not given
    if cnt_imported_apks is None:
        cnt_imported_apks = Value('i', 0, lock = RLock())

    # set concurrency
    if concurrency is None:
        concurrency = cpu_count()
    log.warn("Using %d processes", concurrency)

    clilog.info("Storage dir is %s" % storage.fs_storage.store_root_dir)
    if copy_apk:
        clilog.info("Copying APKs to %s ..." % storage.fs_storage.store_root_dir)

    def import_apks(apk_paths):
        ''' Import the given part of the apk paths (runs in a child process). '''
        apk_importer = ApkImporter(apk_paths, storage)
        for apk in apk_importer.import_apks(copy_apk = copy_apk, copy_to_mongodb = copy_to_mongodb,
                                                update = update, tag = tag):

            clilog.info("imported %s", apk.short_description())

            # use shared memory counter if given
            if cnt_imported_apks is not None:
                with cnt_imported_apks.get_lock():
                    cnt_imported_apks.value += 1

    pool = []

    # don't convert generator to list if only 1 process wanted
    apk_paths = [apk_paths] if concurrency == 1 else Util.split_n_uniform_distri(list(apk_paths), concurrency)

    # start parallel import
    # multiprocessing's pool causes pickle errors
    for i in range(concurrency):
        p = Process(target = import_apks, args = (apk_paths[i], ))
        log.debug("starting process %s", p)
        pool.append(p)
        p.start()

    for proc in pool:
        proc.join()
        # log the process that actually has been joined, after the join
        log.debug("joined on process %s", proc)

    apks_imported = cnt_imported_apks.value != 0
    # show some message that no APK has been imported
    if not apks_imported:
        log.warn("No .apk file has been imported! This means no .apk file has been found or they already have been imported.")
    else:
        clilog.info("done")

    # because not all apks may be importable, we cannot use we count for signal that the import is done
    if import_finished is not None:
        import_finished.value = 1

    clilog.info("Imported %d apks", cnt_imported_apks.value)
コード例 #25
0
def create_analyzer(storage, script_list, apks_or_paths = None,
                   mode = ANALYZE_MODE_PARALLEL, concurrency = None,
                   serialize_apks = True
                   ):
    '''
    Create the analyzer only.

    The scripts are imported and instantiated first. Errors are logged and not
    propagated; an `IOError` additionally terminates the program via `sys.exit(1)`.

    Parameters
    ----------
    storage : RedundantStorage
        The store to use.
    script_list : list<str>
        List of paths to scripts (complete filename with extension).
    apks_or_paths: list<str> or list<Apk>, optional (default is None)
        List of `Apk` or paths to the apks which shall be analyzed with the given scripts
        If you analyze from paths the `import_date` is not set!
    mode : str, optional (default is `ANALYZE_MODE_PARALLEL`)
        Do an parallel analysis by default. Choose between : `ANALYZE_MODE_NON_PARALLEL`,
        `ANALYZE_MODE_PARALLEL`, `ANALYZE_MODE_DISTRIBUTED`.
    concurrency : int, optional (default is number of cpu cores)
        Number of workers to spawn.
    serialize_apks : bool, optional (default is True)
        If true, serialize .apk .
        Otherwise id (hash) of the apk will be send and fetched by the worker from the result db.
        Be sure to import the apks to the result db first!

    Returns
    -------
    The analyzer matching `mode`.
    None
        If no scripts or no apks have been supplied, `mode` is unknown
        or an error occurred.
    '''
    from androlyze.model.script import ScriptUtil
    from androlyze.analyze.exception import AndroScriptError

    try:
        # list<type<AndroScript>>
        androscript_list = ScriptUtil.import_scripts(script_list)
        instantiated_scripts = sorted(ScriptUtil.instantiate_scripts(androscript_list, script_paths = script_list))

        if not instantiated_scripts:
            log.warn("No scripts supplied!")
            return None

        # get hashes for `AndroScript`s so that we can set the hash directly next time we instantiate the script
        script_hashes = [script.hash for script in instantiated_scripts]
        min_script_needs = ScriptUtil.get_minimum_script_options(instantiated_scripts)

        # log infos about scripts
        clilog.info('Loaded scripts:\n%s', '\n'.join(str(script) for script in instantiated_scripts))
        log.info(ScriptUtil.androscript_options_descr(instantiated_scripts))

        if not apks_or_paths:
            # nothing to analyze
            return None

        # argument for BaseAnalyzer
        args = storage, androscript_list, script_hashes, min_script_needs, apks_or_paths
        log.info("Mode: %s", mode)

        # normal analyzer
        if mode == ANALYZE_MODE_NON_PARALLEL:
            from androlyze.analyze.Analyzer import Analyzer
            return Analyzer(*args)

        # use parallel analyzer
        if mode == ANALYZE_MODE_PARALLEL:
            from androlyze.analyze.parallel.ParallelAnalyzer import ParallelAnalyzer
            return ParallelAnalyzer(*args, concurrency = concurrency)

        # use distributed one
        if mode == ANALYZE_MODE_DISTRIBUTED:
            from androlyze.analyze.distributed.DistributedAnalyzer import DistributedAnalyzer
            return DistributedAnalyzer(*args, concurrency = concurrency, serialize_apks = serialize_apks)

        # unknown mode
        return None

    except ApkImportError as import_error:
        log.warn(import_error)
    except IOError as io_error:
        log.warn(AndroScriptError(io_error.filename, caused_by = io_error))
        sys.exit(1)
    except Exception as error:
        # also covers `ImportError`
        log.exception(error)
コード例 #26
0
    def _analyze(self, apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args, **kwargs):
        ''' Analyze by running all `AndroScript`s returned by `chain_scripts`.

        The results of every chained script are merged into `self.res`.
        Nothing is returned.

        Parameters
        ----------
        apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args
            Passed unchanged to the `analyze` method of each chained script.
        **kwargs
            Also passed to each chained script. The key "log_script_meta"
            (default True) is read here and then overwritten with False
            before the chained scripts are run.

        Raises
        ------
        Exception
            The exception of a chained script is re-raised
            if `continue_on_script_failure` returns a false value.
        '''

        # log script meta ?
        log_script_meta = kwargs.get("log_script_meta", True)
        # may be disabled! check!
        if not self.log_chained_script_meta_infos():
            log_script_meta = False

        # don't log script meta infos in chained scripts inside this `ChainedScript`
        kwargs["log_script_meta"] = False

        if log_script_meta:
            # log meta infos
            self._log_chained_script_meta()

        # collect results from scripts
        collected_results = self.res

        # run over scripts
        for ascript in self.chain_scripts():
            script_result = None
            chained_script_name = self.try_get_chained_script_name(ascript)
            try:
                # analyze with script
                script_result = ascript.analyze(apk, dalvik_vm_format, vm_analysis, gvm_analysis,
                                                *args, **kwargs)

                # store results under given categories
                categories = self.root_categories()
                if len(categories) > 0:
                    # run over dict and log items
                    for key, val in script_result.results.items():
                        collected_results.register_keys([key], *categories)
                        collected_results.log(key, val, *categories)

                else:
                    # simply update dict
                    collected_results.results.update(script_result.results)

                if log_script_meta:
                    # log successful run
                    collected_results.log_append_to_enum(CAT_SUCCESSFUL, chained_script_name, CAT_ROOT)

            # also covers errors raised while merging the results above
            except Exception as e:
                if log_script_meta:
                    # the value that will be logged for the script failure
                    failure_log_val = chained_script_name

                    # if exception shall be logged, create dict with name as key and exception as value
                    if self.log_script_failure_exception():
                        # exception message
                        exc_msg = Util.format_exception(sys.exc_info(), as_string = False)
                        failure_log_val = {failure_log_val : exc_msg}

                    # log that script encountered an error
                    collected_results.log_append_to_enum(CAT_FAILURES, failure_log_val, CAT_ROOT)

                if not self.continue_on_script_failure():
                    # reraise exception if the analysis shall be stopped
                    # after a script encountered an error
                    raise
                else:
                    # keep going with the next chained script
                    log.warn('''%s: The script "%s" on apk: %s caused an error! But the other scripts will still run! Have a look at the options of `ChainedScript` for exception traceback writing!
\tError: %s''' % (self.__class__.__name__, ascript, apk.short_description(), e))
コード例 #27
0
    def fetch_results_from_mongodb(self, rds, results, wait_for_db = True,
                                   # progress
                                   nice_progess = False, synced_entries = None, total_sync_entries = None):
        '''
        Fetch some results from the result database and write them to disk.

        If data cannot be loaded from db, try until it can be
        (the whole fetch is restarted every `DATABASE_RETRY_TIME` seconds).
        Nothing is done if `results` is None.

        Parameters
        ----------
        rds : ResultDatabaseStorage
            The database to query for the results.
        results : list< tuple<id, gridfs (bool)> >
            Define which results shall be fetched.
        wait_for_db : bool, optional (default is True)
            Wait until data could be fetched from db.
        nice_progess : bool, optional (default is False)
            If enabled update show some nice progress bar on the cli
            instead of writing debug log messages.
        synced_entries : multiprocessing.Value<int>, optional (default is None)
            If supplied store number of already synces entries.
        total_sync_entries : multiprocessing.Value<int>, optional (default is None)
            If supplied store number of total entries to sync.
            Both counters are only used if both are supplied.

        Raises
        ------
        DatabaseLoadException
            If `wait_for_db` is False and an error occurred.
        PyMongoError
            If `wait_for_db` is False and an error occurred.
        '''
        # retry in ... seconds
        DATABASE_RETRY_TIME = 5

        # if true assume both counts are shared memory (Value)
        use_shared_memory = synced_entries is not None and total_sync_entries is not None

        if results is not None:
            results_stored = False
            while not results_stored:
                try:
                    # get ids
                    non_gridfs_ids, gridfs_ids = MongoUtil.split_result_ids(results)

                    # counts
                    cnt_non_gridfs_ids = len(non_gridfs_ids)
                    cnt_gridfs_ids = len(gridfs_ids)

                    if use_shared_memory:
                        total_sync_entries.value = cnt_gridfs_ids + cnt_non_gridfs_ids

                    # gridfs raw data as well as metadata
                    gridfs_entries_raw = []
                    if gridfs_ids:
                        gridfs_entries_raw = rds.get_results_for_ids(gridfs_ids, non_document = True, non_document_raw = True)

                    # regular documents (non gridfs)
                    non_gridfs_entries = []
                    if non_gridfs_ids:
                        non_gridfs_entries = rds.get_results_for_ids(non_gridfs_ids, non_document = False, non_document_raw = True)

                    if not nice_progess:
                        log.debug("fetching %d non-documents (gridfs) ... ", cnt_gridfs_ids)

                    for i, gridfs_entry_raw in enumerate(gridfs_entries_raw, 1):

                        # get our stored metadata (for script and apk)
                        gridfs_entry_meta = gridfs_entry_raw.metadata

                        if not nice_progess:
                            log.debug("getting results for %s", gridfs_entry_meta[RESOBJ_APK_META][RESOBJ_APK_META_PACKAGE_NAME])
                        else:
                            Util.print_dyn_progress(Util.format_progress(i, cnt_gridfs_ids))

                        # use apk to extract data from dict
                        fastapk = FastApk.load_from_result_dict(gridfs_entry_meta)
                        # get filename
                        file_name = gridfs_entry_raw.filename

                        # write results to disk
                        # a store error is only logged, the entry still counts as synced below
                        try:
                            self.store_custom_data(fastapk.package_name, fastapk.version_name, fastapk.hash, file_name, gridfs_entry_raw.read())
                        except FileSysStoreException as e:
                            log.exception(e)

                        # update shared memory progress indicator
                        if use_shared_memory:
                            with synced_entries.get_lock():
                                synced_entries.value += 1

                    if not nice_progess:
                        log.debug("fetching %d documents (non-gridfs) ... ", cnt_non_gridfs_ids)

                    for i, non_gridfs_entry in enumerate(non_gridfs_entries, 1):
                        if not nice_progess:
                            clilog.debug("getting results for %s" % non_gridfs_entry[RESOBJ_APK_META][RESOBJ_APK_META_PACKAGE_NAME])
                        else:
                            Util.print_dyn_progress(Util.format_progress(i, cnt_non_gridfs_ids))

                        # write results to disk
                        self.store_result_dict(non_gridfs_entry)

                        # update shared memory progress indicator
                        if use_shared_memory:
                            with synced_entries.get_lock():
                                synced_entries.value += 1

                    # if not wait for db wanted stop here
                    # NOTE: the expression always evaluates to True,
                    # so the loop ends after the first pass without database error
                    results_stored = True or not wait_for_db

                except (DatabaseLoadException, PyMongoError) as e:
                    if not wait_for_db:
                        raise
                    # `e` is logged here and also handed to `Util.log_will_retry`
                    log.warn(e)
                    Util.log_will_retry(DATABASE_RETRY_TIME, exc = e)
                    sleep(DATABASE_RETRY_TIME)
コード例 #28
0
ファイル: AnalyzeTask.py プロジェクト: faheemfayyaz/androlyze
    def run(self, androscripts, min_script_needs, script_hashes, apk_zipfile_or_hash, is_hash=True, fast_apk=None):
        '''
        Analyze a single apk with the given scripts and store the results.

        Parameters
        ----------
        androscripts : list<str>
            List of package names.
        min_script_needs : tuple<bool>
            See :py:method:`ScriptUtil.get_maximal_script_options`.
        script_hashes : list<str>
            If given, set the hash for the `AndroScript`s
        apk_zipfile_or_hash : str
            The raw content of the .apk file (zipfile) or the hash of it (sha256, id in db).
        is_hash : bool, optional (default is True)
            Determines if `apk_zipfile_or_hash` is a hash (id).
        fast_apk : FastApk, optional (default is None)
            Holds the meta infos for the apk.

        Returns
        -------
        tuple<tuple<str, bool>>
            First component is the id of the entry
            and the second a boolean indication if the result has been stored in gridfs.
        ()
            If the apk could not be opened or analyzed, or if nothing has been stored.
        '''
        try:
            # keep the call arguments (see method retry_arguments)
            self.__retry_arguments = (androscripts, min_script_needs, script_hashes,
                                      apk_zipfile_or_hash, is_hash, fast_apk)
            validate_hashes = settings.script_hash_validation_enabled()

            # open database/apk storage if not already done
            self.__open_db()
            self.__open_apk_storage()

            # either validate the sent hashes against the local ones or reuse the scripts
            if validate_hashes:
                setup_scripts = self.__setup_scripts_hash_validation
            else:
                setup_scripts = self.__setup_scripts_reuse
            setup_scripts(androscripts, script_hashes)

            if is_hash:
                # try the prefetch pool first, fall back to the apk storage
                opened_apk = apk_prefetch_pool.get(apk_zipfile_or_hash, None)
                if opened_apk is None:
                    opened_apk = self.__get_apk_from_storage_retry(apk_zipfile_or_hash, apk=fast_apk)
            else:
                log.info("opening apk via raw data ... ")
                opened_apk = AnalyzeUtil.open_apk(apk_or_path=apk_zipfile_or_hash, apk=fast_apk, raw=True)

            # could not be opened -> the error has already been logged
            if opened_apk is None:
                return ()

            analysis = AnalyzeUtil.analyze_apk(opened_apk, self.androscripts, min_script_needs,
                                               propagate_error=False,
                                               reset_scripts=not validate_hashes)
            if analysis is None:
                return ()

            fastapk, script_results = analysis
            log.info("analyzed %s", fastapk.short_description())

            # falsy if an error occurred while storing
            stored = self.__store_results(fastapk, script_results)
            return tuple(stored) if stored else ()
        except SoftTimeLimitExceeded:
            log.warn("Task %s exceeded it's soft time limit!", self)
            raise
        except ScriptHashValidationError:
            # explicitly passed on to the caller
            raise
        finally:
            # the apk is not needed in the prefetch pool anymore
            if is_hash:
                if apk_zipfile_or_hash in apk_prefetch_pool:
                    del apk_prefetch_pool[apk_zipfile_or_hash]
コード例 #29
0
    def _analyze(self):
        ''' See doc of :py:method:`BaseAnalyzer.analyze`.

        Starts `self.concurrency` workers, enqueues everything yielded by
        `AnalyzeUtil.apk_gen` followed by one stop sentinel per worker
        and blocks until the work queue has been joined.

        A first `KeyboardInterrupt` triggers a hot shutdown: the queue is cleared,
        the stop sentinels are enqueued again and the workers are joined.
        A second `KeyboardInterrupt` during the hot shutdown triggers a cold shutdown:
        SIGINT is sent to every worker.

        Returns
        -------
        int
            The value of the `cnt_analyzed_apks` counter.
        '''
        # bound before the `try` so that the interrupt handlers can always refer to it
        work_queue = self.work_queue
        try:
            # create worker pool
            log.debug("starting %s workers ...", self.concurrency)
            for _ in range(self.concurrency):
                p = Worker(self.script_list, self.script_hashes,
                           self.min_script_needs, work_queue, self.storage,
                           self.cnt_analyzed_apks, self.analyzed_apks,
                           self.storage_results)
                self.workers.append(p)
                p.daemon = True

            # start workers
            for p in self.workers:
                p.start()

            # queue has size limit -> start workers first then enqueue items
            log.info("Loading apk paths into work queue ...")
            for apk_stuff in AnalyzeUtil.apk_gen(self.apks_or_paths):
                # task is apk with all scripts
                work_queue.put(apk_stuff)

            for _ in range(self.concurrency):
                # signal end-of-work
                work_queue.put(STOP_SENTINEL)

            # progress view for cli
            av = AnalysisStatsView(self.cnt_analyzed_apks, self._cnt_apks,
                                   self.analyzed_apks)
            av.daemon = True
            av.start()

            # block until workers finished
            work_queue.join()
            av.terminate()
            log.debug("joined on work queue ...")

            return self.cnt_analyzed_apks.value

        # try hot shutdown first
        except KeyboardInterrupt:
            log.warn("Hot shutdown ... ")
            try:
                log.warn("clearing work queue ... ")
                Util.clear_queue(work_queue)
                log.warn("cleared work queue ... ")

                for _ in range(self.concurrency):
                    # signal end-of-work
                    work_queue.put(STOP_SENTINEL)

                for worker in self.workers:
                    worker.join()
                log.warn("waited for all workers ... ")

                return self.cnt_analyzed_apks.value

            # if user really wants make a cold shutdown -> kill processes
            except KeyboardInterrupt:
                log.warn("Cold shutdown ... ")
                log.warn("Hard shutdown wanted! Killing all workers!")

                # kill processes via SIGINT -> send CTRL-C
                for w in self.workers:
                    try:
                        os.kill(w.pid, signal.SIGINT)
                    # best effort, but do not swallow KeyboardInterrupt/SystemExit:
                    # OSError -> process already gone or not allowed to signal it
                    # TypeError -> worker has never been started (pid is None)
                    except (OSError, TypeError) as e:
                        log.debug("could not send SIGINT to %s: %s", w, e)

                return self.cnt_analyzed_apks.value
コード例 #30
0
    def _analyze(self):
        ''' See doc of :py:method:`BaseAnalyzer.analyze`.

        Starts `self.concurrency` workers, enqueues everything yielded by
        `AnalyzeUtil.apk_gen` followed by one stop sentinel per worker
        and blocks until the work queue has been joined.

        A first `KeyboardInterrupt` triggers a hot shutdown: the queue is cleared,
        the stop sentinels are enqueued again and the workers are joined.
        A second `KeyboardInterrupt` during the hot shutdown triggers a cold shutdown:
        SIGINT is sent to every worker.

        Returns
        -------
        int
            The value of the `cnt_analyzed_apks` counter.
        '''
        # bound before the `try` so that the interrupt handlers can always refer to it
        work_queue = self.work_queue
        try:
            # create worker pool
            log.debug("starting %s workers ...", self.concurrency)
            for _ in range(self.concurrency):
                p = Worker(self.script_list, self.script_hashes, self.min_script_needs,
                           work_queue, self.storage,
                           self.cnt_analyzed_apks, self.analyzed_apks, self.storage_results)
                self.workers.append(p)
                p.daemon = True

            # start workers
            for p in self.workers:
                p.start()

            # queue has size limit -> start workers first then enqueue items
            log.info("Loading apk paths into work queue ...")
            for apk_stuff in AnalyzeUtil.apk_gen(self.apks_or_paths):
                # task is apk with all scripts
                work_queue.put(apk_stuff)

            for _ in range(self.concurrency):
                # signal end-of-work
                work_queue.put(STOP_SENTINEL)

            # progress view for cli
            av = AnalysisStatsView(self.cnt_analyzed_apks, self._cnt_apks, self.analyzed_apks)
            av.daemon = True
            av.start()

            # block until workers finished
            work_queue.join()
            av.terminate()
            log.debug("joined on work queue ...")

            return self.cnt_analyzed_apks.value

        # try hot shutdown first
        except KeyboardInterrupt:
            log.warn("Hot shutdown ... ")
            try:
                log.warn("clearing work queue ... ")
                Util.clear_queue(work_queue)
                log.warn("cleared work queue ... ")

                for _ in range(self.concurrency):
                    # signal end-of-work
                    work_queue.put(STOP_SENTINEL)

                for worker in self.workers:
                    worker.join()
                log.warn("waited for all workers ... ")

                return self.cnt_analyzed_apks.value

            # if user really wants make a cold shutdown -> kill processes
            except KeyboardInterrupt:
                log.warn("Cold shutdown ... ")
                log.warn("Hard shutdown wanted! Killing all workers!")

                # kill processes via SIGINT -> send CTRL-C
                for w in self.workers:
                    try:
                        os.kill(w.pid, signal.SIGINT)
                    # best effort, but do not swallow KeyboardInterrupt/SystemExit:
                    # OSError -> process already gone or not allowed to signal it
                    # TypeError -> worker has never been started (pid is None)
                    except (OSError, TypeError) as e:
                        log.debug("could not send SIGINT to %s: %s", w, e)

                return self.cnt_analyzed_apks.value
コード例 #31
0
ファイル: Run.py プロジェクト: faheemfayyaz/androlyze
    def run_action(self, cmd):
        ''' Execute the action which belongs to `cmd` (see COMMAND_ prefixed variables).

        Parameters
        ----------
        cmd : object
            The command to run.
            If None, the command is taken from the parsed cli arguments.
            Commands which are not in `COMMANDS_ALL` are ignored.
        '''
        parser = self.parser
        args = self.args

        # no command specified through program name -> get it from argparser
        if cmd is None:
            cmd = args.command

        if cmd not in COMMANDS_ALL:
            return

        hashes, package_names, tags = CLIUtil.get_filter_options_from_cli(args)
        yes = args.yes

        if cmd == COMMAND_QUERY:
            self.action_query(hashes, package_names, tags, yes)
            return

        # dblyze -> do the analysis results evaluation
        if cmd == COMMAND_EVAL:
            for eval_script in ScriptUtil.import_scripts(args.scripts, clazz_name="Eval"):
                eval_script().evaluate(self.storage)
            return

        # sync from result db to file sys
        if cmd == COMMAND_SYNC:
            # first call delivers the number of entries which is shown in the prompt
            total_entries = androlyze.action_sync_fs(self.storage, lambda _: False)
            prompt = "Will download %d entries from result database!" % total_entries
            CLIUtil.cli_check_n_exec(androlyze.action_sync_fs,
                                     prompt_prefix=prompt,
                                     circumvent_check=args.yes,
                                     args=(self.storage, lambda _: True))
            return

        # all remaining commands are framed by a welcome and a "done" message
        clilog.info("Welcome to %s!\n" % PROJECT_NAME)

        if cmd == COMMAND_IMPORT:
            apks_or_paths, _ = self.get_apks_or_paths_from_cli()
            tag = args.tag
            copy2disk = args.copy_disk
            copy2db = args.copy_db
            update = args.update
            concurrency = args.concurrency
            if not update:
                log.warn('--update not supplied.\n'
                         'No update of already present apks in database will be done!')
            androlyze.action_import_apks(self.storage, apks_or_paths, copy2disk, copy2db,
                                         update, tag, concurrency=concurrency)

        elif cmd == COMMAND_ANALYZE:
            # androguard path has to be set before
            from androlyze import action_analyze

            # sort apks by app code size for better scheduling (unless disabled)
            skip_sorting = args.no_sort_code_size
            sort_kwargs = {}
            if not skip_sorting:
                sort_kwargs = dict(order_by=TABLE_APK_IMPORT_KEY_SIZE_APP_CODE, ascending=False)
            apks_or_paths, _ = self.get_apks_or_paths_from_cli(**sort_kwargs)

            # debug infos
            if not (skip_sorting or args.apks):
                # duplicate the iterator so that logging does not consume the apks
                apks_or_paths, debug_it = itertools.tee(apks_or_paths)
                clilog.info('Using Code Size Scheduling for faster analysis!')
                size_lines = ('%s: %s' % (x.package_name, x.size_app_code) for x in debug_it)
                log.debug('\n'.join(size_lines))

            scripts = args.scripts
            parallel_mode, concurrency, send_id = self.__load_parallel_settings()

            # map the parallelization mode to the analysis mode, parallel is the fallback
            analyze_mode = {
                PARALLELIZATION_MODE_DISTRIBUTED: ANALYZE_MODE_DISTRIBUTED,
                PARALLELIZATION_MODE_NON_PARALLEL: ANALYZE_MODE_NON_PARALLEL,
            }.get(parallel_mode, ANALYZE_MODE_PARALLEL)

            action_analyze(self.storage, scripts, apks_or_paths,
                           mode=analyze_mode, concurrency=concurrency,
                           serialize_apks=not send_id)

        elif cmd == COMMAND_DELETE:
            self.action_delete(parser, hashes, package_names, tags, yes)

        clilog.info("done")
コード例 #32
0
ファイル: Analyzer.py プロジェクト: faheemfayyaz/androlyze
    def _analyze(self, test=False):
        '''
        Analyze all apks one after another and store the results in the predefined place.

        Parameters
        ----------
        test : bool, optional (default is False)
            Use for testing. Will not store any result !

        Returns
        -------
        int
            Number of analyzed apks
        list<ResultObject>
            List of the results (only if `test`)
        '''
        scripts = self.script_list

        # result objects, only filled in test mode
        collected = []

        for apk_path, _apk, _ in apk_gen(self.apks_or_paths):
            opened_apk = open_apk(apk_path, apk=_apk)

            # error happened and has been logged -> next apk
            if opened_apk is None:
                continue

            # tuple<FastApk, AndroScript>
            analysis = AnalyzeUtil.analyze_apk(opened_apk, scripts, self.min_script_needs, reset_scripts=True)

            if analysis:
                fastapk, script_results = analysis

                if test:
                    # deliver result objects instead of storing them
                    collected.extend([s.res for s in script_results])
                else:
                    for script in script_results:
                        try:
                            # store and keep the storage result
                            self.add_storage_result(
                                AnalyzeUtil.store_script_res(self.storage, script, fastapk))
                        except StorageException as e:
                            log.warn(e)

                clilog.info("analyzed %s", fastapk.short_description())

            # counted for every opened apk, no lock needed, nobody else is writing to this value
            self.cnt_analyzed_apks.value += 1

        return collected if test else self.cnt_analyzed_apks.value
コード例 #33
0
ファイル: Analyzer.py プロジェクト: umr-ds/androlyze
    def _analyze(self, test=False):
        '''
        Run the scripts on every apk, sequentially, and store the results in the predefined place.

        Parameters
        ----------
        test : bool, optional (default is False)
            Use for testing. Will not store any result !

        Returns
        -------
        int
            Number of analyzed apks
        list<ResultObject>
            List of the results (only if `test`)
        '''
        script_list = self.script_list
        counter = self.cnt_analyzed_apks

        # only used in test mode
        result_objects = []

        for apk_path, _apk, _ in apk_gen(self.apks_or_paths):
            apk_handle = open_apk(apk_path, apk=_apk)

            # None -> error happened and has been logged
            if apk_handle is None:
                continue

            # tuple<FastApk, AndroScript>
            res = AnalyzeUtil.analyze_apk(apk_handle, script_list,
                                          self.min_script_needs,
                                          reset_scripts=True)
            if res:
                fastapk, script_results = res

                if test:
                    # testing mode: hand out the result objects, store nothing
                    result_objects += [script.res for script in script_results]
                else:
                    for script in script_results:
                        try:
                            stored = AnalyzeUtil.store_script_res(self.storage, script, fastapk)
                            # keep storage results
                            self.add_storage_result(stored)
                        except StorageException as e:
                            log.warn(e)

                clilog.info("analyzed %s", fastapk.short_description())

            # no lock needed, nobody else is writing to this value
            counter.value += 1

        if test:
            return result_objects
        return counter.value
コード例 #34
0
    def _analyze(self, apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args,
                 **kwargs):
        ''' Run all chained `AndroScript`s and collect their results in `self.res`.

        All arguments are passed on to the `analyze` method of every chained script,
        except that `log_script_meta` is always forced to False for them.

        A failing script is re-raised unless `continue_on_script_failure()` is true,
        in which case a warning is logged and the next script is run.
        '''
        # meta infos are logged if the caller wants it (default) and the option is not disabled
        meta_requested = kwargs.get("log_script_meta", True)
        log_script_meta = meta_requested if self.log_chained_script_meta_infos() else False

        # don't log script meta infos in chained scripts inside this `ChainedScript`
        kwargs["log_script_meta"] = False

        if log_script_meta:
            self._log_chained_script_meta()

        # result object which receives the results of all scripts
        merged = self.res

        for ascript in self.chain_scripts():
            script_name = self.try_get_chained_script_name(ascript)
            try:
                outcome = ascript.analyze(apk, dalvik_vm_format, vm_analysis,
                                          gvm_analysis, *args, **kwargs)

                categories = self.root_categories()
                if len(categories) == 0:
                    # no categories -> simply update dict
                    merged.results.update(outcome.results)
                else:
                    # register and log every item under the given categories
                    for key, val in outcome.results.items():
                        merged.register_keys([key], *categories)
                        merged.log(key, val, *categories)

                if log_script_meta:
                    # log successful run
                    merged.log_append_to_enum(CAT_SUCCESSFUL, script_name, CAT_ROOT)

            except Exception as e:
                if log_script_meta:
                    if self.log_script_failure_exception():
                        # log the exception under the script name
                        failure_entry = {
                            script_name: Util.format_exception(sys.exc_info(), as_string=False)
                        }
                    else:
                        # only the script name is logged
                        failure_entry = script_name

                    merged.log_append_to_enum(CAT_FAILURES, failure_entry, CAT_ROOT)

                if self.continue_on_script_failure():
                    log.warn(
                        '%s: The script "%s" on apk: %s caused an error! But the other scripts will still run! Have a look at the options of `ChainedScript` for exception traceback writing!\n'
                        '\tError: %s' % (self.__class__.__name__, ascript, apk.short_description(), e))
                else:
                    # the analysis shall be stopped after a script encountered an error
                    raise
コード例 #35
0
    def _analyze(self):
        ''' See doc of :py:method:`.BaseAnalyzer.analyze`.

        Publishes one analyze task per apk, waits for the results
        and returns what `stop_analysis_view()` delivers.
        Returns 0 if the network check fails or a `DatabaseOpenError` occurs.
        On `KeyboardInterrupt` or any other exception the tasks get revoked
        (if revocation is enabled in the celery settings).
        '''
        # without registered workers/network there is nothing to do
        try:
            clilog.info(CeleryUtil.get_workers_and_check_network())
        except NetworkError as e:
            log.critical(e)
            return 0

        storage = self.storage

        clilog.info("Number of apks to analyze: %d", self._cnt_apks)

        try:
            analyze_task = tasks[CeleryConstants.get_analyze_task_name()]

            storage.create_or_open_sub_storages()

            started_at = time()

            # generator over the raw .apk files or the apk hashes
            apk_gen = AnalyzeUtil.apk_id_or_raw_data_gen(
                self.apks, force_raw_data=self.serialize_apks)

            clilog.info("Task publishing progress:")

            if not self.serialize_apks:
                # send only apk id and let fetch via mongodb
                log.info("sending ids of apks")

                # lazily built task signatures, consumed by `group`
                signatures = (analyze_task.s(*id_args)
                              for id_args in self.send_id_args_generator(apk_gen))
                task_group = group(signatures)

                # publish tasks
                self.group_result = task_group()
            else:
                # serialize the .apks and send them one by one
                log.info("sending .apks to message broker")
                self.group_result = published = GroupResult(results=[])

                for apk_args in self.send_apk_args_generator(apk_gen):
                    published.add(analyze_task.delay(*apk_args))

            log.info("sending took %ss", (time() - started_at))
            sys.stderr.write("\nAnalysis progress:\n")

            # start showing analysis progress
            self.analyze_stats_view.start()

            log.debug("joining on ResultGroup ... ")

            # wait for results, the handlers are invoked through the callback
            on_result = self.get_callback_func(self.success_handler,
                                               self.error_handler)
            CeleryUtil.join_native(self.group_result,
                                   propagate=False,
                                   callback=on_result)

            clilog.info("\nanalysis done ... ")
            log.info("distributed analysis took %ss", (time() - started_at))

            return self.stop_analysis_view()
        except DatabaseOpenError as e:
            log.critical(e)
            return 0

        except (KeyboardInterrupt, Exception) as e:
            # a user interrupt needs no traceback
            if not isinstance(e, KeyboardInterrupt):
                log.exception(e)
            log.warn(
                "Interrupting distributed analysis ... Please wait a moment!")
            log.warn("revoking tasks on all workers ...")

            if celerysettings.CELERY_TASK_REVOCATION_ENABLED:
                revoke_options = dict(terminate=True, signal='SIGKILL')
                if self.group_result is not None:
                    # revoke via GroupResult if yet available/created
                    # first available after all tasks have been send
                    self.group_result.revoke(**revoke_options)
                else:
                    # still publishing -> revoke via task ids
                    log.debug("revoking while publishing tasks ...")
                    self.task_collection.revoke_all(**revoke_options)
                log.warn("revoked tasks and killed workers ...")

            # return number of analyzed apks
            return self.stop_analysis_view()
コード例 #36
0
ファイル: AnalyzeTask.py プロジェクト: umr-ds/androlyze
    def run(self,
            androscripts,
            min_script_needs,
            script_hashes,
            apk_zipfile_or_hash,
            is_hash=True,
            fast_apk=None):
        '''
        Analyze one apk with the given scripts and store the results.

        Parameters
        ----------
        androscripts : list<str>
            List of package names.
        min_script_needs : tuple<bool>
            See :py:method:`ScriptUtil.get_maximal_script_options`.
        script_hashes : list<str>
            If given, set the hash for the `AndroScript`s
        apk_zipfile_or_hash : str
            The raw content of the .apk file (zipfile) or the hash of it (sha256, id in db).
        is_hash : bool, optional (default is True)
            Determines if `apk_zipfile_or_hash` is a hash (id).
        fast_apk : FastApk, optional (default is None)
            Holds the meta infos for the apk.

        Returns
        -------
        tuple<tuple<str, bool>>
            First component is the id of the entry
            and the second a boolean indication if the result has been stored in gridfs.
        ()
            If the apk could not be opened or analyzed, or if nothing has been stored.
        '''
        no_results = ()
        try:
            # keep the call arguments (see method retry_arguments)
            self.__retry_arguments = (androscripts, min_script_needs,
                                      script_hashes, apk_zipfile_or_hash,
                                      is_hash, fast_apk)
            hash_validation = settings.script_hash_validation_enabled()

            # open database/apk storage if not already done
            self.__open_db()
            self.__open_apk_storage()

            if hash_validation:
                # validate sent hashes with local script hashes
                self.__setup_scripts_hash_validation(androscripts, script_hashes)
            else:
                # reuse if possible
                self.__setup_scripts_reuse(androscripts, script_hashes)

            if is_hash:
                # prefetched apk pool first, apk storage as fallback
                apk_obj = apk_prefetch_pool.get(apk_zipfile_or_hash, None)
                if apk_obj is None:
                    apk_obj = self.__get_apk_from_storage_retry(
                        apk_zipfile_or_hash, apk=fast_apk)
            else:
                log.info("opening apk via raw data ... ")
                apk_obj = AnalyzeUtil.open_apk(
                    apk_or_path=apk_zipfile_or_hash, apk=fast_apk, raw=True)

            # None -> could not be opened and error has been logged
            if apk_obj is None:
                return no_results

            outcome = AnalyzeUtil.analyze_apk(
                apk_obj,
                self.androscripts,
                min_script_needs,
                propagate_error=False,
                reset_scripts=not hash_validation)
            if outcome is None:
                return no_results

            fastapk, script_results = outcome
            log.info("analyzed %s", fastapk.short_description())

            storage_results = self.__store_results(fastapk, script_results)
            # falsy if an error occurred while storing
            if not storage_results:
                return no_results
            return tuple(storage_results)
        except SoftTimeLimitExceeded:
            log.warn("Task %s exceeded it's soft time limit!", self)
            raise
        except ScriptHashValidationError:
            # explicitly passed on to the caller
            raise
        finally:
            # the apk is not needed in the prefetch pool anymore
            if is_hash:
                if apk_zipfile_or_hash in apk_prefetch_pool:
                    del apk_prefetch_pool[apk_zipfile_or_hash]
コード例 #37
0
ファイル: TaskCollection.py プロジェクト: umr-ds/androlyze
 def revoke_all(self, *args, **kwargs):
     ''' Revoke all tasks whose ids are held in `self.task_ids`.

     All arguments are passed on to `app.control.revoke`.
     '''
     task_count = len(self.task_ids)
     log.warn("will revoke %d tasks", task_count)

     control = app.control
     control.revoke(self.task_ids, *args, **kwargs)
コード例 #38
0
    def _analyze(self):
        ''' See doc of :py:method:`.BaseAnalyzer.analyze`.

        Publishes one analyze task per apk, waits for the results
        and returns what `stop_analysis_view()` delivers.
        Returns 0 if the network check fails or a `DatabaseOpenError` occurs.
        On `KeyboardInterrupt` or any other exception the tasks get revoked
        (if revocation is enabled in the celery settings).
        '''
        # without registered workers/network there is nothing to do
        try:
            clilog.info(CeleryUtil.get_workers_and_check_network())
        except NetworkError as e:
            log.critical(e)
            return 0

        storage = self.storage

        clilog.info("Number of apks to analyze: %d", self._cnt_apks)

        try:
            analyze_task = tasks[CeleryConstants.get_analyze_task_name()]

            storage.create_or_open_sub_storages()

            t_start = time()

            # generator over the raw .apk files or the apk hashes
            apk_gen = AnalyzeUtil.apk_id_or_raw_data_gen(self.apks, force_raw_data=self.serialize_apks)

            clilog.info("Task publishing progress:")

            if not self.serialize_apks:
                # send only apk id and let fetch via mongodb
                log.info("sending ids of apks")

                # lazily built task signatures, consumed by `group`
                task_signatures = (analyze_task.s(*id_args)
                                   for id_args in self.send_id_args_generator(apk_gen))
                task_group = group(task_signatures)

                # publish tasks
                self.group_result = task_group()
            else:
                # serialize the .apks and send them one by one
                log.info("sending .apks to message broker")
                self.group_result = sent_tasks = GroupResult(results=[])

                for apk_args in self.send_apk_args_generator(apk_gen):
                    sent_tasks.add(analyze_task.delay(*apk_args))

            log.info("sending took %ss", (time() - t_start))
            sys.stderr.write("\nAnalysis progress:\n")

            # start showing analysis progress
            self.analyze_stats_view.start()

            log.debug("joining on ResultGroup ... ")

            # wait for results, the handlers are invoked through the callback
            result_callback = self.get_callback_func(self.success_handler, self.error_handler)
            CeleryUtil.join_native(self.group_result, propagate=False, callback=result_callback)

            clilog.info("\nanalysis done ... ")
            log.info("distributed analysis took %ss", (time() - t_start))

            return self.stop_analysis_view()
        except DatabaseOpenError as e:
            log.critical(e)
            return 0

        except (KeyboardInterrupt, Exception) as e:
            # a user interrupt needs no traceback
            if not isinstance(e, KeyboardInterrupt):
                log.exception(e)
            log.warn("Interrupting distributed analysis ... Please wait a moment!")
            log.warn("revoking tasks on all workers ...")

            if celerysettings.CELERY_TASK_REVOCATION_ENABLED:
                kill_options = dict(terminate=True, signal='SIGKILL')
                if self.group_result is not None:
                    # revoke via GroupResult if yet available/created
                    # first available after all tasks have been send
                    self.group_result.revoke(**kill_options)
                else:
                    # still publishing -> revoke via task ids
                    log.debug("revoking while publishing tasks ...")
                    self.task_collection.revoke_all(**kill_options)
                log.warn("revoked tasks and killed workers ...")

            # return number of analyzed apks
            return self.stop_analysis_view()