Esempio n. 1
0
    def __setup_scripts_reuse(self, androscripts, script_hashes):
        '''
        Prepare the scripts, reusing the already instantiated ones if possible.

        The submitted hashes are compared with the hashes remembered from the
        previous setup. If they are equal, the existing scripts are only reset.
        Otherwise the script modules are (re)imported and instantiated anew.

        Parameters
        ----------
        androscripts : list<str>
            List of package names.
        script_hashes : list<str>
            Hashes which are compared with the stored ones and
            passed on to the `AndroScript`s on instantiation.

        Raises
        ------
        AnalyzeError
            If an NoAndroScriptSubclass, IOError or ModuleNotSameClassNameException has been raised.
        ImportError
        '''
        # the stored hashes are kept as tuple -> convert for the comparison
        new_hashes = tuple(script_hashes)

        # The comparison is order-sensitive: the same scripts in a different
        # order are not reused. Reusing is intended for the same analysis,
        # where the order is kept.
        if new_hashes != self.script_hashes:
            log.info("reloading scripts cause hashes changed ... ")

            # (re)import the script modules
            script_types = ScriptUtil.import_scripts(
                androscripts, via_package=True, _reload=True)

            # instantiate the freshly imported script classes
            self.androscripts = ScriptUtil.instantiate_scripts(
                script_types, script_hashes=new_hashes)

            # remember the hashes for the next comparison
            self.script_hashes = new_hashes
            return

        # hashes are unchanged -> the scripts only have to be reset
        log.info("reusing scripts ... ")
        for script in self.androscripts:
            script.reset()
Esempio n. 2
0
    def chain_scripts(self):
        ''' Return the list of scripts, the component related ones grouped into a single chained script '''
        # these scripts are grouped via `ScriptUtil.chained_script` under "components"
        component_scripts = [
            Activities(),
            Services(),
            BroadcastReceivers(),
            ContentProviders(),
            PublicContentProviders(),
            Intents(),
        ]
        components = ScriptUtil.chained_script(component_scripts,
                                               ("components", ),
                                               name="components")

        return [components, Permissions(), Libs(), Files(), Manifest()]
Esempio n. 3
0
    def __setup_scripts_reuse(self, androscripts, script_hashes):
        '''
        Set up the scripts and reuse the loaded ones whenever the hashes match.

        If the submitted hashes equal the ones stored by the previous setup,
        the existing scripts are reset. Otherwise they are reloaded from disk
        and instantiated again.

        Parameters
        ----------
        androscripts : list<str>
            List of package names.
        script_hashes : list<str>
            Hashes which are compared with the stored ones and
            passed on to the `AndroScript`s on instantiation.

        Raises
        ------
        AnalyzeError
            If an NoAndroScriptSubclass, IOError or ModuleNotSameClassNameException has been raised.
        ImportError
        '''
        # a tuple is needed to compare against the stored hashes
        submitted_hashes = tuple(script_hashes)

        # Note: plain (order-sensitive) comparison. The same scripts in another
        # order trigger a reload, but reuse is meant for one and the same
        # analysis, where the order does not change.
        if submitted_hashes != self.script_hashes:
            log.info("reloading scripts cause hashes changed ... ")

            # (re)import script modules and create new instances from them
            imported_types = ScriptUtil.import_scripts(androscripts,
                                                       via_package = True,
                                                       _reload = True)
            self.androscripts = ScriptUtil.instantiate_scripts(imported_types,
                                                               script_hashes = submitted_hashes)

            # keep the hashes for the next comparison
            self.script_hashes = submitted_hashes
            return

        # nothing changed -> reset the existing scripts
        log.info("reusing scripts ... ")
        for androscript in self.androscripts:
            androscript.reset()
Esempio n. 4
0
    def test(script, apk_paths):
        '''
        Helper for developing and testing a script.

        Runs the script on the given apks through an `Analyzer` in test mode,
        e.g. to find unregistered keys and other errors.
        An `AndroScriptError` is logged and leads to an empty result list.

        Parameters
        ----------
        script : type
            The reference to the script which shall be tested (not instantiated!)
        apk_paths : iterable<str>
            Paths to apks

        Examples
        --------
        >>> for res in AndroScript.test(ClassDetails, ["../../../testenv/apks/a2dp.Vol.apk"]):
        ...     # get result object
        ...     print res
        ...     # get json
        ...     print res.write_to_json()

        Returns
        -------
        list<ResultObject>
            The `ResultObject` for every analyzed apk
        '''
        # local import -> no circular import
        from androlyze.analyze.Analyzer import Analyzer

        try:
            # the script is instantiated only to determine its options
            script_options = ScriptUtil.get_minimum_script_options(
                ScriptUtil.instantiate_scripts([script]))

            # arguments: storage, script_list, script_hashes, min_script_needs, apks_or_paths
            # the analyzer gets the script class, not an instance!
            analyzer = Analyzer(None, [script], None, script_options, apk_paths)
            return analyzer.analyze(test=True)
        except AndroScriptError as e:
            log.exception(e)

        # the analysis failed -> no results
        return []
Esempio n. 5
0
 def get_custom_res_obj_representation(script):
     ''' Return the representation of the custom result object of `script`
     which shall be stored '''
     custom_res = script.cres

     # the custom result object supplies its own representation
     if isinstance(custom_res, CustomResultObjInterface):
         return custom_res.get_custom_result_obj_repr()

     # result objects are represented through their json
     if ScriptUtil.is_result_object(custom_res):
         return custom_res.write_to_json()

     # fallback for everything else
     return str(custom_res)
Esempio n. 6
0
 def get_custom_res_obj_representation(script):
     """ Build the data representation of the script's custom result object
     (`script.cres`) that shall be stored """
     result_obj = script.cres

     # objects implementing the interface know their own representation
     if isinstance(result_obj, CustomResultObjInterface):
         return result_obj.get_custom_result_obj_repr()

     # a result object is stored as json
     if ScriptUtil.is_result_object(result_obj):
         return result_obj.write_to_json()

     # any other object is stored via its str() form
     return str(result_obj)
Esempio n. 7
0
    def __init__(self, storage, script_list, script_hashes, *args, **kwargs):
        ''' Initialize the base class and store the sorted, instantiated scripts.

        See :py:method`.BaseAnalyzer.__init__` for details on the first attributes '''
        super(Analyzer, self).__init__(
            storage, script_list, script_hashes, *args, **kwargs)

        # `script_list` holds script classes -> create instances and keep them sorted
        instantiated = ScriptUtil.instantiate_scripts(
            script_list, script_hashes=script_hashes)
        self.script_list = sorted(instantiated)
Esempio n. 8
0
    def test(script, apk_paths):
        '''
        Develop and test your script with this function.

        The script is run on the given apks by an `Analyzer` in test mode,
        which helps to find e.g. unregistered keys and other errors.
        If an `AndroScriptError` occurs it is logged and an empty list is returned.

        Parameters
        ----------
        script : type
            The reference to the script which shall be tested (not instantiated!)
        apk_paths : iterable<str>
            Paths to apks

        Examples
        --------
        >>> for res in AndroScript.test(ClassDetails, ["../../../testenv/apks/a2dp.Vol.apk"]):
        ...     # get result object
        ...     print res
        ...     # get json
        ...     print res.write_to_json()

        Returns
        -------
        list<ResultObject>
            The `ResultObject` for every analyzed apk
        '''
        # imported here to prevent a circular import
        from androlyze.analyze.Analyzer import Analyzer

        try:
            # an instance is only needed to read the script options
            instances = ScriptUtil.instantiate_scripts([script])
            min_options = ScriptUtil.get_minimum_script_options(instances)

            # arguments: storage, script_list, script_hashes, min_script_needs, apks_or_paths
            # the analyzer itself needs the uninstantiated script!
            analyzer = Analyzer(None, [script], None, min_options, apk_paths)
            return analyzer.analyze(test = True)
        except AndroScriptError as e:
            log.exception(e)

        # nothing could be analyzed
        return []
Esempio n. 9
0
    def __init__(self,
                 script_list,
                 script_hashes,
                 min_script_needs,
                 work_queue,
                 storage,
                 sm_analyzed_apks,
                 analyzed_apks,
                 storage_results=None):
        '''
        Instantiate the scripts and store the queues and shared objects of the worker.

        Parameters
        ----------
        script_list: list<type<AndroScript>>
            List of `AndroScript`s references (not instantiated class!)
        script_hashes : list<str>, optional (default is None)
            If given, set the hash for the `AndroScript`s
        min_script_needs : tuple<bool>
            See :py:method:`ScriptUtil.get_maximal_script_options`.
        work_queue : Queue<str>
            Queue with paths to apks which shall be analyzed.
        storage: RedundantStorage
            The storage to store the results.
        sm_analyzed_apks : Value
            Shared memory to add number of analyzed apks.
        analyzed_apks : Queue<FastAPK>
            Holds the analyzed APKs.
        storage_results : Queue<tuple<str, bool>>, optional (default is None)
            Storage results. First component is the id of the entry and the second a boolean indication if the result has been stored in gridfs.
            May be omitted; the queue is only touched if one is supplied.

        Raises
        ------
        AndroScriptError
            If an error happened while initializing some `AndroScript`
        '''
        super(Worker, self).__init__()

        # instantiate scripts (kept sorted)
        self.androscripts = sorted(
            ScriptUtil.instantiate_scripts(script_list,
                                           script_hashes=script_hashes))

        self.min_script_needs = min_script_needs

        # queues
        self.work_queue = work_queue
        self.analyzed_apks = analyzed_apks
        # NOTE(review): assuming multiprocessing queues, `cancel_join_thread`
        # keeps the process from blocking on unflushed queue data at exit - confirm
        self.analyzed_apks.cancel_join_thread()
        self.work_queue.cancel_join_thread()

        self.storage = storage

        self.__sm_analyzed_apks = sm_analyzed_apks

        self.__storage_results = storage_results
        # `storage_results` is optional: calling a method on the default (None)
        # would raise an AttributeError, so only touch a supplied queue
        if storage_results is not None:
            storage_results.cancel_join_thread()
Esempio n. 10
0
    def __setup_scripts_hash_validation(self, androscripts, script_hashes):
        '''
        Load the scripts from disk and validate them against the submitted hashes.

        The script modules are always (re)imported and instantiated. Afterwards
        the hashes of the loaded scripts are compared with `script_hashes`,
        ignoring the order.

        Parameters
        ----------
        androscripts : list<str>
            List of package names.
        script_hashes : list<str>
            The hashes the loaded scripts are expected to have.

        Raises
        ------
        AnalyzeError
            If an NoAndroScriptSubclass, IOError or ModuleNotSameClassNameException has been raised.
        ImportError
        ScriptHashValidationError
            If the validation of script hashes fails after reloading scripts from disk.
        '''
        # the submitted hashes as tuple
        expected_hashes = tuple(script_hashes)

        # (re)import the script modules
        script_types = ScriptUtil.import_scripts(
            androscripts, via_package=True, _reload=True)

        # the script paths are needed for path calculation
        script_paths = [Util.package_name_2_path(pkg) for pkg in androscripts]

        # create the script instances from the imported classes
        self.androscripts = ScriptUtil.instantiate_scripts(
            script_types, script_paths=script_paths)

        actual_hashes = tuple(script.hash for script in self.androscripts)

        # order does not matter for the validation
        if sorted(actual_hashes) != sorted(expected_hashes):
            raise ScriptHashValidationError(expected_hashes, actual_hashes)
Esempio n. 11
0
    def __setup_scripts_hash_validation(self, androscripts, script_hashes):
        '''
        Set up the scripts and check their hashes.

        The scripts are (re)imported from disk and instantiated. The hashes of
        the resulting scripts have to match the submitted `script_hashes`
        (independent of the order), otherwise the validation fails.

        Parameters
        ----------
        androscripts : list<str>
            List of package names.
        script_hashes : list<str>
            The hashes the loaded scripts are expected to have.

        Raises
        ------
        AnalyzeError
            If an NoAndroScriptSubclass, IOError or ModuleNotSameClassNameException has been raised.
        ImportError
        ScriptHashValidationError
            If the validation of script hashes fails after reloading scripts from disk.
        '''
        # tuple of the submitted hashes
        submitted_hashes = tuple(script_hashes)

        # import script modules
        imported_types = ScriptUtil.import_scripts(androscripts,
                                                   via_package = True,
                                                   _reload = True)

        # needed for path calculation
        paths = [Util.package_name_2_path(package) for package in androscripts]

        # instantiate the imported script classes
        self.androscripts = ScriptUtil.instantiate_scripts(imported_types,
                                                           script_paths = paths)

        loaded_hashes = tuple(androscript.hash for androscript in self.androscripts)

        # compare without regard to the order
        if sorted(loaded_hashes) != sorted(submitted_hashes):
            raise ScriptHashValidationError(submitted_hashes, loaded_hashes)
Esempio n. 12
0
    def __init__(self, script_list, script_hashes, min_script_needs, work_queue, storage,
                 sm_analyzed_apks, analyzed_apks, storage_results = None):
        '''
        Instantiate the scripts and store the queues and shared objects of the worker.

        Parameters
        ----------
        script_list: list<type<AndroScript>>
            List of `AndroScript`s references (not instantiated class!)
        script_hashes : list<str>, optional (default is None)
            If given, set the hash for the `AndroScript`s
        min_script_needs : tuple<bool>
            See :py:method:`ScriptUtil.get_maximal_script_options`.
        work_queue : Queue<str>
            Queue with paths to apks which shall be analyzed.
        storage: RedundantStorage
            The storage to store the results.
        sm_analyzed_apks : Value
            Shared memory to add number of analyzed apks.
        analyzed_apks : Queue<FastAPK>
            Holds the analyzed APKs.
        storage_results : Queue<tuple<str, bool>>, optional (default is None)
            Storage results. First component is the id of the entry and the second a boolean indication if the result has been stored in gridfs.
            May be omitted; the queue is only touched if one is supplied.

        Raises
        ------
        AndroScriptError
            If an error happened while initializing some `AndroScript`
        '''
        super(Worker, self).__init__()

        # instantiate scripts (kept sorted)
        self.androscripts = sorted(ScriptUtil.instantiate_scripts(script_list, script_hashes = script_hashes))

        self.min_script_needs = min_script_needs

        # queues
        self.work_queue = work_queue
        self.analyzed_apks = analyzed_apks
        # NOTE(review): assuming multiprocessing queues, `cancel_join_thread`
        # keeps the process from blocking on unflushed queue data at exit - confirm
        self.analyzed_apks.cancel_join_thread()
        self.work_queue.cancel_join_thread()

        self.storage = storage

        self.__sm_analyzed_apks = sm_analyzed_apks

        self.__storage_results = storage_results
        # `storage_results` defaults to None: calling a method on it would raise
        # an AttributeError, so only touch the queue if one has been supplied
        if storage_results is not None:
            storage_results.cancel_join_thread()
Esempio n. 13
0
    def store_result_for_apk(self, apk, script):
        '''
        Store the results in the file system.

        If a custom result object is used in `script` and it's not a `ResultObject`,
        str(custom res object) will be used for writing to disk.

        Parameters
        ----------
        apk: Apk
        script: AndroScript

        Raises
        ------
        FileSysStoreException
            If an IOError occurs while determining the result file name or
            while writing the result file.

        Returns
        -------
        str
            Path to result file.
        '''
        # Bind the name before the try block: if `get_apk_res_filename` itself
        # raises the IOError, the except clause would otherwise fail with an
        # UnboundLocalError instead of raising the documented exception.
        res_filename = None
        try:
            res_filename = self.get_apk_res_filename(apk, script)
            with open(res_filename, "w") as f:
                log.debug("storing results for %s, %s to %s", apk.short_description(), script, res_filename)
                if not script.uses_custom_result_object():
                    # default result object -> store its json
                    f.write(script.res.write_to_json())
                else:
                    res = self.get_custom_res_obj_representation(script)
                    # write json if the representation is a `ResultObject`
                    if ScriptUtil.is_result_object(res):
                        res = res.write_to_json()
                    f.write(res)
            return res_filename
        except IOError as e:
            raise FileSysStoreException(res_filename, str(apk), self, e)
Esempio n. 14
0
    def __init__(self, storage, script_list, script_hashes, *args, **kwargs):
        ''' Set up the base analyzer and keep the instantiated scripts in sorted order.

        See :py:method`.BaseAnalyzer.__init__` for details on the first attributes '''
        super(Analyzer, self).__init__(storage, script_list, script_hashes,
                                       *args, **kwargs)

        # create instances from the script classes and store them sorted
        script_instances = ScriptUtil.instantiate_scripts(script_list,
                                                          script_hashes = script_hashes)
        self.script_list = sorted(script_instances)
Esempio n. 15
0
def create_analyzer(storage, script_list, apks_or_paths = None,
                   mode = ANALYZE_MODE_PARALLEL, concurrency = None,
                   serialize_apks = True
                   ):
    '''
    Create the analyzer only.

    The scripts are imported and instantiated once to obtain their hashes and
    minimum options. The analyzer matching `mode` is then created from the
    (uninstantiated) script classes.

    Parameters
    ----------
    storage : RedundantStorage
        The store to use.
    script_list : list<str>
        List of paths to scripts (complete filename with extension).
    apks_or_paths: list<str> or list<Apk>, optional (default is None)
        List of `Apk` or paths to the apks which shall be analyzed with the given scripts
        If you analyze from paths the `import_date` is not set!
    mode : str, optional (default is `ANALYZE_MODE_PARALLEL`)
        Do an parallel analysis by default. Choose between :
        `ANALYZE_MODE_PARALLEL`, `ANALYZE_MODE_NON_PARALLEL`, `ANALYZE_MODE_DISTRIBUTED`.
    concurrency : int, optional (default is number of cpu cores)
        Number of workers to spawn.
    serialize_apks : bool, optional (default is True)
        If true, serialize .apk .
        Otherwise id (hash) of the apk will be send and fetched by the worker from the result db.
        Be sure to import the apks to the result db first!

    Returns
    -------
    The analyzer for the given `mode`.
    None if no scripts or no apks are supplied, if `mode` is none of the
    modes above, or if an error has been logged.
    On an IOError the process exits with status 1.
    '''
    from androlyze.model.script import ScriptUtil
    from androlyze.analyze.exception import AndroScriptError

    try:
        # list<type<AndroScript>>
        script_types = ScriptUtil.import_scripts(script_list)
        scripts = sorted(ScriptUtil.instantiate_scripts(script_types, script_paths = script_list))

        if not scripts:
            log.warn("No scripts supplied!")
            return None

        # the hashes can be set directly the next time the scripts get instantiated
        script_hashes = [script.hash for script in scripts]
        min_script_needs = ScriptUtil.get_minimum_script_options(scripts)

        # log infos about scripts
        clilog.info('Loaded scripts:\n%s', '\n'.join(str(script) for script in scripts))
        log.info(ScriptUtil.androscript_options_descr(scripts))

        # nothing to analyze -> no analyzer
        if not apks_or_paths:
            return None

        # arguments for BaseAnalyzer
        base_args = (storage, script_types, script_hashes, min_script_needs, apks_or_paths)
        log.info("Mode: %s", mode)

        # normal analyzer
        if mode == ANALYZE_MODE_NON_PARALLEL:
            from androlyze.analyze.Analyzer import Analyzer
            return Analyzer(*base_args)

        # parallel analyzer
        if mode == ANALYZE_MODE_PARALLEL:
            from androlyze.analyze.parallel.ParallelAnalyzer import ParallelAnalyzer
            return ParallelAnalyzer(*base_args, concurrency = concurrency)

        # distributed analyzer
        if mode == ANALYZE_MODE_DISTRIBUTED:
            from androlyze.analyze.distributed.DistributedAnalyzer import DistributedAnalyzer
            return DistributedAnalyzer(*base_args, concurrency = concurrency, serialize_apks = serialize_apks)

        # unknown mode
        return None

    except ApkImportError as e:
        log.warn(e)
    except IOError as e:
        log.warn(AndroScriptError(e.filename, caused_by = e))
        sys.exit(1)
    except Exception as e:
        # also covers ImportError, which is handled the same way
        log.exception(e)
Esempio n. 16
0
 def write_to_json(self):
     ''' Convert the description dictionary of this object to json and return it '''
     # imported locally, like the rest of this method's dependencies
     from androlyze.model.script import ScriptUtil
     description = self.description_dict()
     return ScriptUtil.dict2json(description)
Esempio n. 17
0
    def run_action(self, cmd):
        '''
        Run an action specified by `cmd`(see COMMAND_ prefixed variables).

        If `cmd` is None, the command is taken from the parsed command line arguments.
        Commands which are not in `COMMANDS_ALL` are ignored.

        Parameters
        ----------
        cmd : one of the COMMAND_ prefixed variables or None
            The command to run.
        '''

        parser = self.parser
        args = self.args

        # check which command has been used
        if cmd is None:

            # no command specified through program name -> get it from argparser
            cmd = args.command

        if cmd in COMMANDS_ALL:
            # filter options and the `yes` flag are read for every command
            hashes, package_names, tags = CLIUtil.get_filter_options_from_cli(args)
            yes = args.yes

            if cmd == COMMAND_QUERY:
                self.action_query(hashes, package_names, tags, yes)

            # dblyze -> do the analysis results evaluation
            elif cmd == COMMAND_EVAL:
                # import the classes named "Eval" from the supplied scripts,
                # instantiate each of them and let it evaluate the storage
                dblyze_scripts = ScriptUtil.import_scripts(args.scripts, clazz_name = "Eval")
                for dblyze_script in dblyze_scripts:
                    dblyze_script().evaluate(self.storage)

            # sync from result db to file sys
            elif cmd == COMMAND_SYNC:
                # first call: the return value is used as number of entries for the prompt
                # NOTE(review): presumably the callback returning False prevents the
                # actual download in this first call - confirm against `action_sync_fs`
                total_entries = androlyze.action_sync_fs(self.storage, lambda _ : False)

                # second call (callback returns True) is run through `cli_check_n_exec`,
                # the check can be circumvented with the `yes` argument
                CLIUtil.cli_check_n_exec(androlyze.action_sync_fs,
                                         prompt_prefix = "Will download %d entries from result database!" % total_entries,
                                         circumvent_check = args.yes,
                                         args = (self.storage, lambda _ : True)
                                         )
            else:
                # print welcome message
                clilog.info("Welcome to %s!\n" % PROJECT_NAME)

                # import command
                if cmd == COMMAND_IMPORT:
                    apks_or_paths, _ = self.get_apks_or_paths_from_cli()
                    tag = args.tag
                    copy2disk, copy2db, update, concurrency = args.copy_disk, args.copy_db, args.update, args.concurrency
                    if not update:
                        log.warn('''--update not supplied.
No update of already present apks in database will be done!''')
                    androlyze.action_import_apks(self.storage, apks_or_paths, copy2disk, copy2db, update, tag, concurrency = concurrency)
                # analyze command
                elif cmd == COMMAND_ANALYZE:
                    # androguard path has to be set before
                    from androlyze import action_analyze

                    # sort apks ?
                    get_apks_kwargs = {}
                    no_sort_by_code_size = args.no_sort_code_size
                    if not no_sort_by_code_size:
                        # sort apks by app code size for better scheduling
                        get_apks_kwargs = dict(order_by = TABLE_APK_IMPORT_KEY_SIZE_APP_CODE, ascending = False)
                    apks_or_paths, _ = self.get_apks_or_paths_from_cli(**get_apks_kwargs)

                    # debug infos
                    if not no_sort_by_code_size and not args.apks:
                        # duplicate the iterator so that logging does not consume
                        # the apks which are passed to the analysis
                        apks_or_paths, _it = itertools.tee(apks_or_paths)
                        clilog.info('Using Code Size Scheduling for faster analysis!')
                        log.debug('\n'.join(('%s: %s' % (x.package_name, x.size_app_code) for x in _it)))

                    scripts = args.scripts
                    parallel_mode, concurrency, send_id = self.__load_parallel_settings()

                    # get analysis mode (parallel is the fallback)
                    analyze_mode = None
                    if parallel_mode == PARALLELIZATION_MODE_DISTRIBUTED:
                        analyze_mode = ANALYZE_MODE_DISTRIBUTED
                    elif parallel_mode == PARALLELIZATION_MODE_NON_PARALLEL:
                        analyze_mode = ANALYZE_MODE_NON_PARALLEL
                    else:
                        analyze_mode = ANALYZE_MODE_PARALLEL
                    # apks are only serialized if their id shall not be sent instead
                    action_analyze(self.storage, scripts, apks_or_paths,
                                   mode = analyze_mode, concurrency = concurrency,
                                   serialize_apks = not send_id)
                # delete command
                elif cmd == COMMAND_DELETE:
                    self.action_delete(parser, hashes, package_names, tags, yes)

                # logged after every command handled in this branch
                clilog.info("done")
Esempio n. 18
0
 def write_to_json(self):
     ''' Return the json (str) built from the description dictionary of this object '''
     # local import, as in the other helpers of this file
     from androlyze.model.script import ScriptUtil
     description_dict = self.description_dict()
     json_repr = ScriptUtil.dict2json(description_dict)
     return json_repr