Example #1
def write_analyze_task_results_to_fs(storage, group_result, chunked = False):
    '''
    Get successful task results and write them to disk if enabled.

    Parameters
    ----------
    storage : RedundantStorage
    group_result : GroupResult
    chunked : bool, optional (default is False)
        If work has been divided into chunks.

    Returns
    -------
    int
        Number of successful tasks
    '''
    if group_result is not None:
        results = get_successful_analyze_task_results(group_result, chunked = chunked)

        # write the results to disk unless file system storage is disabled
        if not storage.fs_storage_disabled():
            clilog.info("Fetching all analysis results for storage ...")
            if results:
                try:
                    storage.fetch_results_from_mongodb(results)
                except DatabaseLoadException as e:
                    log.exception(e)
            return len(results)

    return 0
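
A minimal usage sketch (hypothetical setup: `storage` and a finished analyzer as in the other examples; the names are illustrative only):

n_ok = write_analyze_task_results_to_fs(storage, analyzer.group_result, chunked=False)
clilog.info("%d task(s) succeeded", n_ok)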
Example #2
    def action_delete(self, parser, hashes, package_names, tags, yes):
        ''' Delete from the database specified by `parser` args '''

        args = self.args
        whole_db = args.all
        db = args.delete

        if whole_db:
            cli_check_n_exec(prompt_prefix="Do you really want to delete the whole database?", func=lambda:True, circumvent_check=not whole_db)
        # compute only after the whole-db confirmation above has run!
        circumvent_check = yes or whole_db

        # import db
        if db == SUBCOMMAND_DELETE_IMPORT:
            cli_check_n_exec(androlyze.action_delete_apks_import, circumvent_check=circumvent_check, args=(self.storage, args.delete_apk, hashes, package_names, tags, whole_db))
        # res db
        elif db == SUBCOMMAND_DELETE_RESULT:
            kwargs = CLIUtil.get_result_db_filter_args_from_argparser(args)

            if not kwargs and not whole_db:
                raise CLIError('You did not supply any filter argument!\nIf you want to delete the whole db, use the --all switch!', parser)

            kwargs["whole_db"] = whole_db
            # delete from res db
            n = cli_check_n_exec(androlyze.action_delete_apks_res, circumvent_check = circumvent_check, args=(self.storage,), kwargs=kwargs)
            if not whole_db:
                clilog.info("Deleted %s file/document(s) !" % n)
Example #3
    def import_apks(apk_paths):
        apk_importer = ApkImporter(apk_paths, storage)
        for apk in apk_importer.import_apks(copy_apk = copy_apk, copy_to_mongodb = copy_to_mongodb,
                                                update = update, tag = tag):

            clilog.info("imported %s", apk.short_description())

            # use shared memory counter if given
            if cnt_imported_apks is not None:
                with cnt_imported_apks.get_lock():
                    cnt_imported_apks.value += 1
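
The shared-memory counter used above, reduced to a runnable self-contained sketch (names are illustrative):

from multiprocessing import Process, Value

def worker(cnt):
    # get_lock() returns the lock guarding the shared value,
    # so concurrent increments do not race
    with cnt.get_lock():
        cnt.value += 1

if __name__ == '__main__':
    cnt_imported_apks = Value('i', 0)
    procs = [Process(target=worker, args=(cnt_imported_apks,)) for _ in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(cnt_imported_apks.value)  # 4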
Example #4
def rewrite_configs():
    clilog.info("using environment variables for service discovery (kubernetes): %s", ', '.join(envs_kubernetes))
    clilog.info("using environment variables for service discovery (docker): %s", ', '.join(envs_docker))
    clilog.info("rabbitmq: host: %s, port: %s", *get_rabbitmq_conn_info())
    clilog.info("mongodb: host: %s, port: %s", *get_mongodb_conn_info())

    def rewrite_config_key(key, value, config_path = CONFIG_PATH):
        key = key.strip()
        value = value.strip()
        return run(r""" sed -i.bak "s/\(%s[ ]*=[ ]*\).*/\1%s/g" %s """ % (key, value, config_path))
        
    def rewrite_amqp(user = r"\2", pw = r"\3", host = r"\4", port = r"\5", vhost = r"\6", config_path = CONFIG_PATH):
        any(
            run(r""" sed -i.bak "s/\(%s[ ]*=[ ]*amqp:[/][/]\)\(.*\)[:]\(.*\)[@]\(.*\)[:]\(.*\)[/]\(.*\)/\1%s:%s@%s:%s\/%s/g" %s """ % (key, user, pw, host, port, vhost, config_path))
            for key in [KEY_BROKER_URL, KEY_BROKER_BACKEND_URL]
        )
            
    mongodb_ip, mongodb_port = get_mongodb_conn_info()
    if mongodb_ip is not None:
        rewrite_config_key(KEY_RESULT_DB_IP, mongodb_ip)
    
    if mongodb_port is not None:
        rewrite_config_key(KEY_RESULT_DB_PORT, mongodb_port)
        
    rabbitmq_ip, rabbitmq_port = get_rabbitmq_conn_info()
    rewrite_amqp(host = rabbitmq_ip, port = rabbitmq_port)
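
What rewrite_config_key's sed expression does, replayed with Python's re module on a made-up config line (illustration only; sed's \1 corresponds to re.sub's \g<1>):

import re

line = "result_db_ip = 127.0.0.1"
print(re.sub(r"(result_db_ip[ ]*=[ ]*).*", r"\g<1>10.0.0.5", line))
# -> result_db_ip = 10.0.0.5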
Example #5
def print_query_result_db(res, distict_generator = False, count = False, raw = False, interactive = True):
    '''
    Print the results from the result db (mongodb).

    Parameters
    ----------
    count : bool, optional (default is False)
        Only print count, not results
    distict_generator : bool, optional (default is False)
        Res is generator<object> created from the distinct(...) method of mongodb.
        If generator<dict>, convert each dict to json.
        Otherwise just print.
    raw : bool, optional (default is False)
        Print raw data from gridfs
        Otherwise print json.
    res : gridfs.grid_file.GridOutCursor or generator<object> or pymongo.cursor.Cursor
        First if non_document and non_document_raw.
        Second if distinct values wanted.
        Third otherwise.
        The results to print
    interactive : bool, optional (default is True)
        Iterate interactively through the result cursor
    '''
    from pymongo.errors import PyMongoError

    try:
        # print count
        if count:
            cnt = 0
            # res is list
            if distict_generator:
                cnt = len(res)
            # res is cursor
            else:
                cnt = res.count()
            clilog.info(cnt)
        else:
            if distict_generator:
                for r in sorted(res):
                    if isinstance(r, dict):
                        r = dict2json(r)
                    clilog.info(r)
            else:
                for i, res in enumerate(res, 1):
                    # interactive result view
                    if i != 1 and interactive and raw_input('Press any key to view next result or abort with "no"!').lower() == 'no':
                        break
                    sys.stderr.write('/* {} */\n'.format(i))
                    # print raw data
                    if raw:
                        # gridfs.grid_file.GridOut
                        for gridout_obj in res:
                            clilog.info(gridout_obj)
                    # print json
                    else:
                        clilog.info(dict2json(res))

    except PyMongoError as e:
        log.exception(e)
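
Two hypothetical calls, with `res` as returned by the query helpers in the other examples:

# print only the number of matching result documents
print_query_result_db(res, count=True)

# print distinct values from a mongodb distinct(...) query, non-interactively
print_query_result_db(res, distict_generator=True, interactive=False)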
Example #6
    def action_query(self, hashes, package_names, tags, yes):
        ''' Query the database '''
        args = self.args
        parser = self.parser

        # check on which database to query
        # get from argparser
        query_dst = args.query_dst
        if query_dst == SUBCOMMAND_QUERY_IMPORT:
            clilog.info('\n'.join(androlyze.action_query_import_db(self.storage, args.query_import_cmd, hashes, package_names, tags)))
        elif query_dst == SUBCOMMAND_QUERY_RESULT:
            kwargs = CLIUtil.get_result_db_filter_args_from_argparser(args)
            if args.show_id:
                kwargs["remove_id_field"] = not args.show_id

            distinct_key = None
            if args.distinct is not None:
                distinct_key = args.distinct
            # get distinct values for script name
            elif args.list_ran_scripts:
                distinct_key = MongoUtil.get_attr_str(RESOBJ_SCRIPT_META, RESOBJ_SCRIPT_META_NAME, args.non_document)

            no_args_supplied = len(kwargs) == 0 and not args.latest and not args.count and distinct_key is None
            whole_db = args.all
            raw = args.raw

            # update with basic result query options
            kwargs.update(CLIUtil.get_basic_result_query_options(args))

            kwargs.update(dict(include_fields=args.include_fields, exclude_fields=args.exclude_fields, non_document_raw=raw, distinct_key = distinct_key))

            if no_args_supplied and not whole_db:
                raise CLIError('Not enough arguments supplied!\nIf you want to dump the whole db, use the --all switch!', parser)

            res = cli_check_n_exec(
                androlyze.action_query_result_db,
                prompt_prefix='Will print whole results db!',
                circumvent_check=not no_args_supplied or yes,
                args=(self.storage, CLIUtil.get_checks_from_cli(args)),
                kwargs=kwargs)

            # log results
            print_query_result_db(res, distict_generator=distinct_key is not None, count=args.count, raw=raw, interactive = not args.not_interactive)
Example #7
    def __init__(self, *args, **kwargs):
        '''
        See :py:meth:`.BaseAnalyzer.__init__` for details.

        Parameters
        ----------
        serialize_apks : bool, optional (default is True)
            If true, serialize the .apk file.
            Otherwise the id (hash) of the apk will be sent and the apk fetched by the worker from the result db.
            Be sure to import the apks to the result db first!
        '''
        serialize_apks = kwargs.get("serialize_apks", True)

        super(DistributedAnalyzer, self).__init__(*args, **kwargs)

        # list(apk_path, _apk, is_apk)
        self.__apks = list(AnalyzeUtil.apk_gen(self.apks_or_paths))

        # result group
        self.group_result = None

        # serialize .apk data
        self.__serialize_apks = serialize_apks
        if serialize_apks:
            clilog.info("Will serialize .apk data!")
        else:
            clilog.info("Will send id of apks!")

        self.analyze_stats_view = None

        # stats view for cli
        self.analyze_stats_view = AnalysisStatsView(self._cnt_apks)
        self.analyze_stats_view.daemon = True

        # the `TaskCollection` for the analysis tasks
        self.task_collection = TaskCollection(self._cnt_apks)

        # register celery signals
        self.register_signals()

        self.lock = Lock()
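
A hypothetical construction, with the positional arguments in the order create_analyzer (Example #13) passes them:

analyzer = DistributedAnalyzer(storage, androscript_list, script_hashes,
                               min_script_needs, apks_or_paths,
                               concurrency=4, serialize_apks=False)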
Example #9
def action_delete_apks_import(storage, delete_apk = False, hashes = None, package_names = None, tags = None, select_whole_db = False):
    ''' Delete from the import storage (database and/or filesys)

    Parameters
    ----------
    storage : RedundantStorage
        The store to use.
    delete_apk : boolean, optional (default is False)
    hashes : iterable<str>, optional (default is None)
    package_names : iterable<str>, optional (default is None)
    tags : iterable<str>, optional (default is None)
    select_whole_db : boolean, optional (default is False)
        If true, select the whole import database! Be careful!
        This means we do not take `hashes`, `package_names` and `tags` into account!

    Raises
    ------
    ValueError
    '''
    try:
        apks = None
        if select_whole_db:
            apks = action_query_import_db(storage, COMMAND_QUERY_APKS, hashes, package_names, tags)
        # otherwise require at least one filter argument -- don't delete the whole database by accident!
        elif len(Util.filter_not_none((hashes, package_names, tags))) > 0:
            apks = action_query_import_db(storage, COMMAND_QUERY_APKS, hashes, package_names, tags)
        else:
            raise ValueError('''Neither hashes nor package names nor tags specified!
             If you want to use the whole database, set `select_whole_db` to true.
             ''')

        # use list, otherwise we have duplicates due to the generator
        for apk in list(apks):
            if delete_apk:
                clilog.info("Will delete from database and file system: \n%s ", apk.detailed_description())
            else:
                clilog.info("Will delete %s from database: %s ", apk.short_description(), storage.import_db_storage)
            storage.delete_entry_for_apk(apk, delete_apk)
    except StorageException as e:
        log.warn(e)
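
A hypothetical call (the tag is made up): delete every apk tagged 'testing' from the import database while keeping the .apk files on disk:

action_delete_apks_import(storage, delete_apk=False, tags=['testing'])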
Example #10
def rewrite_configs():
    clilog.info(
        "using environment variables for service discovery (kubernetes): %s",
        ', '.join(envs_kubernetes))
    clilog.info(
        "using environment variables for service discovery (docker): %s",
        ', '.join(envs_docker))
    clilog.info("rabbitmq: host: %s, port: %s", *get_rabbitmq_conn_info())
    clilog.info("mongodb: host: %s, port: %s", *get_mongodb_conn_info())

    def rewrite_config_key(key, value, config_path=CONFIG_PATH):
        key = key.strip()
        value = value.strip()
        return run(r""" sed -i.bak "s/\(%s[ ]*=[ ]*\).*/\1%s/g" %s """ %
                   (key, value, config_path))

    def rewrite_amqp(user=r"\2",
                     pw=r"\3",
                     host=r"\4",
                     port=r"\5",
                     vhost=r"\6",
                     config_path=CONFIG_PATH):
        any(
            run(r""" sed -i.bak "s/\(%s[ ]*=[ ]*amqp:[/][/]\)\(.*\)[:]\(.*\)[@]\(.*\)[:]\(.*\)[/]\(.*\)/\1%s:%s@%s:%s\/%s/g" %s """
                % (key, user, pw, host, port, vhost, config_path))
            for key in [KEY_BROKER_URL, KEY_BROKER_BACKEND_URL])

    mongodb_ip, mongodb_port = get_mongodb_conn_info()
    if mongodb_ip is not None:
        rewrite_config_key(KEY_RESULT_DB_IP, mongodb_ip)

    if mongodb_port is not None:
        rewrite_config_key(KEY_RESULT_DB_PORT, mongodb_port)

    rabbitmq_ip, rabbitmq_port = get_rabbitmq_conn_info()
    rewrite_amqp(host=rabbitmq_ip, port=rabbitmq_port)
Example #11
    def _analyze(self):
        ''' See doc of :py:meth:`.BaseAnalyzer.analyze`. '''

        # try to get registered workers
        # if network fails at this point -> stop analysis
        try:
            clilog.info(CeleryUtil.get_workers_and_check_network())
        except NetworkError as e:
            log.critical(e)
            return 0

        # storage objects
        storage = self.storage

        clilog.info("Number of apks to analyze: %d", self._cnt_apks)

        try:
            # get analyze task
            analyze_task = tasks[CeleryConstants.get_analyze_task_name()]

            # create storage
            storage.create_or_open_sub_storages()

            # send tasks
            start = time()

            # apk generator over .apk or apk hashes
            apk_gen = AnalyzeUtil.apk_id_or_raw_data_gen(self.apks, force_raw_data = self.serialize_apks)

            clilog.info("Task publishing progress:")

            # send and serialize .apks
            # if analyzing from paths, serialize them!
            if self.serialize_apks:
                log.info("sending .apks to message broker")
                self.group_result = group_result = GroupResult(results = [])

                for args in self.send_apk_args_generator(apk_gen):
                    task = analyze_task.delay(*args)
                    group_result.add(task)

            # send only apk id and let fetch via mongodb
            else:
                log.info("sending ids of apks")

                task_group = group((analyze_task.s(*args) for args in self.send_id_args_generator(apk_gen)))

                # publish tasks
                self.group_result = task_group()

            log.info("sending took %ss", (time() - start))
            sys.stderr.write("\nAnalysis progress:\n")

            # start showing analysis progress
            self.analyze_stats_view.start()

            # wait for results
            log.debug("joining on ResultGroup ... ")

            # setup callback
            callback_func = self.get_callback_func(self.success_handler, self.error_handler)
            CeleryUtil.join_native(self.group_result, propagate = False, callback = callback_func)

            clilog.info("\nanalysis done ... ")
            log.info("distributed analysis took %ss", (time() - start))

            return self.stop_analysis_view()
        except DatabaseOpenError as e:
            log.critical(e)
            return 0

        except (KeyboardInterrupt, Exception) as e:
            if not isinstance(e, KeyboardInterrupt):
                log.exception(e)
            log.warn("Interrupting distributed analysis ... Please wait a moment!")
            log.warn("revoking tasks on all workers ...")

            if celerysettings.CELERY_TASK_REVOCATION_ENABLED:
                # revoke tasks
                if self.group_result is None:
                    # revoke via task ids
                    log.debug("revoking while publishing tasks ...")

                    self.task_collection.revoke_all(terminate = True, signal = 'SIGKILL')
                else:
                    # revoke via GroupResult if yet available/created
                    # first available after all tasks have been sent
                    self.group_result.revoke(terminate = True, signal = 'SIGKILL')
                log.warn("revoked tasks and killed workers ...")

            # return number of analyzed apks
            return self.stop_analysis_view()
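
The publishing strategy of the else branch, reduced to a generic runnable celery sketch (the app, broker URL and task are assumptions, and a running broker plus worker is required):

from celery import Celery, group

app = Celery('sketch', broker='amqp://guest:guest@localhost:5672//', backend='rpc://')

@app.task
def analyze(apk_id):
    return apk_id

# build a group of task signatures, publish them all at once,
# then wait on the GroupResult without raising task errors
group_result = group(analyze.s(i) for i in range(10))()
group_result.join(propagate=False)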
Example #12
def action_import_apks(storage, apk_paths,
                       copy_apk = False, copy_to_mongodb = False,
                       update = False, tag = None,
                       # shared memory
                       cnt_imported_apks = None, total_apk_count = None, import_finished = None,
                       # concurrent settings
                       concurrency = None
                       ):

    ''' Import the apks from the `apk_paths` and create the file system structure
    where the results will be kept, specified by `storage`.

    Parameters
    ----------
    storage : RedundantStorage
        The store to use.
    apk_paths : iterable<str>
        The apk files and/or directories.
    copy_apk : bool
        Import the apk file to the `import_dir` (copy it).
    copy_to_mongodb : bool, optional (default is False)
        Also import into MongoDB. Useful for the distributed analysis.
    update : bool
        Update apks that have already been imported.
    tag : str, optional (default is None)
        Optional tag to attach to the imported apks.
    cnt_imported_apks : multiprocessing.Value<int>, optional (default is None)
        If given, use for progress updating.
    total_apk_count : multiprocessing.Value<int>, optional (default is None)
        If given, use for total count of apks.
    import_finished : multiprocessing.Value<byte>, optional (default is None)
        If given, use to signal that import has been completed.
    concurrency : int, optional (default is number of cpus)
        Number of processes to use for the import.
    '''
    from androlyze.loader.ApkImporter import ApkImporter

    # get single paths to apks so we get the correct total count of apks
    clilog.info("looking for apks in given paths ... ")
    apk_paths = ApkImporter.get_apks_from_list_or_dir(apk_paths)

    if total_apk_count is not None:
        # may be time consuming for recursive lookup
        apk_paths, total_apk_count.value = Util.count_iterable_n_clone(apk_paths)

    # create count if not given
    if cnt_imported_apks is None:
        cnt_imported_apks = Value('i', 0, lock = RLock())

    # set concurrency
    if concurrency is None:
        concurrency = cpu_count()
    log.warn("Using %d processes", concurrency)

    clilog.info("Storage dir is %s" % storage.fs_storage.store_root_dir)
    if copy_apk:
        clilog.info("Copying APKs to %s ..." % storage.fs_storage.store_root_dir)

    def import_apks(apk_paths):
        apk_importer = ApkImporter(apk_paths, storage)
        for apk in apk_importer.import_apks(copy_apk = copy_apk, copy_to_mongodb = copy_to_mongodb,
                                                update = update, tag = tag):

            clilog.info("imported %s", apk.short_description())

            # use shared memory counter if given
            if cnt_imported_apks is not None:
                with cnt_imported_apks.get_lock():
                    cnt_imported_apks.value += 1

    pool = []

    # don't convert generator to list if only 1 process wanted
    apk_paths = [apk_paths] if concurrency == 1 else Util.split_n_uniform_distri(list(apk_paths), concurrency)

    # start parallel import
    # multiprocessing's pool causes pickle errors
    for i in range(concurrency):
        p = Process(target = import_apks, args = (apk_paths[i], ))
        log.debug("starting process %s", p)
        pool.append(p)
        p.start()

    for it in pool:
        log.debug("joining on process %s", it)
        it.join()

    apks_imported = cnt_imported_apks.value != 0
    # show some message that no APK has been imported
    if not apks_imported:
        log.warn("No .apk file has been imported! This means no .apk file has been found or they already have been imported.")
    else:
        clilog.info("done")

    # because not all apks may be importable, we cannot use the count to signal that the import is done
    if import_finished is not None:
        import_finished.value = 1

    clilog.info("Imported %d apks", cnt_imported_apks.value)
Example #13
def create_analyzer(storage, script_list, apks_or_paths = None,
                   mode = ANALYZE_MODE_PARALLEL, concurrency = None,
                   serialize_apks = True
                   ):
    '''
    Create the analyzer only.

    Parameters
    ----------
    storage : RedundantStorage
        The store to use.
    script_list : list<str>
        List of paths to scripts (complete filename with extension).
    apks_or_paths : list<str> or list<Apk>, optional (default is None)
        List of `Apk` or paths to the apks which shall be analyzed with the given scripts
        If you analyze from paths the `import_date` is not set!
    mode : str, optional (default is `ANALYZE_MODE_PARALLEL`)
        Do a parallel analysis by default. Choose between `ANALYZE_MODE_PARALLEL`, `ANALYZE_MODE_NON_PARALLEL` and `ANALYZE_MODE_DISTRIBUTED`.
    concurrency : int, optional (default is number of cpu cores)
        Number of workers to spawn.
    serialize_apks : bool, optional (default is True)
        If true, serialize the .apk file.
        Otherwise the id (hash) of the apk will be sent and the apk fetched by the worker from the result db.
        Be sure to import the apks to the result db first!
    '''
    from androlyze.model.script import ScriptUtil
    from androlyze.analyze.exception import AndroScriptError

    try:
        # list<type<AndroScript>>
        androscript_list = ScriptUtil.import_scripts(script_list)
        instantiated_scripts = sorted(ScriptUtil.instantiate_scripts(androscript_list, script_paths = script_list))

        if len(instantiated_scripts) == 0:
            log.warn("No scripts supplied!")
            return

        # get hashes for `AndroScript`s so that we can set the hash directly next time we instantiate the script
        script_hashes = [s.hash for s in instantiated_scripts]
        min_script_needs = ScriptUtil.get_minimum_script_options(instantiated_scripts)

        # log infos about scripts
        clilog.info('Loaded scripts:\n%s', '\n'.join((str(s) for s in instantiated_scripts)))
        log.info(ScriptUtil.androscript_options_descr(instantiated_scripts))

        if apks_or_paths:

            def create_analyzer():

                analyzer = None
                # argument for BaseAnalyzer
                args = storage, androscript_list, script_hashes, min_script_needs, apks_or_paths
                log.info("Mode: %s", mode)

                # normal analyzer
                if mode == ANALYZE_MODE_NON_PARALLEL:
                    from androlyze.analyze.Analyzer import Analyzer
                    analyzer = Analyzer(*args)
                # use parallel analyzer
                elif mode == ANALYZE_MODE_PARALLEL:
                    from androlyze.analyze.parallel.ParallelAnalyzer import ParallelAnalyzer
                    analyzer = ParallelAnalyzer(*args, concurrency = concurrency)
                # use distributed one
                elif mode == ANALYZE_MODE_DISTRIBUTED:
                    from androlyze.analyze.distributed.DistributedAnalyzer import DistributedAnalyzer
                    analyzer = DistributedAnalyzer(*args, concurrency = concurrency, serialize_apks = serialize_apks)

                return analyzer

            return create_analyzer()

    except ApkImportError as e:
        log.warn(e)
    except IOError as e:
        log.warn(AndroScriptError(e.filename, caused_by = e))
        sys.exit(1)
    except ImportError as e:
        log.exception(e)
    except Exception as e:
        log.exception(e)
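
A hypothetical end-to-end call (the script path is made up; the mode constants are the ones the function dispatches on):

analyzer = create_analyzer(storage, ['scripts/apkinfo.py'], apks_or_paths,
                           mode=ANALYZE_MODE_DISTRIBUTED,
                           concurrency=8, serialize_apks=False)
if analyzer is not None:
    analyzer.analyze()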
Example #14
def print_query_result_db(res,
                          distict_generator=False,
                          count=False,
                          raw=False,
                          interactive=True):
    '''
    Print the results from the result db (mongodb).

    Parameters
    ----------
    count : bool, optional (default is False)
        Only print count, not results
    distict_generator : bool, optional (default is False)
        Res is generator<object> created from the distinct(...) method of mongodb.
        If generator<dict>, convert each dict to json.
        Otherwise just print.
    raw : bool, optional (default is False)
        Print raw data from gridfs
        Otherwise print json.
    res : gridfs.grid_file.GridOutCursor or generator<object> or pymongo.cursor.Cursor
        First if non_document and non_document_raw.
        Second if distinct values wanted.
        Third otherwise.
        The results to print
    interactive : bool, optional (default is True)
        Iterate interactively through the result cursor
    '''
    from pymongo.errors import PyMongoError

    try:
        # print count
        if count:
            cnt = 0
            # res is list
            if distict_generator:
                cnt = len(res)
            # res is cursor
            else:
                cnt = res.count()
            clilog.info(cnt)
        else:
            if distict_generator:
                for r in sorted(res):
                    if isinstance(r, dict):
                        r = dict2json(r)
                    clilog.info(r)
            else:
                for i, res in enumerate(res, 1):
                    # interactive result view
                    if i != 1 and interactive and raw_input(
                            'Press any key to view next result or abort with "no"!'
                    ).lower() == 'no':
                        break
                    sys.stderr.write('/* {} */\n'.format(i))
                    # print raw data
                    if raw:
                        # gridfs.grid_file.GridOut
                        for gridout_obj in res:
                            clilog.info(gridout_obj)
                    # print json
                    else:
                        clilog.info(dict2json(res))

    except PyMongoError as e:
        log.exception(e)
Example #15
    def _analyze(self):
        ''' See doc of :py:meth:`.BaseAnalyzer.analyze`. '''

        # try to get registered workers
        # if network fails at this point -> stop analysis
        try:
            clilog.info(CeleryUtil.get_workers_and_check_network())
        except NetworkError as e:
            log.critical(e)
            return 0

        # storage objects
        storage = self.storage

        clilog.info("Number of apks to analyze: %d", self._cnt_apks)

        try:
            # get analyze task
            analyze_task = tasks[CeleryConstants.get_analyze_task_name()]

            # create storage
            storage.create_or_open_sub_storages()

            # send tasks
            start = time()

            # apk generator over .apk or apk hashes
            apk_gen = AnalyzeUtil.apk_id_or_raw_data_gen(
                self.apks, force_raw_data=self.serialize_apks)

            clilog.info("Task publishing progress:")

            # send and serialize .apks
            # if analyzing from paths, serialize them!
            if self.serialize_apks:
                log.info("sending .apks to message broker")
                self.group_result = group_result = GroupResult(results=[])

                for args in self.send_apk_args_generator(apk_gen):
                    task = analyze_task.delay(*args)
                    group_result.add(task)

            # send only apk id and let fetch via mongodb
            else:
                log.info("sending ids of apks")

                task_group = group(
                    (analyze_task.s(*args)
                     for args in self.send_id_args_generator(apk_gen)))

                # publish tasks
                self.group_result = task_group()

            log.info("sending took %ss", (time() - start))
            sys.stderr.write("\nAnalysis progress:\n")

            # start showing analysis progress
            self.analyze_stats_view.start()

            # wait for results
            log.debug("joining on ResultGroup ... ")

            # setup callback
            callback_func = self.get_callback_func(self.success_handler,
                                                   self.error_handler)
            CeleryUtil.join_native(self.group_result,
                                   propagate=False,
                                   callback=callback_func)

            clilog.info("\nanalysis done ... ")
            log.info("distributed analysis took %ss", (time() - start))

            return self.stop_analysis_view()
        except DatabaseOpenError as e:
            log.critical(e)
            return 0

        except (KeyboardInterrupt, Exception) as e:
            if not isinstance(e, KeyboardInterrupt):
                log.exception(e)
            log.warn(
                "Interrupting distributed analysis ... Please wait a moment!")
            log.warn("revoking tasks on all workers ...")

            if celerysettings.CELERY_TASK_REVOCATION_ENABLED:
                # revoke tasks
                if self.group_result is None:
                    # revoke via task ids
                    log.debug("revoking while publishing tasks ...")

                    self.task_collection.revoke_all(terminate=True,
                                                    signal='SIGKILL')
                else:
                    # revoke via GroupResult if yet available/created
                    # first available after all tasks have been sent
                    self.group_result.revoke(terminate=True, signal='SIGKILL')
                log.warn("revoked tasks and killed workers ...")

            # return number of analyzed apks
            return self.stop_analysis_view()
Example #16
def run(command):
    clilog.info(">>> %s", command)
    return os.system(command)
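
run() returns the raw os.system exit status, so 0 means success and any non-zero status is truthy -- which is what the any(...) in rewrite_amqp (Examples #4 and #10) short-circuits on. A trivial call:

status = run("echo hello")
if status != 0:
    clilog.info("command failed with status %s", status)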
Example #18
    def evaluate(self, storage, *args, **kwargs):
        self.storage = storage
        clilog.info("evaluating '%s' version: %s", self.script_name, self.version)
        return self._evaluate(storage, *args, **kwargs)
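
The template-method pattern shown above, as a self-contained hypothetical sketch: subclasses implement _evaluate while evaluate adds the shared bookkeeping.

class BaseEval(object):
    script_name, version = 'base', '0'

    def evaluate(self, storage, *args, **kwargs):
        self.storage = storage
        print("evaluating '%s' version: %s" % (self.script_name, self.version))
        return self._evaluate(storage, *args, **kwargs)

class MyEval(BaseEval):
    script_name, version = 'my_eval', '0.1'

    def _evaluate(self, storage, *args, **kwargs):
        return 42  # a real subclass would query `storage` here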
Example #19
    def _analyze(self, test = False):
        '''
        Start the analysis and store the results in the predefined place.

        Parameters
        ----------
        test : bool, optional (default is False)
            Use for testing. Will not store any result!

        Returns
        -------
        int
            Number of analyzed apks
        list<ResultObject>
            List of the results (only if `test`)
        '''
        androscripts = self.script_list

        # collect results for test mode
        test_results = []

        # get minimum options for all scripts -> boost performance
        # use only as many options as needed!

        # run over apks
        for apk_path, _apk, _ in apk_gen(self.apks_or_paths):

            eandro_apk = open_apk(apk_path, apk=_apk)

            # if None, an error happened and has been logged
            # otherwise proceed with analysis
            if eandro_apk is not None:

                # tuple<FastApk, AndroScript>
                res = AnalyzeUtil.analyze_apk(eandro_apk, androscripts, self.min_script_needs, reset_scripts = True)

                if res:
                    # unpack results
                    fastapk, script_results = res

                    # store results if not in test mode
                    if not test:
                        for script in script_results:

                            try:
                                storage_result = AnalyzeUtil.store_script_res(self.storage, script, fastapk)
                                # keep storage results
                                self.add_storage_result(storage_result)
                            except StorageException as e:
                                log.warn(e)
                    else:
                        # deliver result object in testing mode
                        test_results += [s.res for s in script_results]

                    clilog.info("analyzed %s", fastapk.short_description())

                # increment counter, no lock needed, nobody else is writing to this value
                self.cnt_analyzed_apks.value += 1

        if test:
            return test_results

        return self.cnt_analyzed_apks.value
Example #20
    def _analyze(self, test=False):
        '''
        Start the analysis and store the results in the predefined place.

        Parameters
        ----------
        test : bool, optional (default is False)
            Use for testing. Will not store any result!

        Returns
        -------
        int
            Number of analyzed apks
        list<ResultObject>
            List of the results (only if `test`)
        '''
        androscripts = self.script_list

        # collect results for test mode
        test_results = []

        # get minimum options for all scripts -> boost performance
        # use only as many options as needed!

        # run over apks
        for apk_path, _apk, _ in apk_gen(self.apks_or_paths):

            eandro_apk = open_apk(apk_path, apk=_apk)

            # if None, an error happened and has been logged
            # otherwise proceed with analysis
            if eandro_apk is not None:

                # tuple<FastApk, AndroScript>
                res = AnalyzeUtil.analyze_apk(eandro_apk,
                                              androscripts,
                                              self.min_script_needs,
                                              reset_scripts=True)

                if res:
                    # unpack results
                    fastapk, script_results = res

                    # store results if not in test mode
                    if not test:
                        for script in script_results:

                            try:
                                storage_result = AnalyzeUtil.store_script_res(
                                    self.storage, script, fastapk)
                                # keep storage results
                                self.add_storage_result(storage_result)
                            except StorageException as e:
                                log.warn(e)
                    else:
                        # deliver result object in testing mode
                        test_results += [s.res for s in script_results]

                    clilog.info("analyzed %s", fastapk.short_description())

                # increment counter, no lock needed, nobody else is writing to this value
                self.cnt_analyzed_apks.value += 1

        if test:
            return test_results

        return self.cnt_analyzed_apks.value
Example #21
    def run_action(self, cmd):
        ''' Run an action specified by `cmd` (see COMMAND_ prefixed variables) '''

        parser = self.parser
        args = self.args

        # check which command has been used
        if cmd is None:

            # no command specified through program name -> get it from argparser
            cmd = args.command
            
        if cmd in COMMANDS_ALL:
            hashes, package_names, tags = CLIUtil.get_filter_options_from_cli(args)
            yes = args.yes

            if cmd == COMMAND_QUERY:
                self.action_query(hashes, package_names, tags, yes)

            # dblyze -> do the analysis results evaluation            
            elif cmd == COMMAND_EVAL:
                dblyze_scripts = ScriptUtil.import_scripts(args.scripts, clazz_name = "Eval")
                for dblyze_script in dblyze_scripts:
                    dblyze_script().evaluate(self.storage)
                
            # sync from result db to file sys
            elif cmd == COMMAND_SYNC:
                total_entries = androlyze.action_sync_fs(self.storage, lambda _ : False)

                CLIUtil.cli_check_n_exec(androlyze.action_sync_fs,
                                         prompt_prefix = "Will download %d entries from result database!" % total_entries,
                                         circumvent_check = args.yes,
                                         args = (self.storage, lambda _ : True)
                                         )
            else:
                # print welcome message
                clilog.info("Welcome to %s!\n" % PROJECT_NAME)

                # import command
                if cmd == COMMAND_IMPORT:
                    apks_or_paths, _ = self.get_apks_or_paths_from_cli()
                    tag = args.tag
                    copy2disk, copy2db, update, concurrency = args.copy_disk, args.copy_db, args.update, args.concurrency
                    if not update:
                        log.warn('''--update not supplied.
No update of already present apks in database will be done!''')
                    androlyze.action_import_apks(self.storage, apks_or_paths, copy2disk, copy2db, update, tag, concurrency = concurrency)
                # analyze command
                elif cmd == COMMAND_ANALYZE:
                    # androguard path has to be set before
                    from androlyze import action_analyze

                    # sort apks ?
                    get_apks_kwargs = {}
                    no_sort_by_code_size = args.no_sort_code_size
                    if not no_sort_by_code_size:
                        # sort apks by app code size for better scheduling
                        get_apks_kwargs = dict(order_by = TABLE_APK_IMPORT_KEY_SIZE_APP_CODE, ascending = False)
                    apks_or_paths, _ = self.get_apks_or_paths_from_cli(**get_apks_kwargs)

                    # debug infos
                    if not no_sort_by_code_size and not args.apks:
                        apks_or_paths, _it = itertools.tee(apks_or_paths)
                        clilog.info('Using Code Size Scheduling for faster analysis!')
                        log.debug('\n'.join(('%s: %s' % (x.package_name, x.size_app_code) for x in _it)))

                    scripts = args.scripts
                    parallel_mode, concurrency, send_id = self.__load_parallel_settings()

                    # get analysis mode
                    analyze_mode = None
                    if parallel_mode == PARALLELIZATION_MODE_DISTRIBUTED:
                        analyze_mode = ANALYZE_MODE_DISTRIBUTED
                    elif parallel_mode == PARALLELIZATION_MODE_NON_PARALLEL:
                        analyze_mode = ANALYZE_MODE_NON_PARALLEL
                    else:
                        analyze_mode = ANALYZE_MODE_PARALLEL
                    action_analyze(self.storage, scripts, apks_or_paths,
                                   mode = analyze_mode, concurrency = concurrency,
                                   serialize_apks = not send_id)
                # delete command
                elif cmd == COMMAND_DELETE:
                    self.action_delete(parser, hashes, package_names, tags, yes)

                clilog.info("done")