def after_task_publish_action(self, exchange=None, body=None, routing_key = None, signal = None, sender = None,
                                  # take unknown keywords for newer APIs
                                   **kwargs):
        '''
        Inform user about published tasks.

        Function will be executed on the task sender after the task has been published.

        Parameters
        ----------
        exchange : str
        body : dict, optional (default is None)
            The task message body, see Task Messages for a reference of possible fields that can be defined.
        routing_key : str
        signal : signal.Signal
        sender : str

        See Also
        --------
        http://celery.readthedocs.org/en/latest/userguide/signals.html#after-task-publish
        '''
        self.task_collection.inc_send_tasks()

        task_id = body["id"]
        Util.print_dyn_progress("Send tasks: %d, current task id: %s, queue: %s" % (self.task_collection.send_tasks.value, task_id, routing_key))
Example 2
    def after_task_publish_action(
            self,
            exchange=None,
            body=None,
            routing_key=None,
            signal=None,
            sender=None,
            # take unknown keywords for newer APIs
            **kwargs):
        '''
        Inform user about published tasks.

        Function will be executed on the task sender after the task has been published.

        Parameters
        ----------
        exchange : str
        body : dict, optional (default is None)
            The task message body, see Task Messages for a reference of possible fields that can be defined.
        routing_key : str
        signal : signal.Signal
        sender : str

        See Also
        --------
        http://celery.readthedocs.org/en/latest/userguide/signals.html#after-task-publish
        '''
        self.task_collection.inc_send_tasks()

        task_id = body["id"]
        Util.print_dyn_progress(
            "Send tasks: %d, current task id: %s, queue: %s" %
            (self.task_collection.send_tasks.value, task_id, routing_key))
Example 3
    def __init__(self, config_filename, import_db = None):
        '''
        Parameters
        ----------
        config_filename : str, optional (default is `settings.CONFIG_PATH`)
            The path to the config to load.
        import_db : str, optional (default is read from config file)
            Path to the import db.
        '''
        # type: Settings
        if config_filename is None:
            config_filename = settings.CONFIG_PATH

        # create settings variable
        self.__settings = Settings(config_filename, default_path = settings.DEFAULTS_PATH)

        log.debug("config file settings: %s\n\tCLI options may overwrite them!", self.__settings)

        # load and set androguard path from configs
        Util.set_androguard_path(self.settings)

        # type: str
        import_db = self._get_import_db(import_db = import_db)
        #self.args.import_database
        log.info("Using import database: %s", import_db)

        # load a few other settings
        self.__storage = self._create_storage(import_db)
Example 4
    def print_progess(self):
        ''' Show the progress on run '''
        progress_str = Util.format_progress(self.cnt_done.value, self.cnt_complete)
        time_elapsed = timedelta(seconds=round(time() - self.start_time))
        progress_str = '=> [%s | %s | %s]' % (progress_str, time_elapsed, self.get_latest_analyzed_apk_name())
        sys.stdout.write("\r" + " " * len(self.last_printed_str))
        Util.print_dyn_progress(progress_str)
        self.last_printed_str = progress_str
Example 5
 def print_progess(self):
     ''' Show the progress on run '''
     progress_str = Util.format_progress(self.cnt_done.value,
                                         self.cnt_complete)
     time_elapsed = timedelta(seconds=round(time() - self.start_time))
     progress_str = '=> [%s | %s | %s]' % (
         progress_str, time_elapsed, self.get_latest_analyzed_apk_name())
     sys.stdout.write("\r" + " " * len(self.last_printed_str))
     Util.print_dyn_progress(progress_str)
     self.last_printed_str = progress_str
Example 6
 def print_progess(self):
     ''' Show the progress on run '''
     progress_str = Util.format_progress(
         self.get_chunked_cnt(self.get_total_run_tasks()),
         self.cnt_total_task)
     time_elapsed = timedelta(seconds=round(time() - self.start_time))
     progress_str = 'Successful: %d, Failed: %d, Total: %s -- Time elapsed: %s' % (
         self.successful_tasks, self.failed_tasks, progress_str,
         time_elapsed)
     Util.print_dyn_progress(progress_str)
Example 7
    def __init__(self,
                 storage,
                 script_list,
                 script_hashes,
                 min_script_needs,
                 apks_or_paths,
                 cnt_apks=None,
                 storage_results=None,
                 **kwargs):
        '''
        Use the `import_scripts` method to get a list<type<AndroScript>> from a list of absolute paths (to the scripts).

        Parameters
        ----------
        storage: RedundantStorage
            The storage to store the results.
        script_list: list<type<AndroScript>>
            List of `AndroScript` references (not instantiated classes!)
        script_hashes : list<str>, optional (default is None)
            If given, set the hash for the `AndroScript`s
        min_script_needs : tuple<bool>
            See :py:method:`ScriptUtil.get_maximal_script_options`.
        apks_or_paths: iterable<str> or list<Apk>, optional (default is [])
            List of `Apk` objects or paths to the apks which shall be analyzed with the given scripts.
            If you analyze from paths, the `import_date` is not set!
        cnt_apks : int, optional
            Total number of apks to analyze.
            If not given, calculate it.
        storage_results : Queue<tuple<str, bool>>, optional (default is Queue)
            Storage results. First component is the id of the entry
            and the second a boolean indicating whether the result has been stored in gridfs.
            Will be created if not supplied!

        Raises
        ------
        AndroScriptError
            If an error happened while initializing some `AndroScript`.
        '''
        super(BaseAnalyzer, self).__init__()
        if apks_or_paths is None:
            apks_or_paths = []

        self.__storage = storage
        self.__script_list = script_list
        self.__script_hashes = script_hashes
        self.__min_script_needs = min_script_needs

        if cnt_apks is None:
            # calculate cnt apks if not given
            apks_or_paths, cnt_apks = Util.count_iterable_n_clone(
                apks_or_paths)

        self.__apks_or_paths = apks_or_paths
        self._cnt_apks = cnt_apks

        # shared memory
        self._cnt_analyzed_apks = Value('i', 0, lock=RLock())
        if storage_results is None:
            storage_results = Queue()
        self._storage_results = storage_results
Example 8
    def __init__(self,
                 storage, script_list, script_hashes, min_script_needs, apks_or_paths, cnt_apks = None, storage_results = None, **kwargs):
        '''
        Use the `import_scripts` method to get a list<type<AndroScript>> from a list of absolute paths (to the scripts).

        Parameters
        ----------
        storage: RedundantStorage
            The storage to store the results.
        script_list: list<type<AndroScript>>
            List of `AndroScript` references (not instantiated classes!)
        script_hashes : list<str>, optional (default is None)
            If given, set the hash for the `AndroScript`s
        min_script_needs : tuple<bool>
            See :py:method:`ScriptUtil.get_maximal_script_options`.
        apks_or_paths: iterable<str> or list<Apk>, optional (default is [])
            List of `Apk` objects or paths to the apks which shall be analyzed with the given scripts.
            If you analyze from paths, the `import_date` is not set!
        cnt_apks : int, optional
            Total number of apks to analyze.
            If not given, calculate it.
        storage_results : Queue<tuple<str, bool>>, optional (default is Queue)
            Storage results. First component is the id of the entry
            and the second a boolean indicating whether the result has been stored in gridfs.
            Will be created if not supplied!

        Raises
        ------
        AndroScriptError
            If an error happened while initializing some `AndroScript`.
        '''
        super(BaseAnalyzer, self).__init__()
        if apks_or_paths is None:
            apks_or_paths = []

        self.__storage = storage
        self.__script_list = script_list
        self.__script_hashes = script_hashes
        self.__min_script_needs = min_script_needs

        if cnt_apks is None:
            # calculate cnt apks if not given
            apks_or_paths, cnt_apks = Util.count_iterable_n_clone(apks_or_paths)

        self.__apks_or_paths = apks_or_paths
        self._cnt_apks = cnt_apks

        # shared memory
        self._cnt_analyzed_apks = Value('i', 0, lock = RLock())
        if storage_results is None:
            storage_results = Queue()
        self._storage_results = storage_results
Example 9
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except self.exception_tuple as e:
                # match on `caused_by`
                if self.caused_by_tuple is None or isinstance(e, WrapperException) and isinstance(e.caused_by, self.caused_by_tuple):

                    # get self reference -> get subclass of RetryableTask
                    cur_task = args[0]

                    # use exponential backoff for retrying
                    retry_time = CeleryUtil.exp_backoff(cur_task, self.max_retry_time)
                    # log error
                    Util.log_will_retry(retry_time, exc = e, what = func.__name__)
                    # retry
                    raise cur_task.retry(args = cur_task.get_retry_arguments(),
                               exc = e,
                               max_retries = self.max_retries,
                               countdown = retry_time
                               )
                # no match on exceptions -> no retry! -> propagate exception
                raise e, None, sys.exc_info()[2]
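
The `CeleryUtil.exp_backoff` helper called above is not shown in this listing; a rough sketch of what a capped exponential backoff typically looks like (an assumption, not the project's implementation):

def exp_backoff_sketch(task, max_retry_time, base=2):
    ''' Hypothetical helper: back off 2, 4, 8, ... seconds based on the
    current Celery retry counter, capped at `max_retry_time`. '''
    return min(base ** (task.request.retries + 1), max_retry_time)
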
Example 10
    def check_n_add(res):
        ''' Check if `res` is ready and has not been revoked etc. '''
        try:
            if res is not None:
                result = res.get(propagate = False)
                # if chunked, result is list of multiple tasks -> unpack results
                if chunked:
                    result = Util.flatten(result)
                # no result available if e.g. exception raised
                if result is not None:
                    results.append(result)
        # TaskRevokedError
        except Exception:
            pass
Example 11
    def send_apk_args_generator(self, apk_gen):
        ''' Generator over arguments for sending APKs.

        Parameters
        ----------
        apk_gen : generator<tuple<object, bool, object>>
            Generator over zip files or ids.
            The second component of each tuple indicates that the generator is over the ids
            rather than over the zip files.
            See :py:method:`.AnalyzeUtil.apk_id_or_raw_data_gen` to get such a generator.
        '''
        # get package names from initialized scripts
        script_packages = Util.module_names_from_class(self.script_list)

        for apk_zipfile_or_hash, is_id, fast_apk in apk_gen:
            yield script_packages, self.min_script_needs, self.script_hashes, apk_zipfile_or_hash, is_id, fast_apk
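
A hypothetical call site (`analyzer` and `analyze_apk_task` are placeholders): each yielded tuple can be passed as the positional arguments of a task invocation.

for task_args in analyzer.send_apk_args_generator(apk_gen):
    analyze_apk_task.apply_async(args=task_args)
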
Example 12
    def send_apk_args_generator(self, apk_gen):
        ''' Generator over arguments for sending APKs.

        Parameters
        ----------
        apk_gen : generator<tuple<object, bool, object>>
            Generator over zip files or ids.
            The second component of each tuple indicates that the generator is over the ids
            rather than over the zip files.
            See :py:method:`.AnalyzeUtil.apk_id_or_raw_data_gen` to get such a generator.
        '''
        # get package names from initialized scripts
        script_packages = Util.module_names_from_class(self.script_list)

        for apk_zipfile_or_hash, is_id, fast_apk in apk_gen:
            yield script_packages, self.min_script_needs, self.script_hashes, apk_zipfile_or_hash, is_id, fast_apk
Example 13
    def __setup_scripts_hash_validation(self, androscripts, script_hashes):
        '''
        Setup scripts.

        Also validate submitted script hashes if script reload is needed!

        Parameters
        ----------
        androscripts : list<str>
            List of package names.
        script_hashes : list<str>
            If given, set the hash for the `AndroScript`s

        Raises
        ------
        AnalyzeError
            If a NoAndroScriptSubclass, IOError or ModuleNotSameClassNameException has been raised.
        ImportError
        ScriptHashValidationError
            If the validation of script hashes fails after reloading scripts from disk.
        '''
        # need tuple to compare
        script_hashes = tuple(script_hashes)

        # import script modules
        script_types = ScriptUtil.import_scripts(androscripts,
                                                 via_package=True,
                                                 _reload=True)

        # instantiate scripts and get classes
        self.androscripts = ScriptUtil.instantiate_scripts(
            script_types,
            # needed for path calculation
            script_paths=[Util.package_name_2_path(s) for s in androscripts])

        actual_hashes = tuple([s.hash for s in self.androscripts])

        if sorted(actual_hashes) != sorted(script_hashes):
            raise ScriptHashValidationError(script_hashes, actual_hashes)
Example 14
    def __setup_scripts_hash_validation(self, androscripts, script_hashes):
        '''
        Setup scripts.

        Also validate submitted script hashes if script reload is needed!

        Parameters
        ----------
        androscripts : list<str>
            List of package names.
        script_hashes : list<str>
            If given, set the hash for the `AndroScript`s

        Raises
        ------
        AnalyzeError
            If a NoAndroScriptSubclass, IOError or ModuleNotSameClassNameException has been raised.
        ImportError
        ScriptHashValidationError
            If the validation of script hashes fails after reloading scripts from disk.
        '''
        # need tuple to compare
        script_hashes = tuple(script_hashes)

        # import script modules
        script_types = ScriptUtil.import_scripts(androscripts, via_package = True, _reload = True)

        # instantiate scripts and get classes
        self.androscripts = ScriptUtil.instantiate_scripts(script_types,
                                                           # needed for path calculation
                                                           script_paths = [Util.package_name_2_path(s) for s in androscripts])

        actual_hashes = tuple([s.hash for s in self.androscripts])

        if sorted(actual_hashes) != sorted(script_hashes):
            raise ScriptHashValidationError(script_hashes, actual_hashes)
Example 15
    def fast_load_from_io(file_like_object=None,
                          apk_file_path=None,
                          calculate_hash=True):
        ''' Load a FastApk from file-like object or path by unzipping only the manifest file
        and calculating the hash.

        Parameters
        ----------
        file_like_object : file-like-object, optional (default is None)
            A file-like obj that points to the apk.
            If none given, try to open a file-like object from the given `apk_file_path`.
        apk_file_path: str, optional (default is "not set")
            Path of apk
        calculate_hash : bool
            If True, calculate the hash.
            This means the file has to be loaded completely into memory.
            If False, the hash will be calculated the first time it gets retrieved.

        Returns
        -------
        apk: FastApk

        Raises
        ------
        CouldNotOpenApk
            If the apk file could not be opened
        CouldNotReadManifest
            If the manifest file could not be read
        '''
        from androguard.core.bytecodes import apk as androapk
        from androguard.patch import zipfile

        # apk will be loaded from `flo` variable
        flo = file_like_object

        # indicates if file has been opened from path
        file_open_from_path = False
        # no file_like_object given, open file from path
        if file_like_object is None and isinstance(apk_file_path, str):
            try:
                flo = open(apk_file_path, "rb")
                file_open_from_path = True
            except IOError as e:
                # nothing to close here: `open` failed, so `flo` is still None
                raise CouldNotOpenApk(apk_file_path,
                                      e), None, sys.exc_info()[2]

        # if the file path is not set, at least make that visible in the exceptions
        if apk_file_path is None:
            apk_file_path = "not set"

        # load apk into memory and calculate hash if option set
        flo.seek(0)
        _hash = None
        if calculate_hash:
            data = flo.read()
            # calculate hash
            _hash = Util.sha256(data)
            flo.seek(0)

        try:
            if zipfile.is_zipfile(flo):
                z = zipfile.ZipFile(flo)
                # only read manifest from zip file
                binary_manifest = z.read(MANIFEST_FILENAME)
                ap = androapk.AXMLPrinter(binary_manifest)
                dom = minidom.parseString(ap.get_buff())
                manifest_tag = dom.getElementsByTagName(MANIFEST_TAG_NAME)
                # check that manifest tag is available
                if len(manifest_tag) > 0:

                    # get size of the uncompressed .dex file
                    size_app_code = z.getinfo(COMPILED_APP_CODE).file_size
                    # get build date (last timestamp of classes.dex in zipfile)
                    build_date = datetime(
                        # use tuple from zipfile and pass the unpacked content to the constructor
                        *z.getinfo(COMPILED_APP_CODE).date_time)

                    manifest_items = manifest_tag[0].attributes
                    # use the namespace to ignore wrong prefixes like "ns0"
                    version_name = manifest_items.getNamedItemNS(
                        MANIFEST_NS, MANIFEST_VERSION_NAME).nodeValue
                    package = manifest_items.getNamedItem(
                        MANIFEST_PACKAGE).nodeValue
                    return FastApk(package,
                                   version_name,
                                   path=apk_file_path,
                                   _hash=_hash,
                                   size_app_code=size_app_code,
                                   build_date=build_date)
            raise CouldNotOpenManifest(apk_file_path), None, sys.exc_info()[2]
        except Exception as e:
            raise CouldNotOpenApk(apk_file_path,
                                  caused_by=e), None, sys.exc_info()[2]
        finally:
            # close file if manually opened from path
            if file_open_from_path:
                flo.close()
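
Assuming `fast_load_from_io` is exposed as a static method on `FastApk`, a minimal usage sketch (the path below is a placeholder):

apk = FastApk.fast_load_from_io(apk_file_path="/tmp/example.apk")
with open("/tmp/example.apk", "rb") as flo:
    # skip hashing; the hash is then computed lazily on first access
    apk_no_hash = FastApk.fast_load_from_io(file_like_object=flo, calculate_hash=False)
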
Example 16
    def androguard_load_from_io(file_like_object=None,
                                apk_file_path=None,
                                calculate_hash=True):
        ''' Load a FastApk from file-like object or path by using `androguard`.

        Parameters
        ----------
        file_like_object : file-like-object, optional (default is None)
            A file-like obj that points to the apk.
            If none given, try to open a file-like object from the given `apk_file_path`.
        apk_file_path: str, optional (default is "not set")
            Path of apk
        calculate_hash : bool
            If True, calculate the hash.
            This means the file has to be loaded completely into memory.
            If False, the hash will be calculated the first time it gets retrieved.

        Returns
        -------
        apk: FastApk

        Raises
        ------
        CouldNotOpenApk
            If the apk file could not be opened
        '''
        # prevent circular import
        from androlyze.analyze import AnalyzeUtil

        # apk will be loaded from `flo` variable
        flo = file_like_object

        # indicates if file has been opened from path
        file_open_from_path = False
        # no file_like_object given, open file from path
        if file_like_object is None and isinstance(apk_file_path, str):
            try:
                flo = open(apk_file_path, "rb")
                file_open_from_path = True
            except IOError as e:
                # nothing to close here: `open` failed, so `flo` is still None
                raise CouldNotOpenApk(apk_file_path,
                                      e), None, sys.exc_info()[2]

        # if the file path is not set, at least make that visible in the exceptions
        if apk_file_path is None:
            apk_file_path = "not set"

        # load apk into memory and calculate hash if option set
        flo.seek(0)
        _hash = None
        data = flo.read()
        if calculate_hash:
            # calculate hash
            _hash = Util.sha256(data)
            flo.seek(0)

        apk = AnalyzeUtil.open_apk(data, raw=True, path=apk_file_path)

        if file_open_from_path:
            flo.close()

        if apk is not None:
            try:
                return FastApk.load_from_eandroapk(apk)
            except KeyError:
                pass

        # could not open apk -> raise error
        raise CouldNotOpenApk(file_path=apk_file_path)
Example 17
    def fast_load_from_io(file_like_object = None, apk_file_path = None, calculate_hash = True):
        ''' Load a FastApk from file-like object or path by unzipping only the manifest file
        and calculating the hash.

        Parameters
        ----------
        file_like_object : file-like-object, optional (default is None)
            A file-like obj that points to the apk.
            If none given, try to open a file-like object from the given `apk_file_path`.
        apk_file_path: str, optional (default is "not set")
            Path of apk
        calculate_hash : bool
            If True, calculate the hash.
            This means the file has to be loaded completely into memory.
            If False, the hash will be calculated the first time it gets retrieved.

        Returns
        -------
        apk: FastApk

        Raises
        ------
        CouldNotOpenApk
            If the apk file could not be opened
        CouldNotReadManifest
            If the manifest file could not be read
        '''
        from androguard.core.bytecodes import apk as androapk
        from androguard.patch import zipfile

        # apk will be loaded from `flo` variable
        flo = file_like_object

        # indicates if file has been opened from path
        file_open_from_path = False
        # no file_like_object given, open file from path
        if file_like_object is None and isinstance(apk_file_path, str):
            try:
                flo = open(apk_file_path, "rb")
                file_open_from_path = True
            except IOError as e:
                # nothing to close here: `open` failed, so `flo` is still None
                raise CouldNotOpenApk(apk_file_path, e), None, sys.exc_info()[2]

        # if the file path is not set, at least make that visible in the exceptions
        if apk_file_path is None:
            apk_file_path = "not set"

        # load apk into memory and calculate hash if option set
        flo.seek(0)
        _hash = None
        if calculate_hash:
            data = flo.read()
            # calculate hash
            _hash = Util.sha256(data)
            flo.seek(0)

        try:
            if zipfile.is_zipfile(flo):
                z = zipfile.ZipFile(flo)
                # only read manifest from zip file
                binary_manifest = z.read(MANIFEST_FILENAME)
                ap = androapk.AXMLPrinter(binary_manifest)
                dom = minidom.parseString(ap.get_buff())
                manifest_tag = dom.getElementsByTagName(MANIFEST_TAG_NAME)
                # check that manifest tag is available
                if len(manifest_tag) > 0:
                    
                    # get size of the uncompressed .dex file
                    size_app_code = z.getinfo(COMPILED_APP_CODE).file_size
                    # get build date (last timestamp of classes.dex in zipfile)
                    build_date = datetime(
                                          # use tuple from zipfile and pass the unpacked content to the constructor
                                          *z.getinfo(COMPILED_APP_CODE).date_time
                                          )
                    
                    
                    manifest_items = manifest_tag[0].attributes
                    # use the namespace to ignore wrong prefixes like "ns0"
                    version_name = manifest_items.getNamedItemNS(MANIFEST_NS, MANIFEST_VERSION_NAME).nodeValue
                    package = manifest_items.getNamedItem(MANIFEST_PACKAGE).nodeValue
                    return FastApk(package, version_name, path = apk_file_path, _hash = _hash, size_app_code = size_app_code, build_date = build_date)
            raise CouldNotOpenManifest(apk_file_path), None, sys.exc_info()[2]
        except Exception as e:
            raise CouldNotOpenApk(apk_file_path, caused_by = e), None, sys.exc_info()[2]
        finally:
            # close file if manually opened from path
            if file_open_from_path:
                flo.close()
Example 18
    def androguard_load_from_io(file_like_object = None, apk_file_path = None, calculate_hash = True):
        ''' Load a FastApk from file-like object or path by using `androguard`.

        Parameters
        ----------
        file_like_object : file-like-object, optional (default is None)
            A file-like obj that points to the apk.
            If none given, try to open a file-like object from the given `apk_file_path`.
        apk_file_path: str, optional (default is "not set")
            Path of apk
        calculate_hash : bool
            If True, calculate the hash.
            This means the file has to be loaded completely into memory.
            If False, the hash will be calculated the first time it gets retrieved.

        Returns
        -------
        apk: FastApk

        Raises
        ------
        CouldNotOpenApk
            If the apk file could not be opened
        '''
        # prevent circular import
        from androlyze.analyze import AnalyzeUtil

        # apk will be loaded from `flo` variable
        flo = file_like_object

        # indicates if file has been opened from path
        file_open_from_path = False
        # no file_like_object given, open file from path
        if file_like_object is None and isinstance(apk_file_path, str):
            try:
                flo = open(apk_file_path, "rb")
                file_open_from_path = True
            except IOError as e:
                # nothing to close here: `open` failed, so `flo` is still None
                raise CouldNotOpenApk(apk_file_path, e), None, sys.exc_info()[2]

        # if the file path is not set, at least make that visible in the exceptions
        if apk_file_path is None:
            apk_file_path = "not set"

        # load apk into memory and calculate hash if option set
        flo.seek(0)
        _hash = None
        data = flo.read()
        if calculate_hash:
            # calculate hash
            _hash = Util.sha256(data)
            flo.seek(0)

        apk = AnalyzeUtil.open_apk(data, raw = True, path = apk_file_path)

        if file_open_from_path:
            flo.close()

        if apk is not None:
            try:
                return FastApk.load_from_eandroapk(apk)
            except KeyError:
                pass

        # could not open apk -> raise error
        raise CouldNotOpenApk(file_path = apk_file_path)
Example 19
 def print_progess(self):
     ''' Show the progress on run '''
     progress_str = Util.format_progress(self.get_chunked_cnt(self.get_total_run_tasks()), self.cnt_total_task)
     time_elapsed = timedelta(seconds=round(time() - self.start_time))
     progress_str = 'Successful: %d, Failed: %d, Total: %s -- Time elapsed: %s' % (self.successful_tasks, self.failed_tasks, progress_str, time_elapsed)
     Util.print_dyn_progress(progress_str)
Example 20
 def print_progess(cnt_analyzed):
     progress_str = Util.format_progress(cnt_analyzed * tasks_per_chunk, total_cnt)
     time_elapsed = timedelta(seconds=round(time() - start))
     progress_str = '%s, Time elapsed: %s' % (progress_str, time_elapsed)
     Util.print_dyn_progress(progress_str)
Example 21
def analyze_apk(eandro_apk,
                scripts,
                min_script_needs,
                propagate_error=False,
                reset_scripts=True):
    ''' Analyze the `eandro_apk` with the given `scripts` assuming each `AndroScript`
    needs at least `min_script_needs`.

    Be sure that you have reset the `scripts`!

    Parameters
    ----------
    eandro_apk : EAndroApk
        The apk.
    scripts : iterable<AndroScript>
        The scripts to use for the analysis.
    min_script_needs : tuple<bool>
        See :py:meth:`ScriptUtil.get_maximal_script_options`
    propagate_error : bool, optional (default is False)
        If true propagate errors.
    reset_scripts : bool, optional (default is True)
        If True, reset the `AndroScript`s before analyzing.

    Returns
    -------
    list<FastApk, list<AndroScript>>
        Uses `FastApk` to only store the meta information, not the apk data!
    None
        If error happened.
    '''
    from androlyze.analyze.exception import AndroScriptError

    try:
        # reset scripts
        if reset_scripts:
            for s in scripts:
                s.reset()

        if eandro_apk is not None:
            fastapk = None
            # analyze classes.dex with script requirements and get time
            args = [eandro_apk.get_dex()] + list(min_script_needs)

            time_s, analysis_objs = Util.timeit(analyze_dex, *args, raw=True)

            script_results = []
            for s in scripts:
                try:
                    result_obj = s.analyze(eandro_apk, *analysis_objs)

                    # we only need the meta infos of the apk
                    if eandro_apk is not None:
                        fastapk = FastApk.load_from_eandroapk(eandro_apk)

                    # set androguard analysis time if script wants stats
                    s.add_apk_androguard_analyze_time(time_s)

                    # link to apk
                    if isinstance(result_obj, ResultObject):
                        result_obj.set_apk(fastapk)

                    script_results.append(s)
                except Exception as e:
                    if propagate_error:
                        raise
                    else:
                        log.exception(AndroScriptError(s, e))

            if fastapk is not None:
                # use fastapk to only store the meta information, not the apk data!
                return [fastapk, script_results]

    # interrupt analysis if analysis objects could not be created!
    except DexError as e:
        log.exception(e)
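
A hypothetical call site for `analyze_apk` (script import and instantiation helpers are shown in the `ScriptUtil` examples elsewhere in this listing):

res = analyze_apk(eandro_apk, scripts, min_script_needs)
if res is not None:
    fastapk, script_results = res
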
Example 22
 def converter(obj):
     if isinstance(obj, datetime):
         return Util.datetime_to_iso8601(obj)
     elif isinstance(obj, ObjectId):
         return str(obj)
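
A converter like this is typically handed to `json.dumps` as the `default` hook, which is called for objects the encoder cannot serialize natively (the dictionary below is a placeholder):

import json
from datetime import datetime

json_str = json.dumps({"imported": datetime.utcnow()}, default=converter)
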
Example 23
    def _analyze(self):
        ''' See doc of :py:meth:`BaseAnalyzer.analyze`. '''
        try:
            work_queue = self.work_queue

            # create worker pool
            log.debug("starting %s workers ...", self.concurrency)
            for _ in range(self.concurrency):
                p = Worker(self.script_list, self.script_hashes,
                           self.min_script_needs, work_queue, self.storage,
                           self.cnt_analyzed_apks, self.analyzed_apks,
                           self.storage_results)
                self.workers.append(p)
                p.daemon = True

            # start workers
            for p in self.workers:
                p.start()

            # queue has size limit -> start workers first then enqueue items
            log.info("Loading apk paths into work queue ...")
            for apk_stuff in AnalyzeUtil.apk_gen(self.apks_or_paths):
                # task is apk with all scripts
                work_queue.put(apk_stuff)

            for _ in range(self.concurrency):
                # signal end-of-work
                work_queue.put(STOP_SENTINEL)

            # progress view for cli
            av = AnalysisStatsView(self.cnt_analyzed_apks, self._cnt_apks,
                                   self.analyzed_apks)
            av.daemon = True
            av.start()

            # block until workers finished
            work_queue.join()
            av.terminate()
            log.debug("joined on work queue ...")

            return self.cnt_analyzed_apks.value

        # try hot shutdown first
        except KeyboardInterrupt:
            log.warn("Hot shutdown ... ")
            try:
                log.warn("clearing work queue ... ")
                Util.clear_queue(work_queue)
                log.warn("cleared work queue ... ")

                for _ in range(self.concurrency):
                    # signal end-of-work
                    work_queue.put(STOP_SENTINEL)

                for worker in self.workers:
                    worker.join()
                log.warn("waited for all workers ... ")

                return self.cnt_analyzed_apks.value

            # if user really wants make a cold shutdown -> kill processes
            except KeyboardInterrupt:
                log.warn("Cold shutdown ... ")
                log.warn("Hard shutdown wanted! Killing all workers!")

                # kill processes via SIGINT -> send CTRL-C
                for w in self.workers:
                    try:
                        os.kill(w.pid, signal.SIGINT)
                    except:
                        pass

                return self.cnt_analyzed_apks.value
Example 24
__author__ = "Nils Tobias Schmidt"
__email__ = "schmidt89 at informatik.uni-marburg.de"

import pickle

from celery import Celery
from kombu.serialization import register

from androlyze.Constants import PROJECT_NAME
from androlyze.celery.celerysettings import settings
from androlyze.settings import *
from androlyze.util import Util
from androlyze.analyze.distributed.tasks.AnalyzeTask import AnalyzeTask
from celery.registry import tasks

# worker has to import androguard too
Util.set_androguard_path(settings)

# set pickle to specific protocol
register('pickle', lambda s: pickle.dumps(s, 2), lambda s: pickle.loads(s),
        content_type='application/x-python-serialize',
        content_encoding='binary')

app = Celery(PROJECT_NAME)

# load config
app.config_from_object(CELERY_CONF)

if __name__ == '__main__':
    app.start()
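
For illustration, a task could then be dispatched through this app from client code; the task name and argument below are placeholders, not the project's real task:

result = app.send_task("androlyze.analyze", args=["<apk id>"])
print(result.get(timeout=60))
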
Example 25
    def _analyze(self):
        ''' See doc of :py:meth:`BaseAnalyzer.analyze`. '''
        try:
            work_queue = self.work_queue

            # create worker pool
            log.debug("starting %s workers ...", self.concurrency)
            for _ in range(self.concurrency):
                p = Worker(self.script_list, self.script_hashes, self.min_script_needs,
                                                 work_queue, self.storage,
                                                 self.cnt_analyzed_apks, self.analyzed_apks, self.storage_results)
                self.workers.append(p)
                p.daemon = True

            # start workers
            for p in self.workers:
                p.start()

            # queue has size limit -> start workers first then enqueue items
            log.info("Loading apk paths into work queue ...")
            for apk_stuff in AnalyzeUtil.apk_gen(self.apks_or_paths):
                # task is apk with all scripts
                work_queue.put(apk_stuff)

            for _ in range(self.concurrency):
                # signal end-of-work
                work_queue.put(STOP_SENTINEL)

            # progress view for cli
            av = AnalysisStatsView(self.cnt_analyzed_apks, self._cnt_apks, self.analyzed_apks)
            av.daemon = True
            av.start()
            
            # block until workers finished
            work_queue.join()
            av.terminate()
            log.debug("joined on work queue ...")

            return self.cnt_analyzed_apks.value

        # try hot shutdown first
        except KeyboardInterrupt:
            log.warn("Hot shutdown ... ")
            try:
                log.warn("clearing work queue ... ")
                Util.clear_queue(work_queue)
                log.warn("cleared work queue ... ")
                
                for _ in range(self.concurrency):
                    # signal end-of-work
                    work_queue.put(STOP_SENTINEL)
                    
                for worker in self.workers:
                    worker.join()
                log.warn("waited for all workers ... ")

                return self.cnt_analyzed_apks.value

            # if user really wants make a cold shutdown -> kill processes
            except KeyboardInterrupt:
                log.warn("Cold shutdown ... ")
                log.warn("Hard shutdown wanted! Killing all workers!")

                # kill processes via SIGINT -> send CTRL-C
                for w in self.workers:
                    try:
                        os.kill(w.pid, signal.SIGINT)
                    except:
                        pass

                return self.cnt_analyzed_apks.value
Example 26
        disable_std_loggers()
    else:
        log_set_level(LOG_LEVEL)
        clilog_set_level(logging.INFO)

    # write to file with specified log level
    redirect_to_file_handler(logger_filename, LOG_LEVEL)


if __name__ == "__main__":
    if PROFILE:
        import cProfile
        import pstats
        profile_filename = 'androlyze.Main_profile.txt'
        cProfile.run('main()', profile_filename)
        statsfile = open("profile_stats.txt", "wb")
        p = pstats.Stats(profile_filename, stream=statsfile)
        stats = p.strip_dirs().sort_stats('cumulative')
        stats.print_stats()
        statsfile.close()
        sys.exit(0)
    time, ret_code = Util.timeit(main)
    log.warn('Took %s (h/m/s)\n' % datetime.timedelta(seconds=round(time)))

    if DEBUG:
        with open("time.txt", "a") as f:
            f.write('%s : %s\n' % (datetime.datetime.now(),
                                   datetime.timedelta(seconds=round(time))))

    sys.exit()
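
`Util.timeit(main)` above returns the elapsed time together with the callable's return value. A rough stand-in illustrating that presumed contract (the real helper also accepts extra options such as the `raw` flag used in other examples here):

from time import time as _now

def timeit_sketch(func, *args, **kwargs):
    ''' Hypothetical stand-in: run `func` and return (elapsed_seconds, result). '''
    start = _now()
    result = func(*args, **kwargs)
    return _now() - start, result
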
Example 27
    def _analyze(self, apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args,
                 **kwargs):
        ''' Analyze by running all `AndroScript`s '''

        # log script meta ?
        log_script_meta = kwargs.get("log_script_meta", True)
        # may be disabled! check!
        if not self.log_chained_script_meta_infos():
            log_script_meta = False

        # don't log script meta infos in chained scripts inside this `ChainedScript`
        kwargs["log_script_meta"] = False

        if log_script_meta:
            # log meta infos
            self._log_chained_script_meta()

        # collect results from scripts
        collected_results = self.res

        # run over scripts
        for ascript in self.chain_scripts():
            script_result = None
            chained_script_name = self.try_get_chained_script_name(ascript)
            try:
                # analyze with script
                script_result = ascript.analyze(apk, dalvik_vm_format,
                                                vm_analysis, gvm_analysis,
                                                *args, **kwargs)

                # store results under given categories
                categories = self.root_categories()
                if len(categories) > 0:
                    # run over dict and log items
                    for key, val in script_result.results.items():
                        collected_results.register_keys([key], *categories)
                        collected_results.log(key, val, *categories)

                else:
                    # simply update dict
                    collected_results.results.update(script_result.results)

                if log_script_meta:
                    # log successful run
                    collected_results.log_append_to_enum(
                        CAT_SUCCESSFUL, chained_script_name, CAT_ROOT)

            except Exception as e:
                if log_script_meta:
                    # the value that will be logged for the script failure
                    failure_log_val = chained_script_name

                    # if exception shall be logged, create dict with name as key and exception as value
                    if self.log_script_failure_exception():
                        # exception message
                        exc_msg = Util.format_exception(sys.exc_info(),
                                                        as_string=False)
                        failure_log_val = {failure_log_val: exc_msg}

                    # log that script encountered an error
                    collected_results.log_append_to_enum(
                        CAT_FAILURES, failure_log_val, CAT_ROOT)

                if not self.continue_on_script_failure():
                    # reraise exception if the analysis shall be stopped
                    # after a script encountered an error
                    raise
                else:
                    log.warn(
                        '''%s: The script "%s" on apk: %s caused an error! But the other scripts will still run! Have a look at the options of `ChainedScript` for exception traceback writing!
\tError: %s''' % (self.__class__.__name__, ascript, apk.short_description(),
                    e))
Example 28
def import_scripts(script_list, via_package = False, _reload = False, clazz_name = None):
    '''
    Import the scripts (via file path or package name - configurable via `via_package`).

    Parameters
    ----------
    script_list: list<str>
        list of script names (absolute path) or package names.
    via_package : bool, optional (default is False)
        If true, assume package names are given instead of file paths.
    _reload : bool, optional (default is False)
        Reload scripts and delete them from internal cache.
        Only possible if `via_package`.
    clazz_name : optional (default is None)
        The name of the class to import. If none, use the name of the module. 

    Returns
    -------
    list<type<AndroScript>>
        list of uninstantiated AndroScript classes

    Raises
    ------
    AnalyzeError
        If a NoAndroScriptSubclass, IOError or ModuleNotSameClassNameException has been raised.
    ImportError
    '''
    # late import -> prevent recursive import
    from androlyze.model.script.AndroScript import AndroScript
    from androlyze.analyze.exception import AnalyzeError
    androscripts = []

    # reload scripts if wanted
    if via_package and _reload:
        for script_package in script_list:
            log.debug("deleting %s from system modules", script_package)
            try:
                del sys.modules[script_package]
                log.debug("deleted")
            except KeyError:
                pass

    for script in script_list:
        class_name = clazz_name
        
        if not class_name:
            if via_package:
                class_name = script.split(".")[-1]
            else:
                class_name = basename(script.split(".py")[0])

        # class name must be equivalent to the module name!
        try:
            module_package = script
            # get package name from path and cut off file extension
            if not via_package:
                module_package = Util.path_2_package_name(script)
            module = importlib.import_module(module_package)
            clazz = getattr(module, class_name)
            # check if class is derived from AndroScript
            if isinstance(clazz, AndroScript.__class__):
                androscripts.append(clazz)
            else:
                raise NoAndroScriptSubclass(clazz), None, sys.exc_info()[2]
        except AttributeError as e:
            raise ModuleNotSameClassNameException(script, class_name), None, sys.exc_info()[2]
        except IOError as e:
            e.filename = script
            raise
        except (NoAndroScriptSubclass, ModuleNotSameClassNameException, IOError) as e:
            raise AnalyzeError(e), None, sys.exc_info()[2]

    return androscripts
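
Hypothetical usage (paths and package names below are placeholders):

# import script classes from absolute file paths
script_types = import_scripts(["/path/to/scripts/MyScript.py"])
# or from package names, forcing a reload of already imported modules
script_types = import_scripts(["scripts.MyScript"], via_package=True, _reload=True)
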
Example 29
    def fetch_results_from_mongodb(self, rds, results, wait_for_db = True,
                                   # progress
                                   nice_progess = False, synced_entries = None, total_sync_entries = None):
        '''
        Fetch some results from the result database and write them to disk.

        If data cannot be loaded from db, try until it can be.

        Parameters
        ----------
        rds : ResultDatabaseStorage
            The database to query for the results.
        results : list< tuple<id, gridfs (bool)> >
            Define which results shall be fetched.
        wait_for_db : bool, optional (default is True)
            Wait until data could be fetched from db.
        nice_progess : bool, optional (default is False)
            If enabled, show a nice progress bar on the cli.
        synced_entries : multiprocessing.Value<int>, optional (default is None)
            If supplied, store the number of already synced entries.
        total_sync_entries : multiprocessing.Value<int>, optional (default is None)
            If supplied, store the total number of entries to sync.

        Raises
        ------
        DatabaseLoadException
            If `wait_for_db` is False and an error occurred.
        '''
        # retry in ... seconds
        DATABASE_RETRY_TIME = 5

        # if true assume both counts are shared memory (Value)
        use_shared_memory = synced_entries is not None and total_sync_entries is not None

        if results is not None:
            results_stored = False
            while not results_stored:
                try:
                    # get ids
                    non_gridfs_ids, gridfs_ids = MongoUtil.split_result_ids(results)

                    # counts
                    cnt_non_gridfs_ids = len(non_gridfs_ids)
                    cnt_gridfs_ids = len(gridfs_ids)

                    if use_shared_memory:
                        total_sync_entries.value = cnt_gridfs_ids + cnt_non_gridfs_ids

                    # gridfs raw data as well as metadata
                    gridfs_entries_raw = []
                    if gridfs_ids:
                        gridfs_entries_raw = rds.get_results_for_ids(gridfs_ids, non_document = True, non_document_raw = True)

                    # regular documents (non gridfs)
                    non_gridfs_entries = []
                    if non_gridfs_ids:
                        non_gridfs_entries = rds.get_results_for_ids(non_gridfs_ids, non_document = False, non_document_raw = True)

                    if not nice_progess:
                        log.debug("fetching %d non-documents (gridfs) ... ", cnt_gridfs_ids)

                    for i, gridfs_entry_raw in enumerate(gridfs_entries_raw, 1):

                        # get our stored metadata (for script and apk)
                        gridfs_entry_meta = gridfs_entry_raw.metadata

                        if not nice_progess:
                            log.debug("getting results for %s", gridfs_entry_meta[RESOBJ_APK_META][RESOBJ_APK_META_PACKAGE_NAME])
                        else:
                            Util.print_dyn_progress(Util.format_progress(i, cnt_gridfs_ids))

                        # use apk to extract data from dict
                        fastapk = FastApk.load_from_result_dict(gridfs_entry_meta)
                        # get filename
                        file_name = gridfs_entry_raw.filename

                        # write results to disk
                        try:
                            self.store_custom_data(fastapk.package_name, fastapk.version_name, fastapk.hash, file_name, gridfs_entry_raw.read())
                        except FileSysStoreException as e:
                            log.exception(e)

                        # update shared memory progress indicator
                        if use_shared_memory:
                            with synced_entries.get_lock():
                                synced_entries.value += 1

                    if not nice_progess:
                        log.debug("fetching %d documents (non-gridfs) ... ", cnt_non_gridfs_ids)

                    for i, non_gridfs_entry in enumerate(non_gridfs_entries, 1):
                        if not nice_progess:
                            clilog.debug("getting results for %s" % non_gridfs_entry[RESOBJ_APK_META][RESOBJ_APK_META_PACKAGE_NAME])
                        else:
                            Util.print_dyn_progress(Util.format_progress(i, cnt_non_gridfs_ids))

                        # write results to disk
                        self.store_result_dict(non_gridfs_entry)

                        # update shared memory progress indicator
                        if use_shared_memory:
                            with synced_entries.get_lock():
                                synced_entries.value += 1

                    # if not wait for db wanted stop here
                    results_stored = True or not wait_for_db

                except (DatabaseLoadException, PyMongoError) as e:
                    if not wait_for_db:
                        raise
                    log.warn(e)
                    Util.log_will_retry(DATABASE_RETRY_TIME, exc = e)
                    sleep(DATABASE_RETRY_TIME)
Example 30
    def _analyze(self, apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args, **kwargs):
        ''' Analyze by running all `AndroScript`s '''

        # log script meta ?
        log_script_meta = kwargs.get("log_script_meta", True)
        # may be disabled! check!
        if not self.log_chained_script_meta_infos():
            log_script_meta = False

        # don't log script meta infos in chained scripts inside this `ChainedScript`
        kwargs["log_script_meta"] = False

        if log_script_meta:
            # log meta infos
            self._log_chained_script_meta()

        # collect results from scripts
        collected_results = self.res

        # run over scripts
        for ascript in self.chain_scripts():
            script_result = None
            chained_script_name = self.try_get_chained_script_name(ascript)
            try:
                # analyze with script
                script_result = ascript.analyze(apk, dalvik_vm_format, vm_analysis, gvm_analysis,
                                                *args, **kwargs)

                # store results under given categories
                categories = self.root_categories()
                if len(categories) > 0:
                    # run over dict and log items
                    for key, val in script_result.results.items():
                        collected_results.register_keys([key], *categories)
                        collected_results.log(key, val, *categories)

                else:
                    # simply update dict
                    collected_results.results.update(script_result.results)

                if log_script_meta:
                    # log successful run
                    collected_results.log_append_to_enum(CAT_SUCCESSFUL, chained_script_name, CAT_ROOT)

            except Exception as e:
                if log_script_meta:
                    # the value that will be logged for the script failure
                    failure_log_val = chained_script_name

                    # if exception shall be logged, create dict with name as key and exception as value
                    if self.log_script_failure_exception():
                        # exception message
                        exc_msg = Util.format_exception(sys.exc_info(), as_string = False)
                        failure_log_val = {failure_log_val : exc_msg}

                    # log that script encountered an error
                    collected_results.log_append_to_enum(CAT_FAILURES, failure_log_val, CAT_ROOT)

                if not self.continue_on_script_failure():
                    # reraise exception if the analysis shall be stopped
                    # after a script encountered an error
                    raise
                else:
                    log.warn('''%s: The script "%s" on apk: %s caused an error! But the other scripts will still run! Have a look at the options of `ChainedScript` for exception traceback writing!
\tError: %s''' % (self.__class__.__name__, ascript, apk.short_description(), e))
Example 31
def analyze_apk(eandro_apk, scripts, min_script_needs, propagate_error = False, reset_scripts = True):
    ''' Analyze the `eandro_apk` with the given `scripts` assuming each `AndroScript`
    needs at least `min_script_needs`.

    Be sure that you have reset the `scripts`!

    Parameters
    ----------
    eandro_apk : EAndroApk
        The apk.
    scripts : iterable<AndroScript>
        The scripts to use for the analysis.
    min_script_needs : tuple<bool>
        See :py:meth:`ScriptUtil.get_maximal_script_options`
    propagate_error : bool, optional (default is False)
        If true propagate errors.
    reset_scripts : bool, optional (default is True)
        If True, reset the `AndroScript`s before analyzing.

    Returns
    -------
    list<FastApk, list<AndroScript>>
        Uses `FastApk` to only store the meta information, not the apk data!
    None
        If error happened.
    '''
    from androlyze.analyze.exception import AndroScriptError

    try:
        # reset scripts
        if reset_scripts:
            for s in scripts:
                s.reset()

        if eandro_apk is not None:
            fastapk = None
            # analyze classes.dex with script requirements and get time
            args = [eandro_apk.get_dex()] + list(min_script_needs)

            time_s, analysis_objs = Util.timeit(analyze_dex, *args, raw = True)

            script_results = []
            for s in scripts:
                try:
                    result_obj = s.analyze(eandro_apk, *analysis_objs)

                    # we only need the meta infos of the apk
                    if eandro_apk is not None:
                        fastapk = FastApk.load_from_eandroapk(eandro_apk)

                    # set androguard analysis time if script wants stats
                    s.add_apk_androguard_analyze_time(time_s)

                    # link to apk
                    if isinstance(result_obj, ResultObject):
                        result_obj.set_apk(fastapk)

                    script_results.append(s)
                except Exception as e:
                    if propagate_error:
                        raise
                    else:
                        log.exception(AndroScriptError(s, e))

            if fastapk is not None:
                # use fastapk to only store the meta information, not the apk data!
                return [fastapk, script_results]

    # interrupt analysis if analysis objects could not be created!
    except DexError as e:
        log.exception(e)
Example 32
 def print_progess(cnt_analyzed):
     progress_str = Util.format_progress(cnt_analyzed * tasks_per_chunk,
                                         total_cnt)
     time_elapsed = timedelta(seconds=round(time() - start))
     progress_str = '%s, Time elapsed: %s' % (progress_str, time_elapsed)
     Util.print_dyn_progress(progress_str)
Example 33
    if quiet:
        disable_std_loggers()
    else:
        log_set_level(LOG_LEVEL)
        clilog_set_level(logging.INFO)

    # write to file with specified log level
    redirect_to_file_handler(logger_filename, LOG_LEVEL)

if __name__ == "__main__":
    if PROFILE:
        import cProfile
        import pstats
        profile_filename = 'androlyze.Main_profile.txt'
        cProfile.run('main()', profile_filename)
        statsfile = open("profile_stats.txt", "wb")
        p = pstats.Stats(profile_filename, stream=statsfile)
        stats = p.strip_dirs().sort_stats('cumulative')
        stats.print_stats()
        statsfile.close()
        sys.exit(0)
    time, ret_code = Util.timeit(main)
    log.warn('Took %s (h/m/s)\n' % datetime.timedelta(seconds=round(time)))

    if DEBUG:
        with open("time.txt", "a") as f:
            f.write('%s : %s\n' % (datetime.datetime.now(), datetime.timedelta(seconds=round(time))))

    sys.exit()