def after_task_publish_action(self, exchange=None, body=None, routing_key=None,
                              signal=None, sender=None,
                              # take unknown keywords for newer APIs
                              **kwargs):
    '''
    Inform user about published tasks.
    Function will be executed on the task sender after the task has been published.

    Parameters
    ----------
    exchange : str
    body : dict, optional (default is None)
        The task message body, see Task Messages for a reference of possible
        fields that can be defined.
    routing_key : str
    signal : signal.Signal
    sender : str
    kwargs : dict
        Further signal arguments. If it contains a `headers` mapping with a
        "task" key, the task id is read from there instead of from `body`.

    See Also
    --------
    http://celery.readthedocs.org/en/latest/userguide/signals.html#after-task-publish
    '''
    self.task_collection.inc_send_tasks()

    # Messages using task protocol 2 carry the task meta data in the headers,
    # older messages carry it in the body.
    headers = kwargs.get("headers")
    info = headers if headers and "task" in headers else body

    # Do not let a missing or unexpected message layout break publishing:
    # the id is only used for the progress output.
    try:
        task_id = info["id"]
    except (TypeError, KeyError, IndexError):
        task_id = None

    Util.print_dyn_progress(
        "Send tasks: %d, current task id: %s, queue: %s"
        % (self.task_collection.send_tasks.value, task_id, routing_key))
def after_task_publish_action(
        self, exchange=None, body=None, routing_key=None,
        signal=None, sender=None,
        # take unknown keywords for newer APIs
        **kwargs):
    '''
    Inform user about published tasks.
    Function will be executed on the task sender after the task has been published.

    Parameters
    ----------
    exchange : str
    body : dict, optional (default is None)
        The task message body, see Task Messages for a reference of possible
        fields that can be defined.
    routing_key : str
    signal : signal.Signal
    sender : str
    kwargs : dict
        Further signal arguments. If it contains a `headers` mapping with a
        "task" key, the task id is read from there instead of from `body`.

    See Also
    --------
    http://celery.readthedocs.org/en/latest/userguide/signals.html#after-task-publish
    '''
    self.task_collection.inc_send_tasks()

    # Task protocol 2 stores the task meta data in the message headers,
    # protocol 1 stores it in the body.
    headers = kwargs.get("headers")
    if headers and "task" in headers:
        info = headers
    else:
        info = body

    # The id is purely informational, so a missing body (the declared default)
    # or an unexpected layout must not raise here.
    try:
        task_id = info["id"]
    except (TypeError, KeyError, IndexError):
        task_id = None

    Util.print_dyn_progress(
        "Send tasks: %d, current task id: %s, queue: %s"
        % (self.task_collection.send_tasks.value, task_id, routing_key))
def __init__(self, config_filename, import_db=None):
    '''
    Load the settings and create the storage.

    Parameters
    ----------
    config_filename : str
        The path to the config to load.
        If None, `settings.CONFIG_PATH` is used.
    import_db : str, optional (default is None)
        Path to the import db. Forwarded to `_get_import_db`,
        whose return value is the path that is actually used.
    '''
    # fall back to the default config path
    config_path = settings.CONFIG_PATH if config_filename is None else config_filename

    # type: Settings
    self.__settings = Settings(config_path, default_path=settings.DEFAULTS_PATH)
    log.debug("config file settings: %s\n\tCLI options may overwrite them!", self.__settings)

    # androguard path comes from the loaded settings
    Util.set_androguard_path(self.settings)

    # type: str
    resolved_import_db = self._get_import_db(import_db=import_db)
    log.info("Using import database: %s", resolved_import_db)

    # the storage is built on top of the import database
    self.__storage = self._create_storage(resolved_import_db)
def print_progess(self):
    '''
    Show the progress on run.

    Prints done/complete count, elapsed time and the name of the
    latest analyzed apk and remembers the printed string.
    '''
    done_of_total = Util.format_progress(self.cnt_done.value, self.cnt_complete)
    elapsed = timedelta(seconds=round(time() - self.start_time))
    line = '=> [%s | %s | %s]' % (done_of_total, elapsed,
                                  self.get_latest_analyzed_apk_name())

    # overwrite the previously printed line with blanks
    blanks = " " * len(self.last_printed_str)
    sys.stdout.write("\r" + blanks)

    Util.print_dyn_progress(line)
    self.last_printed_str = line
def print_progess(self):
    '''
    Show the progress on run.

    The printed line has the form ``=> [progress | elapsed | apk name]``.
    The line is stored in `last_printed_str` so that the next call
    can blank it out before printing.
    '''
    progress = Util.format_progress(self.cnt_done.value, self.cnt_complete)
    seconds_running = round(time() - self.start_time)
    time_elapsed = timedelta(seconds=seconds_running)
    apk_name = self.get_latest_analyzed_apk_name()
    output = '=> [%s | %s | %s]' % (progress, time_elapsed, apk_name)

    # wipe the old line: carriage return followed by as many blanks as it was long
    sys.stdout.write("\r" + " " * len(self.last_printed_str))
    Util.print_dyn_progress(output)

    self.last_printed_str = output
def print_progess(self):
    '''
    Show the progress on run.

    Prints the number of successful and failed tasks, the overall
    progress and the time elapsed since `start_time`.
    '''
    run_tasks = self.get_chunked_cnt(self.get_total_run_tasks())
    total_progress = Util.format_progress(run_tasks, self.cnt_total_task)
    elapsed = timedelta(seconds=round(time() - self.start_time))

    Util.print_dyn_progress(
        'Successful: %d, Failed: %d, Total: %s -- Time elapsed: %s'
        % (self.successful_tasks, self.failed_tasks, total_progress, elapsed))
def __init__(self, storage, script_list, script_hashes, min_script_needs,
             apks_or_paths, cnt_apks=None, storage_results=None, **kwargs):
    '''
    Use the `import_scripts` method to get a list<type<AndroScript>>
    from a list of absolute paths (to the scripts).

    Parameters
    ----------
    storage: RedundantStorage
        The storage to store the results.
    script_list: list<type<AndroScript>>
        List of `AndroScript`s references (not instantiated class!)
    script_hashes : list<str>
        If given, set the hash for the `AndroScript`s
    min_script_needs : tuple<bool>
        See :py:method:`ScriptUtil.get_maximal_script_options`.
    apks_or_paths: iterable<str> or list<Apk>
        List of `Apk` or paths to the apks which shall be analyzed with the given scripts.
        None is treated like an empty list.
        If you analyze from paths the `import_date` is not set!
    cnt_apks : int, optional (default is None)
        Total number of apks to analyze. If not given, calculate it.
    storage_results : Queue<tuple<str, bool>>, optional (default is None)
        Storage results. First component is the id of the entry
        and the second a boolean indication if the result has been stored in gridfs.
        A new `Queue` is created if not supplied!

    Raises
    ------
    AndroScriptError
        If an error happened while initializing some `AndroScript`.
    '''
    super(BaseAnalyzer, self).__init__()

    apks = [] if apks_or_paths is None else apks_or_paths

    self.__storage = storage
    self.__script_list = script_list
    self.__script_hashes = script_hashes
    self.__min_script_needs = min_script_needs

    # count the apks ourselves if the caller did not;
    # the helper hands back an iterable to use in place of the counted one
    if cnt_apks is None:
        apks, cnt_apks = Util.count_iterable_n_clone(apks)

    self.__apks_or_paths = apks
    self._cnt_apks = cnt_apks

    # shared memory
    self._cnt_analyzed_apks = Value('i', 0, lock=RLock())

    self._storage_results = Queue() if storage_results is None else storage_results
def __init__(self, storage, script_list, script_hashes, min_script_needs,
             apks_or_paths, cnt_apks=None, storage_results=None, **kwargs):
    '''
    Use the `import_scripts` method to get a list<type<AndroScript>>
    from a list of absolute paths (to the scripts).

    Parameters
    ----------
    storage: RedundantStorage
        The storage to store the results.
    script_list: list<type<AndroScript>>
        List of `AndroScript`s references (not instantiated class!)
    script_hashes : list<str>
        If given, set the hash for the `AndroScript`s
    min_script_needs : tuple<bool>
        See :py:method:`ScriptUtil.get_maximal_script_options`.
    apks_or_paths: iterable<str> or list<Apk>
        `Apk`s or paths to the apks which shall be analyzed with the given scripts.
        An empty list is used if None is passed.
        If you analyze from paths the `import_date` is not set!
    cnt_apks : int, optional (default is None)
        Total number of apks to analyze. Calculated if None.
    storage_results : Queue<tuple<str, bool>>, optional (default is None)
        Storage results. First component is the id of the entry
        and the second a boolean indication if the result has been stored in gridfs.
        Will be created if not supplied!

    Raises
    ------
    AndroScriptError
        If an error happened while initializing some `AndroScript`.
    '''
    super(BaseAnalyzer, self).__init__()

    # what to run and where to store it
    self.__storage = storage
    self.__script_list = script_list
    self.__script_hashes = script_hashes
    self.__min_script_needs = min_script_needs

    # what to analyze
    if apks_or_paths is None:
        apks_or_paths = []
    if cnt_apks is None:
        # calculate cnt apks if not given
        counted = Util.count_iterable_n_clone(apks_or_paths)
        apks_or_paths, cnt_apks = counted
    self.__apks_or_paths = apks_or_paths
    self._cnt_apks = cnt_apks

    # shared memory
    self._cnt_analyzed_apks = Value('i', 0, lock=RLock())

    if storage_results is not None:
        self._storage_results = storage_results
    else:
        self._storage_results = Queue()
def wrapper(*args, **kwargs):
    '''
    Call the wrapped function and retry the task if a matching exception is raised.

    An exception matches if it is one of `self.exception_tuple` and either
    no `caused_by_tuple` is configured or the exception is a `WrapperException`
    whose `caused_by` is an instance of `self.caused_by_tuple`.

    Parameters
    ----------
    args : tuple
        Positional arguments of the wrapped function.
        The first one is used as the current task (the `self` of the task method).
    kwargs : dict
        Keyword arguments of the wrapped function.

    Returns
    -------
    object
        Whatever the wrapped function returns.

    Raises
    ------
    object
        The result of `cur_task.retry(...)` if the exception matches.
        Otherwise the original exception is re-raised with its traceback.
    '''
    try:
        return func(*args, **kwargs)
    except self.exception_tuple as e:
        # match on `caused_by`
        caused_by_matches = (
            self.caused_by_tuple is None
            or (isinstance(e, WrapperException)
                and isinstance(e.caused_by, self.caused_by_tuple))
        )
        if caused_by_matches:
            # get self reference -> get subclass of RetryableTask
            cur_task = args[0]

            # use exponential backoff for retrying
            retry_time = CeleryUtil.exp_backoff(cur_task, self.max_retry_time)

            # log error
            Util.log_will_retry(retry_time, exc=e, what=func.__name__)

            # retry
            raise cur_task.retry(args=cur_task.get_retry_arguments(),
                                 exc=e,
                                 max_retries=self.max_retries,
                                 countdown=retry_time)

        # no match on exceptions -> no retry! -> propagate exception
        # (bare `raise` re-raises the handled exception with its original
        # traceback and works on Python 2 as well as Python 3)
        raise
def check_n_add(res):
    '''
    Collect the result of `res` into `results` if one is available.

    Nothing is collected if `res` is None, if fetching the result raises
    (e.g. the task has been revoked) or if the fetched result is None.

    Parameters
    ----------
    res : object
        Result handle offering `get(propagate=...)`, or None.
    '''
    if res is None:
        return

    try:
        fetched = res.get(propagate=False)

        # if chunked, result is list of multiple tasks -> unpack results
        if chunked:
            fetched = Util.flatten(fetched)

        # no result available if e.g. exception raised
        if fetched is not None:
            results.append(fetched)
    # TaskRevokedError
    except Exception:
        pass
def send_apk_args_generator(self, apk_gen):
    '''
    Generator over arguments for sending APKs.

    Parameters
    ----------
    apk_gen : generator<tuple<object, bool, object>>
        Generator over zip files or ids.
        The second component of each tuple indicates that the first one is an id
        rather than a zip file. The third component is passed through as `fast_apk`.
        See :py:method:`.AnalyzeUtil.apk_id_or_raw_data_gen` to get such a generator.

    Yields
    ------
    tuple
        (script package names, `min_script_needs`, `script_hashes`,
        zip file or id, is id, fast apk)
    '''
    # get package names from initialized scripts (once, shared by all yielded tuples)
    package_names = Util.module_names_from_class(self.script_list)

    for zip_or_id, is_id, fast_apk in apk_gen:
        task_args = (package_names, self.min_script_needs, self.script_hashes,
                     zip_or_id, is_id, fast_apk)
        yield task_args
def __setup_scripts_hash_validation(self, androscripts, script_hashes):
    '''
    Setup scripts.
    Also validate submitted script hashes if script reload is needed!

    The scripts are (re)imported via their package names, instantiated and
    stored in `self.androscripts`. Afterwards their hashes are compared
    (order independent) with `script_hashes`.

    Parameters
    ----------
    androscripts : list<str>
        List of package names.
    script_hashes : list<str>
        The hashes the loaded scripts are expected to have.

    Raises
    ------
    AnalyzeError
        If an NoAndroScriptSubclass, IOError or ModuleNotSameClassNameException has been raised.
    ImportError
    ScriptHashValidationError
        If the validation of script hashes fails after reloading scripts from disk.
    '''
    # need tuple to compare
    expected_hashes = tuple(script_hashes)

    # import script modules
    script_types = ScriptUtil.import_scripts(androscripts, via_package=True, _reload=True)

    # needed for path calculation
    script_paths = [Util.package_name_2_path(package) for package in androscripts]

    # instantiate scripts and get classes
    self.androscripts = ScriptUtil.instantiate_scripts(script_types, script_paths=script_paths)

    actual_hashes = tuple(script.hash for script in self.androscripts)
    if sorted(actual_hashes) == sorted(expected_hashes):
        return

    raise ScriptHashValidationError(expected_hashes, actual_hashes)
def __setup_scripts_hash_validation(self, androscripts, script_hashes):
    '''
    Setup scripts.
    Also validate submitted script hashes if script reload is needed!

    Parameters
    ----------
    androscripts : list<str>
        List of package names.
    script_hashes : list<str>
        Hashes to validate the freshly loaded scripts against.
        The order of the hashes does not matter.

    Raises
    ------
    AnalyzeError
        If an NoAndroScriptSubclass, IOError or ModuleNotSameClassNameException has been raised.
    ImportError
    ScriptHashValidationError
        If the validation of script hashes fails after reloading scripts from disk.
    '''
    # need tuple to compare
    submitted = tuple(script_hashes)

    # import script modules (forcing a reload)
    types = ScriptUtil.import_scripts(androscripts, via_package=True, _reload=True)

    # instantiate scripts and get classes; the paths are needed for path calculation
    paths = []
    for package_name in androscripts:
        paths.append(Util.package_name_2_path(package_name))
    self.androscripts = ScriptUtil.instantiate_scripts(types, script_paths=paths)

    loaded = tuple([ascript.hash for ascript in self.androscripts])

    hashes_differ = sorted(loaded) != sorted(submitted)
    if hashes_differ:
        raise ScriptHashValidationError(submitted, loaded)
def fast_load_from_io(file_like_object=None, apk_file_path=None, calculate_hash=True): ''' Load a FastApk from file-like object or path by unzipping only the manifest file and calculating the hash. Parameters ---------- file_like_object : file-like-object, optional (default is None) A file-like obj that points to the apk. If non given, try to open a file_like_object from the given `apk_file_path`. apk_file_path: str, optional (default is "not set") Path of apk calculate_hash : bool If true calculate the hash. This means the file has be to loaded completely into memory. If False, the hash will be calculated the first time it gets retrieved. Returns ------- apk: FastApk Raises ------ CouldNotOpenApk If the apk file could not be opened CouldNotReadManifest If the manifest file could not be read ''' from androguard.core.bytecodes import apk as androapk from androguard.patch import zipfile # apk will be loaded from `flo` variable flo = file_like_object # indicates if file has been opened from path file_open_from_path = False # no file_like_object given, open file from path if file_like_object is None and isinstance(apk_file_path, str): try: flo = open(apk_file_path, "rb") file_open_from_path = True except IOError as e: flo.close() raise CouldNotOpenApk(apk_file_path, e), None, sys.exc_info()[2] # if file path not set, show at least that it's not seen in the exceptions if apk_file_path is None: apk_file_path = "not set" # load apk into memory and calculate hash if option set flo.seek(0) _hash = None if calculate_hash: data = flo.read() # calculate hash _hash = Util.sha256(data) flo.seek(0) try: if zipfile.is_zipfile(flo): z = zipfile.ZipFile(flo) # only read manifest from zip file binary_manifest = z.read(MANIFEST_FILENAME) ap = androapk.AXMLPrinter(binary_manifest) dom = minidom.parseString(ap.get_buff()) manifest_tag = dom.getElementsByTagName(MANIFEST_TAG_NAME) # check that manifest tag is available if len(manifest_tag) > 0: # get size of uncompresses .dex file 
size_app_code = z.getinfo(COMPILED_APP_CODE).file_size # get build date (last timestamp of classes.dex in zipfile) build_date = datetime( # use tuple from zipfile and pass the unpacked content to the constructor *z.getinfo(COMPILED_APP_CODE).date_time) manifest_items = manifest_tag[0].attributes # use the namespace to ignore wrong prefixes like "ns0" version_name = manifest_items.getNamedItemNS( MANIFEST_NS, MANIFEST_VERSION_NAME).nodeValue package = manifest_items.getNamedItem( MANIFEST_PACKAGE).nodeValue return FastApk(package, version_name, path=apk_file_path, _hash=_hash, size_app_code=size_app_code, build_date=build_date) raise CouldNotOpenManifest(apk_file_path), None, sys.exc_info()[2] except Exception as e: raise CouldNotOpenApk(apk_file_path, caused_by=e), None, sys.exc_info()[2] finally: # close file if manually opened from path if file_open_from_path: flo.close()
def androguard_load_from_io(file_like_object=None, apk_file_path=None, calculate_hash=True): ''' Load a FastApk from file-like object or path by using `androgaurd`. Parameters ---------- file_like_object : file-like-object, optional (default is None) A file-like obj that points to the apk. If non given, try to open a file_like_object from the given `apk_file_path`. apk_file_path: str, optional (default is "not set") Path of apk calculate_hash : bool If true calculate the hash. This means the file has be to loaded completely into memory. If False, the hash will be calculated the first time it gets retrieved. Returns ------- apk: FastApk Raises ------ CouldNotOpenApk If the apk file could not be opened ''' # prevent circular import from androlyze.analyze import AnalyzeUtil # apk will be loaded from `flo` variable flo = file_like_object # indicates if file has been opened from path file_open_from_path = False # no file_like_object given, open file from path if file_like_object is None and isinstance(apk_file_path, str): try: flo = open(apk_file_path, "rb") file_open_from_path = True except IOError as e: flo.close() raise CouldNotOpenApk(apk_file_path, e), None, sys.exc_info()[2] # if file path not set, show at least that it's not seen in the exceptions if apk_file_path is None: apk_file_path = "not set" # load apk into memory and calculate hash if option set flo.seek(0) _hash = None data = flo.read() if calculate_hash: # calculate hash _hash = Util.sha256(data) flo.seek(0) apk = AnalyzeUtil.open_apk(data, raw=True, path=apk_file_path) if file_open_from_path: flo.close() if apk is not None: try: return FastApk.load_from_eandroapk(apk) except KeyError: pass # could not open apk -> raise error raise CouldNotOpenApk(file_path=apk_file_path)
def fast_load_from_io(file_like_object = None, apk_file_path = None, calculate_hash = True): ''' Load a FastApk from file-like object or path by unzipping only the manifest file and calculating the hash. Parameters ---------- file_like_object : file-like-object, optional (default is None) A file-like obj that points to the apk. If non given, try to open a file_like_object from the given `apk_file_path`. apk_file_path: str, optional (default is "not set") Path of apk calculate_hash : bool If true calculate the hash. This means the file has be to loaded completely into memory. If False, the hash will be calculated the first time it gets retrieved. Returns ------- apk: FastApk Raises ------ CouldNotOpenApk If the apk file could not be opened CouldNotReadManifest If the manifest file could not be read ''' from androguard.core.bytecodes import apk as androapk from androguard.patch import zipfile # apk will be loaded from `flo` variable flo = file_like_object # indicates if file has been opened from path file_open_from_path = False # no file_like_object given, open file from path if file_like_object is None and isinstance(apk_file_path, str): try: flo = open(apk_file_path, "rb") file_open_from_path = True except IOError as e: flo.close() raise CouldNotOpenApk(apk_file_path, e), None, sys.exc_info()[2] # if file path not set, show at least that it's not seen in the exceptions if apk_file_path is None: apk_file_path = "not set" # load apk into memory and calculate hash if option set flo.seek(0) _hash = None if calculate_hash: data = flo.read() # calculate hash _hash = Util.sha256(data) flo.seek(0) try: if zipfile.is_zipfile(flo): z = zipfile.ZipFile(flo) # only read manifest from zip file binary_manifest = z.read(MANIFEST_FILENAME) ap = androapk.AXMLPrinter(binary_manifest) dom = minidom.parseString(ap.get_buff()) manifest_tag = dom.getElementsByTagName(MANIFEST_TAG_NAME) # check that manifest tag is available if len(manifest_tag) > 0: # get size of uncompresses .dex file 
size_app_code = z.getinfo(COMPILED_APP_CODE).file_size # get build date (last timestamp of classes.dex in zipfile) build_date = datetime( # use tuple from zipfile and pass the unpacked content to the constructor *z.getinfo(COMPILED_APP_CODE).date_time ) manifest_items = manifest_tag[0].attributes # use the namespace to ignore wrong prefixes like "ns0" version_name = manifest_items.getNamedItemNS(MANIFEST_NS, MANIFEST_VERSION_NAME).nodeValue package = manifest_items.getNamedItem(MANIFEST_PACKAGE).nodeValue return FastApk(package, version_name, path = apk_file_path, _hash = _hash, size_app_code = size_app_code, build_date = build_date) raise CouldNotOpenManifest(apk_file_path), None, sys.exc_info()[2] except Exception as e: raise CouldNotOpenApk(apk_file_path, caused_by = e), None, sys.exc_info()[2] finally: # close file if manually opened from path if file_open_from_path: flo.close()
def androguard_load_from_io(file_like_object = None, apk_file_path = None, calculate_hash = True): ''' Load a FastApk from file-like object or path by using `androgaurd`. Parameters ---------- file_like_object : file-like-object, optional (default is None) A file-like obj that points to the apk. If non given, try to open a file_like_object from the given `apk_file_path`. apk_file_path: str, optional (default is "not set") Path of apk calculate_hash : bool If true calculate the hash. This means the file has be to loaded completely into memory. If False, the hash will be calculated the first time it gets retrieved. Returns ------- apk: FastApk Raises ------ CouldNotOpenApk If the apk file could not be opened ''' # prevent circular import from androlyze.analyze import AnalyzeUtil # apk will be loaded from `flo` variable flo = file_like_object # indicates if file has been opened from path file_open_from_path = False # no file_like_object given, open file from path if file_like_object is None and isinstance(apk_file_path, str): try: flo = open(apk_file_path, "rb") file_open_from_path = True except IOError as e: flo.close() raise CouldNotOpenApk(apk_file_path, e), None, sys.exc_info()[2] # if file path not set, show at least that it's not seen in the exceptions if apk_file_path is None: apk_file_path = "not set" # load apk into memory and calculate hash if option set flo.seek(0) _hash = None data = flo.read() if calculate_hash: # calculate hash _hash = Util.sha256(data) flo.seek(0) apk = AnalyzeUtil.open_apk(data, raw = True, path = apk_file_path) if file_open_from_path: flo.close() if apk is not None: try: return FastApk.load_from_eandroapk(apk) except KeyError: pass # could not open apk -> raise error raise CouldNotOpenApk(file_path = apk_file_path)
def print_progess(self):
    '''
    Show the progress on run.

    The output contains the successful and failed task counts,
    the formatted total progress and the elapsed time.
    '''
    chunked_cnt = self.get_chunked_cnt(self.get_total_run_tasks())
    total = Util.format_progress(chunked_cnt, self.cnt_total_task)

    seconds_running = round(time() - self.start_time)
    time_elapsed = timedelta(seconds=seconds_running)

    template = 'Successful: %d, Failed: %d, Total: %s -- Time elapsed: %s'
    message = template % (self.successful_tasks, self.failed_tasks, total, time_elapsed)
    Util.print_dyn_progress(message)
def print_progess(cnt_analyzed):
    '''
    Print the progress and the elapsed time.

    Parameters
    ----------
    cnt_analyzed : int
        Number of analyzed chunks. Multiplied with `tasks_per_chunk`
        before it is compared with `total_cnt`.
    '''
    line = '%s, Time elapsed: %s' % (
        Util.format_progress(cnt_analyzed * tasks_per_chunk, total_cnt),
        timedelta(seconds=round(time() - start)),
    )
    Util.print_dyn_progress(line)
def analyze_apk(eandro_apk, scripts, min_script_needs, propagate_error=False, reset_scripts=True):
    '''
    Analyze the `eandro_apk` with the given `scripts` assuming each `AndroScript`
    needs at least `min_script_needs`.

    Be sure that you reset the `scripts`!

    Parameters
    ----------
    eandro_apk : EAndroApk
        The apk. If None, nothing is analyzed and None is returned.
    scripts : iterable<AndroScript>
        The scripts to use for the analysis.
    min_script_needs : tuple<bool>
        See :py:meth:ScriptUtil.get_maximal_script_options`
    propagate_error : bool, optional (default is False)
        If true propagate errors raised while running a script.
        Otherwise they are logged (wrapped in an `AndroScriptError`)
        and the next script is run.
    reset_scripts : bool, optional (default is True)
        If given, reset the `AndroScript` before analyzing.

    Returns
    -------
    list<FastApk, list<AndroScript>>
        Uses `FastApk` to only store the meta information, not the apk data!
        The second component holds the scripts that ran without raising.
    None
        If error happened (a `DexError` is logged, not raised),
        if `eandro_apk` is None or if no `FastApk` has been created
        (e.g. because `scripts` is empty).
    '''
    from androlyze.analyze.exception import AndroScriptError

    try:
        # reset scripts
        if reset_scripts:
            for s in scripts:
                s.reset()

        if eandro_apk is not None:
            # stays None until the first script has been run successfully
            fastapk = None

            # analyze classes.dex with script requirements and get time
            args = [eandro_apk.get_dex()] + list(min_script_needs)
            time_s, analysis_objs = Util.timeit(analyze_dex, *args, raw=True)

            # scripts whose analysis did not raise
            script_results = []
            for s in scripts:
                try:
                    result_obj = s.analyze(eandro_apk, *analysis_objs)

                    # we only need the meta infos of the apk
                    if eandro_apk is not None:
                        fastapk = FastApk.load_from_eandroapk(eandro_apk)

                    # set androguard analysis time if script wants stats
                    s.add_apk_androguard_analyze_time(time_s)

                    # link to apk
                    if isinstance(result_obj, ResultObject):
                        result_obj.set_apk(fastapk)

                    script_results.append(s)
                except Exception as e:
                    if propagate_error:
                        # bare raise keeps the original traceback
                        raise
                    else:
                        # log and go on with the next script
                        log.exception(AndroScriptError(s, e))

            if fastapk is not None:
                # use fastapk to only store the meta information, not the apk data!
                return [fastapk, script_results]

    # interrupt analysis if analysis objects could not be created!
    except DexError as e:
        log.exception(e)
def converter(obj):
    '''
    Convert `datetime` and `ObjectId` objects to strings.

    Parameters
    ----------
    obj : object

    Returns
    -------
    str
        ISO 8601 representation for a `datetime`,
        `str(obj)` for an `ObjectId`.
    None
        For every other type.
    '''
    if isinstance(obj, datetime):
        return Util.datetime_to_iso8601(obj)
    if isinstance(obj, ObjectId):
        return str(obj)
    return None
def _analyze(self):
    '''
    See doc of :py:method:BaseAnalyzer.analyze`.

    Starts `self.concurrency` workers, feeds the work queue with the apks,
    shows a progress view and waits until the work queue has been processed.

    A first `KeyboardInterrupt` triggers a "hot shutdown": the work queue is
    cleared, the workers get a stop signal and are joined.
    A second `KeyboardInterrupt` during the hot shutdown triggers a
    "cold shutdown": SIGINT is sent to every worker.

    Returns
    -------
    int
        The value of `self.cnt_analyzed_apks` at the time of returning.
    '''
    try:
        work_queue = self.work_queue

        # create worker pool
        log.debug("starting %s workers ...", self.concurrency)
        for _ in range(self.concurrency):
            p = Worker(self.script_list, self.script_hashes, self.min_script_needs,
                       work_queue, self.storage,
                       self.cnt_analyzed_apks, self.analyzed_apks, self.storage_results)
            self.workers.append(p)
            p.daemon = True

        # start workers
        for p in self.workers:
            p.start()

        # queue has size limit -> start workers first then enqueue items
        log.info("Loading apk paths into work queue ...")
        for apk_stuff in AnalyzeUtil.apk_gen(self.apks_or_paths):
            # task is apk with all scripts
            work_queue.put(apk_stuff)

        # one sentinel per worker
        for _ in range(self.concurrency):
            # signal end-of-work
            work_queue.put(STOP_SENTINEL)

        # progress view for cli
        av = AnalysisStatsView(self.cnt_analyzed_apks, self._cnt_apks, self.analyzed_apks)
        av.daemon = True
        av.start()

        # block until workers finished
        work_queue.join()
        av.terminate()
        log.debug("joined on work queue ...")

        return self.cnt_analyzed_apks.value

    # try hot shutdown first
    except KeyboardInterrupt:
        log.warn("Hot shutdown ... ")
        try:
            # drop the pending work, then tell every worker to stop
            log.warn("clearing work queue ... ")
            Util.clear_queue(work_queue)
            log.warn("cleared work queue ... ")

            for _ in range(self.concurrency):
                # signal end-of-work
                work_queue.put(STOP_SENTINEL)

            for worker in self.workers:
                worker.join()
            log.warn("waited for all workers ... ")

            return self.cnt_analyzed_apks.value

        # if user really wants make a cold shutdown -> kill processes
        except KeyboardInterrupt:
            log.warn("Cold shutdown ... ")
            log.warn("Hard shutdown wanted! Killing all workers!")

            # kill processes via SIGINT -> send CTRL-C
            for w in self.workers:
                try:
                    os.kill(w.pid, signal.SIGINT)
                # NOTE(review): the bare except also swallows KeyboardInterrupt/SystemExit;
                # looks like deliberate best effort during shutdown -- confirm.
                except:
                    pass

            return self.cnt_analyzed_apks.value
# Creates and configures the celery application object `app`.
# Running the module as a script starts the application.

__author__ = "Nils Tobias Schmidt"
__email__ = "schmidt89 at informatik.uni-marburg.de"

import pickle

from celery import Celery
from kombu.serialization import register

from androlyze.Constants import PROJECT_NAME
from androlyze.celery.celerysettings import settings
from androlyze.settings import *
from androlyze.util import Util
# NOTE(review): `AnalyzeTask` and `tasks` are not referenced below;
# presumably imported for their side effects (task registration) -- confirm.
from androlyze.analyze.distributed.tasks.AnalyzeTask import AnalyzeTask
from celery.registry import tasks

# worker has to import androguard too
Util.set_androguard_path(settings)

# set pickle to specific protocol:
# (re)register the 'pickle' serializer so that it encodes with pickle protocol 2
# NOTE(review): `pickle.loads` executes arbitrary code for crafted input;
# messages must only come from a trusted source.
register('pickle', lambda s: pickle.dumps(s, 2), lambda s: pickle.loads(s),
         content_type='application/x-python-serialize',
         content_encoding='binary')

app = Celery(PROJECT_NAME)

# load config
app.config_from_object(CELERY_CONF)

if __name__ == '__main__':
    app.start()
disable_std_loggers() else: log_set_level(LOG_LEVEL) clilog_set_level(logging.INFO) # write to file with specified log level redirect_to_file_handler(logger_filename, LOG_LEVEL) if __name__ == "__main__": if PROFILE: import cProfile import pstats profile_filename = 'androlyze.Main_profile.txt' cProfile.run('main()', profile_filename) statsfile = open("profile_stats.txt", "wb") p = pstats.Stats(profile_filename, stream=statsfile) stats = p.strip_dirs().sort_stats('cumulative') stats.print_stats() statsfile.close() sys.exit(0) time, ret_code = Util.timeit(main) log.warn('Took %s (h/m/s)\n' % datetime.timedelta(seconds=round(time))) if DEBUG: with open("time.txt", "a") as f: f.write('%s : %s\n' % (datetime.datetime.now(), datetime.timedelta(seconds=round(time)))) sys.exit()
def _analyze(self, apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args, **kwargs):
    '''
    Analyze by running all `AndroScript`s

    Every script returned by `chain_scripts()` is run with the given arguments
    and its results are merged into `self.res`.

    Parameters
    ----------
    apk, dalvik_vm_format, vm_analysis, gvm_analysis
        Passed through unchanged to the `analyze` method of each chained script.
    args : tuple
        Passed through to each chained script.
    kwargs : dict
        Passed through to each chained script.
        `log_script_meta` (default True) is read from here and is always
        set to False before the chained scripts are called.

    Raises
    ------
    Exception
        Whatever a chained script raised, if `continue_on_script_failure()` is false.
    '''
    # log script meta ?
    log_script_meta = kwargs.get("log_script_meta", True)
    # may be disabled! check!
    if not self.log_chained_script_meta_infos():
        log_script_meta = False

    # don't log script meta infos in chained scripts inside this `ChainedScript`
    kwargs["log_script_meta"] = False

    if log_script_meta:
        # log meta infos
        self._log_chained_script_meta()

    # collect results from scripts
    collected_results = self.res

    # run over scripts
    for ascript in self.chain_scripts():
        script_result = None
        chained_script_name = self.try_get_chained_script_name(ascript)
        try:
            # analyze with script
            script_result = ascript.analyze(apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args, **kwargs)

            # store results under given categories
            categories = self.root_categories()
            if len(categories) > 0:
                # run over dict and log items
                for key, val in script_result.results.items():
                    collected_results.register_keys([key], *categories)
                    collected_results.log(key, val, *categories)
            else:
                # simply update dict
                collected_results.results.update(script_result.results)

            if log_script_meta:
                # log successful run
                collected_results.log_append_to_enum(CAT_SUCCESSFUL, chained_script_name, CAT_ROOT)

        except Exception as e:
            if log_script_meta:
                # the value that will be logged for the script failure
                failure_log_val = chained_script_name

                # if exception shall be logged, create dict with name as key and exception as value
                if self.log_script_failure_exception():
                    # exception message
                    exc_msg = Util.format_exception(sys.exc_info(), as_string=False)
                    failure_log_val = {failure_log_val: exc_msg}

                # log that script encountered an error
                collected_results.log_append_to_enum(CAT_FAILURES, failure_log_val, CAT_ROOT)

            if not self.continue_on_script_failure():
                # reraise exception if the analysis shall be stopped
                # after a script encountered an error
                raise
            else:
                # only warn and go on with the next chained script
                log.warn('''%s: The script "%s" on apk: %s caused an error! But the other scripts will still run! Have a look at the options of `ChainedScript` for exception traceback writing! \tError: %s''' % (self.__class__.__name__, ascript, apk.short_description(), e))
def import_scripts(script_list, via_package = False, _reload = False, clazz_name = None):
    '''
    Import the scripts (via file path or package name - configurable via `via_package`).

    Parameters
    ----------
    script_list: list<str>
        list of script names (absolute path) or package names.
    via_package : bool, optional (default is False)
        If true, assume package names are given instead of file paths.
    _reload : bool, optional (default is False)
        Reload scripts and delete them from internal cache.
        Only possible if `via_package`.
    clazz_name : optional (default is None)
        The name of the class to import. If none, use the name of the module.

    Returns
    -------
    list<type<AndroScript>>
        list of uninstantiated AndroScript classes

    Raises
    ------
    AnalyzeError
        If an NoAndroScriptSubclass has been raised.
    ModuleNotSameClassNameException
        If the module has no attribute named like the expected class.
    IOError
        With `filename` set to the script.
    ImportError

    Notes
    -----
    NOTE(review): the original documentation states that IOError and
    ModuleNotSameClassNameException are wrapped into an AnalyzeError as well.
    As written, they are raised from inside `except` handlers of the same `try`
    statement whose last `except` clause does the wrapping, so that clause
    does not see them. Confirm which behavior is intended.
    '''
    # late import -> prevent recursive import
    from androlyze.model.script.AndroScript import AndroScript
    from androlyze.analyze.exception import AnalyzeError

    androscripts = []

    # reload scripts if wanted
    if via_package and _reload:
        for script_package in script_list:
            log.debug("deleting %s from system modules", script_package)
            try:
                # dropping the cached module forces a fresh import below
                del sys.modules[script_package]
                log.debug("deleted")
            except KeyError:
                # module has not been imported yet
                pass

    for script in script_list:
        class_name = clazz_name

        # derive the class name from the last package component or from the file name
        if not class_name:
            if via_package:
                class_name = script.split(".")[-1]
            else:
                class_name = basename(script.split(".py")[0])

        # class name must be equivalent to the module name!
        try:
            module_package = script
            # get package name from path and cut off file extension
            if not via_package:
                module_package = Util.path_2_package_name(script)
            module = importlib.import_module(module_package)
            clazz = getattr(module, class_name)

            # check if class is derived from AndroScript
            # NOTE(review): this tests whether `clazz` is an instance of the
            # metaclass of `AndroScript`, not whether it is a subclass of
            # `AndroScript` -- confirm that this is the intended check.
            if isinstance(clazz, AndroScript.__class__):
                androscripts.append(clazz)
            else:
                raise NoAndroScriptSubclass(clazz), None, sys.exc_info()[2]

        # `getattr` failed: no attribute named `class_name` in the module
        except AttributeError as e:
            raise ModuleNotSameClassNameException(script, class_name), None, sys.exc_info()[2]
        except IOError as e:
            # annotate the error with the script before re-raising it
            e.filename = script
            raise
        except (NoAndroScriptSubclass, ModuleNotSameClassNameException, IOError) as e:
            raise AnalyzeError(e), None, sys.exc_info()[2]

    return androscripts
def fetch_results_from_mongodb(self, rds, results, wait_for_db = True,
                               # progress
                               nice_progess = False, synced_entries = None, total_sync_entries = None):
    '''
    Load the requested results from the result database and store them on disk.
    As long as loading fails, the whole fetch is repeated (unless `wait_for_db` is False).

    Parameters
    ----------
    rds : ResultDatabaseStorage
        The database to query for the results.
    results : list< tuple<id, gridfs (bool)> >
        Define which results shall be fetched. Nothing is done if None.
    wait_for_db : bool, optional (default is True)
        Retry until the data could be fetched from the db.
    nice_progess : bool, optional (default is False)
        If enabled, show a progress indicator on the cli instead of debug logging.
    synced_entries : multiprocessing.Value<int>, optional (default is None)
        If supplied, incremented for every entry that has been processed.
    total_sync_entries : multiprocessing.Value<int>, optional (default is None)
        If supplied, set to the total number of entries to sync.

    Raises
    ------
    DatabaseLoadException
        If `wait_for_db` is False and an error occurred.
    PyMongoError
        If `wait_for_db` is False and an error occurred.
    '''
    # seconds to sleep before the next attempt
    DATABASE_RETRY_TIME = 5

    # nothing requested -> nothing to do
    if results is None:
        return

    # progress is only shared if both counters (shared memory `Value`s) are given
    share_progress = not (synced_entries is None or total_sync_entries is None)

    def count_synced_entry():
        ''' Increment the shared progress counter (if progress is shared) '''
        if share_progress:
            with synced_entries.get_lock():
                synced_entries.value += 1

    # NOTE(review): a retry starts over with all entries, but `synced_entries` is
    # not reset -> entries processed before the failure get counted again.
    while True:
        try:
            # separate the ids by their kind of storage
            non_gridfs_ids, gridfs_ids = MongoUtil.split_result_ids(results)

            cnt_non_gridfs_ids = len(non_gridfs_ids)
            cnt_gridfs_ids = len(gridfs_ids)

            if share_progress:
                total_sync_entries.value = cnt_gridfs_ids + cnt_non_gridfs_ids

            # gridfs: raw data as well as metadata
            gridfs_entries_raw = (rds.get_results_for_ids(gridfs_ids, non_document = True, non_document_raw = True)
                                  if gridfs_ids else [])

            # non gridfs: regular documents
            non_gridfs_entries = (rds.get_results_for_ids(non_gridfs_ids, non_document = False, non_document_raw = True)
                                  if non_gridfs_ids else [])

            if not nice_progess:
                log.debug("fetching %d non-documents (gridfs) ... ", cnt_gridfs_ids)

            for pos, raw_entry in enumerate(gridfs_entries_raw, 1):
                # our stored metadata (for script and apk)
                meta = raw_entry.metadata

                if nice_progess:
                    Util.print_dyn_progress(Util.format_progress(pos, cnt_gridfs_ids))
                else:
                    log.debug("getting results for %s", meta[RESOBJ_APK_META][RESOBJ_APK_META_PACKAGE_NAME])

                # the apk meta infos tell us where to store the data
                fastapk = FastApk.load_from_result_dict(meta)
                file_name = raw_entry.filename

                # a failed write is logged only, the other entries are still stored
                try:
                    self.store_custom_data(fastapk.package_name, fastapk.version_name, fastapk.hash,
                                           file_name, raw_entry.read())
                except FileSysStoreException as e:
                    log.exception(e)

                count_synced_entry()

            if not nice_progess:
                log.debug("fetching %d documents (non-gridfs) ... ", cnt_non_gridfs_ids)

            for pos, document in enumerate(non_gridfs_entries, 1):
                if nice_progess:
                    Util.print_dyn_progress(Util.format_progress(pos, cnt_non_gridfs_ids))
                else:
                    clilog.debug("getting results for %s" % document[RESOBJ_APK_META][RESOBJ_APK_META_PACKAGE_NAME])

                # write results to disk
                self.store_result_dict(document)

                count_synced_entry()

            # everything has been stored
            return

        except (DatabaseLoadException, PyMongoError) as e:
            # no waiting for the db wanted -> pass the error to the caller
            if not wait_for_db:
                raise
            log.warn(e)
            Util.log_will_retry(DATABASE_RETRY_TIME, exc = e)
            sleep(DATABASE_RETRY_TIME)
def _analyze(self, apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args, **kwargs):
    '''
    Run every chained `AndroScript` on the apk and collect their results in `self.res`.

    Parameters
    ----------
    apk, dalvik_vm_format, vm_analysis, gvm_analysis
        Passed through unchanged to the `analyze` method of each chained script.
    log_script_meta : bool, optional (default is True)
        Keyword argument. If true (and not disabled via `log_chained_script_meta_infos`)
        log the meta infos as well as the successful and failed scripts.

    Raises
    ------
    Exception
        The error of a chained script, if `continue_on_script_failure` returns False.
    '''
    # meta infos are logged if the caller wants it and the option has not been disabled
    meta_logging_enabled = self.log_chained_script_meta_infos()
    log_script_meta = kwargs.get("log_script_meta", True) if meta_logging_enabled else False

    # the chained scripts themselves shall not log their meta infos
    kwargs["log_script_meta"] = False

    if log_script_meta:
        self._log_chained_script_meta()

    # all results are merged into the result object of this script
    collected_results = self.res

    for ascript in self.chain_scripts():
        chained_script_name = self.try_get_chained_script_name(ascript)
        try:
            script_result = ascript.analyze(apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args, **kwargs)

            categories = self.root_categories()
            if len(categories) == 0:
                # no categories given -> merge the results as they are
                collected_results.results.update(script_result.results)
            else:
                # register and log each item under the given categories
                for key, val in script_result.results.items():
                    collected_results.register_keys([key], *categories)
                    collected_results.log(key, val, *categories)

            if log_script_meta:
                # remember the successful run
                collected_results.log_append_to_enum(CAT_SUCCESSFUL, chained_script_name, CAT_ROOT)

        except Exception as e:
            if log_script_meta:
                if self.log_script_failure_exception():
                    # log the exception too: script name -> exception message
                    exc_msg = Util.format_exception(sys.exc_info(), as_string = False)
                    failure_log_val = {chained_script_name : exc_msg}
                else:
                    # only the name of the failed script is logged
                    failure_log_val = chained_script_name

                # remember the failed run
                collected_results.log_append_to_enum(CAT_FAILURES, failure_log_val, CAT_ROOT)

            # stop the whole analysis if a script failure shall not be tolerated
            if not self.continue_on_script_failure():
                raise

            log.warn('''%s: The script "%s" on apk: %s caused an error! But the other scripts will still run! Have a look at the options of `ChainedScript` for exception traceback writing!
\tError: %s''' % (self.__class__.__name__, ascript, apk.short_description(), e))
def analyze_apk(eandro_apk, scripts, min_script_needs, propagate_error = False, reset_scripts = True):
    '''
    Analyze the `eandro_apk` with the given `scripts` assuming each `AndroScript`
    needs at least `min_script_needs`.

    Be sure that you reset the `scripts`!

    Parameters
    ----------
    eandro_apk : EAndroApk
        The apk.
    scripts : iterable<AndroScript>
        The scripts to use for the analysis.
        One-shot iterables (e.g. generators) are supported too.
    min_script_needs : tuple<bool>
        See :py:meth:ScriptUtil.get_maximal_script_options`
    propagate_error : bool, optional (default is False)
        If true propagate errors raised while a script runs.
        Otherwise they are logged and the script is left out of the result.
    reset_scripts : bool, optional (default is True)
        If given, reset the `AndroScript` before analyzing.

    Returns
    -------
    list<FastApk, list<AndroScript>>
        Uses `FastApk` to only store the meta information, not the apk data!
        The second element holds the scripts that ran without error.
    None
        If `eandro_apk` is None, no script succeeded or a `DexError` happened.
    '''
    from androlyze.analyze.exception import AndroScriptError

    try:
        # The scripts are traversed twice (reset and analysis).
        # Materialize them, otherwise the reset would exhaust a one-shot iterable.
        scripts = list(scripts)

        # reset scripts
        if reset_scripts:
            for s in scripts:
                s.reset()

        if eandro_apk is None:
            return None

        # stays None until at least one script succeeded
        fastapk = None

        # analyze classes.dex with script requirements and get time
        args = [eandro_apk.get_dex()] + list(min_script_needs)
        time_s, analysis_objs = Util.timeit(analyze_dex, *args, raw = True)

        script_results = []
        for s in scripts:
            try:
                result_obj = s.analyze(eandro_apk, *analysis_objs)

                # we only need the meta infos of the apk
                fastapk = FastApk.load_from_eandroapk(eandro_apk)

                # set androguard analysis time if script wants stats
                s.add_apk_androguard_analyze_time(time_s)

                # link to apk
                if isinstance(result_obj, ResultObject):
                    result_obj.set_apk(fastapk)

                script_results.append(s)
            except Exception as e:
                if propagate_error:
                    raise
                log.exception(AndroScriptError(s, e))

        if fastapk is not None:
            # use fastapk to only store the meta information, not the apk data!
            return [fastapk, script_results]

    # interrupt analysis if analysis objects could not be created!
    except DexError as e:
        log.exception(e)

    return None
# NOTE(review): `quiet` and `logger_filename` are not defined in the visible code.
# Confirm where the following logging setup belongs before relying on its indentation.
if quiet:
    disable_std_loggers()
else:
    log_set_level(LOG_LEVEL)
    clilog_set_level(logging.INFO)

# write to file with specified log level
redirect_to_file_handler(logger_filename, LOG_LEVEL)

# Script entry point: either profile `main` or run it and report the elapsed time.
if __name__ == "__main__":
    if PROFILE:
        import cProfile
        import pstats

        profile_filename = 'androlyze.Main_profile.txt'
        cProfile.run('main()', profile_filename)

        # the context manager closes the stats file even if printing the statistics fails
        with open("profile_stats.txt", "wb") as statsfile:
            p = pstats.Stats(profile_filename, stream=statsfile)
            stats = p.strip_dirs().sort_stats('cumulative')
            stats.print_stats()
        sys.exit(0)

    time, ret_code = Util.timeit(main)
    elapsed = datetime.timedelta(seconds=round(time))
    log.warn('Took %s (h/m/s)\n' % elapsed)

    if DEBUG:
        # keep a history of the run times
        with open("time.txt", "a") as f:
            f.write('%s : %s\n' % (datetime.datetime.now(), elapsed))

    # hand the result of `main` over to the caller of the process
    # (None still means a successful exit)
    sys.exit(ret_code)