def copy_apk(self, apk, file_like_obj, **kwargs): ''' See doc of :py:meth:`.ApkCopyInterface.copy_apk`. Inserts the apk from the `file_like_obj` into mongodb's gridfs, but only if not already in db. Returns ------- The id of the apk (in db) ''' file_like_obj.seek(0) try: gridfs = self.__apk_coll # escape keys accoring to mongodb rules apk_meta = escape_keys(apk.meta_dict()) _id = apk.hash # gridfs doesn't have an update method -> delete and insert if not gridfs.exists(**{RESOBJ_ID : _id}): # store file together with metadata filename = os.path.basename(apk_meta[RESOBJ_APK_META][RESOBJ_APK_META_PATH]) gridfs.put(file_like_obj, metadata = apk_meta[RESOBJ_APK_META], filename = filename, _id = _id, chunkSize = MAX_BSON_SIZE) log.info("put %s into %s", apk.short_description(), self) except (PyMongoError, BSONError) as e: raise DatabaseStoreException(self, "apk: %s" % apk.short_description(), caused_by = e), None, sys.exc_info()[2] # return id return _id
def __init__(self, import_db_name): ''' Open the database and create the table structure if not already existing. Parameters ---------- import_db_name : str Name of the database to use. Raises ------ DatabaseOpenError If the database could not be opened or set up. ''' log.info("Opening database %s", import_db_name) self.__db_name = import_db_name try: self.__conn = None self.__conn = sqlite3.connect( import_db_name, timeout=self.TIMEOUT, # use the declared type to determine the approriate converter/adapter # needed for date storage detect_types=sqlite3.PARSE_DECLTYPES) self.conn.row_factory = self.__key_val_description # create the tables if not existing self.__create() # upgrade db to latest layout self.__upgrade_db() except sqlite3.Error as e: raise DatabaseOpenError(import_db_name, caused_by=e), None, sys.exc_info()[2]
def __init__(self, import_db_name): ''' Open the database and create the table structure if not already existing. Parameters ---------- import_db_name : str Name of the database to use. Raises ------ DatabaseOpenError If the database could not be opened or set up. ''' log.info("Opening database %s", import_db_name) self.__db_name = import_db_name try: self.__conn = None self.__conn = sqlite3.connect(import_db_name, timeout = self.TIMEOUT, # use the declared type to determine the approriate converter/adapter # needed for date storage detect_types = sqlite3.PARSE_DECLTYPES ) self.conn.row_factory = self.__key_val_description # create the tables if not existing self.__create() # upgrade db to latest layout self.__upgrade_db() except sqlite3.Error as e: raise DatabaseOpenError(import_db_name, caused_by = e), None, sys.exc_info()[2]
def __init__(self, config_filename = None, import_db = None):
    '''
    Load the settings from the config file and create the storage.

    Parameters
    ----------
    config_filename : str, optional (default is `settings.CONFIG_PATH`)
        The path to the config to load.
        `None` selects `settings.CONFIG_PATH`.
    import_db : str, optional (default is read from config file)
        Path to the import db. The value is resolved by `_get_import_db`.
    '''
    # fall back to the default config location
    if config_filename is None:
        config_filename = settings.CONFIG_PATH

    # create settings variable
    # type: Settings
    self.__settings = Settings(config_filename, default_path = settings.DEFAULTS_PATH)

    log.debug("config file settings: %s\n\tCLI options may overwrite them!", self.__settings)

    # load and set androguard path from configs
    Util.set_androguard_path(self.settings)

    # type: str
    import_db = self._get_import_db(import_db = import_db)
    log.info("Using import database: %s", import_db)

    # the storage is created from the resolved import db
    self.__storage = self._create_storage(import_db)
def __init__(self, aws_id, aws_key, aws_bucket_name, s3_hostname = None):
    '''
    Create the S3 connection and open the bucket holding the APKs.

    Parameters
    ----------
    aws_id : str
        ID of the Amazon AWS account.
    aws_key : str
        Key of the Amazon AWS account.
    aws_bucket_name : str
        Bucket name where the APKs are stored.
    s3_hostname : str, optional (default is None)
        The URL for the S3 storage.
        E.g. "s3-eu-west-1.amazonaws.com"

    Raises
    ------
    RuntimeError
        If the bucket name contains a '.'.
    S3StorageOpenError
        If the bucket could not be retrieved.
    '''
    self._s3_conn = S3Connection(aws_id, aws_key, host=s3_hostname)
    self._apk_bucket_name = aws_bucket_name

    # bucket names with dots are rejected
    name_is_dot_free = "." not in aws_bucket_name
    if not name_is_dot_free:
        raise RuntimeError("Do not use '.' inside the bucket name: '%s'" % aws_bucket_name)

    try:
        bucket = self.s3_conn.get_bucket(self.apk_bucket_name)
        self._apk_bucket = bucket
        log.info("opening %s", self)
    except (BotoClientError, S3ResponseError) as e:
        raise S3StorageOpenError(self.apk_bucket_name, caused_by = e)
def __init__(self, storage, script_list, script_hashes, min_script_needs, apks_or_paths,
             concurrency=None):
    '''
    See :py:method`.BaseAnalyzer.__init__` for details on the first attributes.

    Other Parameters
    ----------------
    concurrency : int, optional (default is number of cpu cores)
        Number of workers to spawn.
    '''
    super(ParallelAnalyzer, self).__init__(storage, script_list, script_hashes, min_script_needs, apks_or_paths)

    # parallelization parameters: default to one worker per cpu core
    self.__concurrency = cpu_count() if concurrency is None else concurrency

    log.info("concurrency: %s", self.concurrency)
    log.info("Using processes")

    # queue holding the work items for the worker processes
    self.__work_queue = Queue()
    self.__work_queue.cancel_join_thread()

    # worker bookkeeping and the queue of already analyzed apks
    self.__workers = []
    self.__analyzed_apks = Queue()
def __init__(self, aws_id, aws_key, aws_bucket_name, s3_hostname=None):
    '''
    Connect to S3 and open the bucket in which the APKs are stored.

    Parameters
    ----------
    aws_id : str
        ID of the Amazon AWS account.
    aws_key : str
        Key of the Amazon AWS account.
    aws_bucket_name : str
        Bucket name where the APKs are stored.
    s3_hostname : str, optional (default is None)
        The URL for the S3 storage.
        E.g. "s3-eu-west-1.amazonaws.com"

    Raises
    ------
    RuntimeError
        If the bucket name contains a '.'.
    S3StorageOpenError
        If the bucket could not be retrieved.
    '''
    self._s3_conn = S3Connection(aws_id, aws_key, host=s3_hostname)
    self._apk_bucket_name = aws_bucket_name

    # a '.' inside the bucket name is not accepted
    contains_dot = "." in aws_bucket_name
    if contains_dot:
        error_msg = "Do not use '.' inside the bucket name: '%s'" % aws_bucket_name
        raise RuntimeError(error_msg)

    try:
        self._apk_bucket = self.s3_conn.get_bucket(self.apk_bucket_name)
        log.info("opening %s", self)
    except (BotoClientError, S3ResponseError) as e:
        raise S3StorageOpenError(self.apk_bucket_name, caused_by=e)
def __init__(self, storage, script_list, script_hashes, min_script_needs, apks_or_paths,
             concurrency = None):
    '''
    See :py:method`.BaseAnalyzer.__init__` for details on the first attributes.

    Other Parameters
    ----------------
    concurrency : int, optional (default is number of cpu cores)
        Number of workers to spawn.
    '''
    base_args = (storage, script_list, script_hashes, min_script_needs, apks_or_paths)
    super(ParallelAnalyzer, self).__init__(*base_args)

    # no explicit worker count -> use the number of cpu cores
    worker_count = concurrency
    if worker_count is None:
        worker_count = cpu_count()
    self.__concurrency = worker_count

    log.info("concurrency: %s", self.concurrency)
    log.info("Using processes")

    # parallel stuff, concerning processes
    self.__work_queue = Queue()
    self.__work_queue.cancel_join_thread()
    self.__workers = []
    self.__analyzed_apks = Queue()
def __del__(self):
    ''' Close the database connection if there is one; sqlite errors are only logged. '''
    try:
        log.info("Closing database %s", self.__db_name)
        # nothing to close if the connection is None
        if self.conn is None:
            return
        self.conn.close()
    except sqlite3.Error as e:
        log.warn(e)
def __setup_db(self):
    '''
    Open the result database unless it has been opened before.
    Db will only be set up per process, not for each task!

    Raises
    ------
    StorageException
        Error while opening.
    '''
    # already set up -> nothing to do
    if self.result_database_storage is not None:
        return

    log.info("setup_db ...")
    self.result_database_storage = ResultDatabaseStorage.factory_from_config(settings)
def __setup_scripts_reuse(self, androscripts, script_hashes):
    '''
    Set up the scripts, reusing the already loaded ones when possible.

    The given hashes are compared with the stored ones. If they are equal
    the loaded scripts are reset, otherwise the scripts are (re)imported
    and instantiated again.

    Parameters
    ----------
    androscripts : list<str>
        List of package names.
    script_hashes : list<str>
        If given, set the hash for the `AndroScript`s

    Raises
    ------
    AnalyzeError
        If an NoAndroScriptSubclass, IOError or ModuleNotSameClassNameException has been raised.
    ImportError
    '''
    # a tuple is needed for the comparison
    new_hashes = tuple(script_hashes)

    # Plain sequence comparison: the same scripts in a different order are
    # not reused. Reuse is intended for the same analysis, where the order
    # is kept.
    if new_hashes == self.script_hashes:
        log.info("reusing scripts ... ")
        for script in self.androscripts:
            script.reset()
        return

    # hashes differ -> cannot be reused
    log.info("reloading scripts cause hashes changed ... ")

    # (re)import script modules
    imported_types = ScriptUtil.import_scripts(androscripts, via_package=True, _reload=True)

    # instantiate scripts and get classes
    self.androscripts = ScriptUtil.instantiate_scripts(imported_types, script_hashes=new_hashes)

    # remember the hashes for the next comparison
    self.script_hashes = new_hashes
def __setup_db(self):
    '''
    Open database if not already done.
    The database is set up once per process, not for each task!

    Raises
    ------
    StorageException
        Error while opening.
    '''
    needs_setup = self.result_database_storage is None
    if needs_setup:
        log.info("setup_db ...")
        result_db = ResultDatabaseStorage.factory_from_config(settings)
        self.result_database_storage = result_db
def __setup_scripts_reuse(self, androscripts, script_hashes):
    '''
    Setup scripts but first try to reuse them.

    Reuse is decided by comparing the hashes: equal hashes -> the loaded
    scripts are reset, different hashes -> the scripts are reloaded.

    Parameters
    ----------
    androscripts : list<str>
        List of package names.
    script_hashes : list<str>
        If given, set the hash for the `AndroScript`s

    Raises
    ------
    AnalyzeError
        If an NoAndroScriptSubclass, IOError or ModuleNotSameClassNameException has been raised.
    ImportError
    '''
    # need tuple to compare
    hashes = tuple(script_hashes)
    hashes_unchanged = hashes == self.script_hashes

    if hashes_unchanged:
        # Scripts can be reused -> simply reset them.
        # Order matters for the comparison; reusing targets the same
        # analysis, where the order is kept.
        log.info("reusing scripts ... ")
        for androscript in self.androscripts:
            androscript.reset()
    else:
        log.info("reloading scripts cause hashes changed ... ")

        # (re)import script modules, then instantiate them
        script_types = ScriptUtil.import_scripts(androscripts, via_package = True, _reload = True)
        self.androscripts = ScriptUtil.instantiate_scripts(script_types, script_hashes = hashes)

        # set hashes for next comparison
        self.script_hashes = hashes
def get_apk(self, _hash, **kwargs): ''' Get the `EAndroApk` from `_hash`. Parameters ---------- _hash : str Hash of the .apk (sha256) Raises ------ DatabaseLoadException NoFile If the file is not present. Returns ------- EAndroApk Apk constructed from raw data and meta infos. ''' try: gridfs = self.__apk_coll log.info("getting apk: %s from mongodb ...", _hash) gridfs_obj = gridfs.get(_hash) # get raw .apk apk_zipfile = gridfs_obj.read() # get apk meta infos apk_meta = gridfs_obj.metadata package_name, version_name, path, _hash, import_date, tag = apk_meta[RESOBJ_APK_META_PACKAGE_NAME], apk_meta[RESOBJ_APK_META_VERSION_NAME], apk_meta[RESOBJ_APK_META_PATH], apk_meta[RESOBJ_APK_META_HASH], apk_meta[RESOBJ_APK_META_IMPORT_DATE], apk_meta[RESOBJ_APK_META_TAG] # use to hold apk meta infos fast_apk = FastApk(package_name, version_name, path, _hash, import_date, tag) eandro_apk = AnalyzeUtil.open_apk(apk_zipfile, fast_apk, raw = True) log.info("got apk") return eandro_apk except NoFile: raise except PyMongoError as e: raise DatabaseLoadException(self, content = "Apk (hash=%s)" % _hash, caused_by = e), None, sys.exc_info()[2]
def create_analyzer():
    '''
    Build the analyzer matching `mode`.
    Returns None if `mode` is none of the three known analyze modes.
    '''
    # arguments for BaseAnalyzer
    base_args = storage, androscript_list, script_hashes, min_script_needs, apks_or_paths

    log.info("Mode: %s", mode)

    # normal analyzer
    if mode == ANALYZE_MODE_NON_PARALLEL:
        from androlyze.analyze.Analyzer import Analyzer
        return Analyzer(*base_args)

    # parallel analyzer
    if mode == ANALYZE_MODE_PARALLEL:
        from androlyze.analyze.parallel.ParallelAnalyzer import ParallelAnalyzer
        return ParallelAnalyzer(*base_args, concurrency = concurrency)

    # distributed analyzer
    if mode == ANALYZE_MODE_DISTRIBUTED:
        from androlyze.analyze.distributed.DistributedAnalyzer import DistributedAnalyzer
        return DistributedAnalyzer(*base_args, concurrency = concurrency, serialize_apks = serialize_apks)

    return None
def prefetch_apk(self, task_id, task, *args, **kwargs):
    '''
    Prefetch the `APK` into the prefetch pool if the task received a hash
    instead of the raw apk.

    The task arguments are read from ``kwargs["args"]``.
    `NoFile`, `DatabaseOpenError` and `DatabaseLoadException` are logged here.
    '''
    try:
        # open db if not already opened
        self.__setup_db()

        task_args = kwargs["args"]
        _, _, _, apk_zipfile_or_hash, is_hash, fast_apk = task_args

        # only a hash can be prefetched
        if not is_hash:
            return

        # get apk from the apk storage
        eandro_apk = self.__get_apk_from_storage(apk_zipfile_or_hash, apk = fast_apk)
        if eandro_apk is None:
            return

        # store in prefetch pool
        apk_prefetch_pool[apk_zipfile_or_hash] = eandro_apk
        log.info("prefetched: %s, size apk cache: %d", eandro_apk.short_description(), len(apk_prefetch_pool))

    # abort if file not in db!
    except (NoFile, DatabaseOpenError, DatabaseLoadException) as e:
        log.exception(e)
def set_androguard_path(settings):
    '''
    Append the androguard path read from `settings` to `sys.path`,
    but only if androguard cannot be imported already.

    Parameters
    ----------
    settings : Settings
    '''
    # check if androguard is importable (path already set)
    try:
        import androguard
        already_importable = True
    except ImportError:
        already_importable = False

    if already_importable:
        return

    from androlyze.settings import SECTION_ANDROGUARD, KEY_ANDROGUARD_PATH
    androguard_path = settings[(SECTION_ANDROGUARD, KEY_ANDROGUARD_PATH)]

    # the location has to be set before any androguard stuff is imported
    sys.path.append(androguard_path)
    log.info('appending "%s" to sys.path', androguard_path)
def prefetch_apk(self, task_id, task, *args, **kwargs):
    '''
    Prefetch the `APK`s (given via hash) and keep them in the prefetch pool.

    The arguments of the task are taken from ``kwargs["args"]``.
    If the file is missing or the database cannot be opened/read,
    the error is logged.
    '''
    try:
        # open db if not already opened
        self.__setup_db()

        (_androscripts, _min_script_needs, _script_hashes,
         apk_zipfile_or_hash, is_hash, fast_apk) = kwargs["args"]

        # prefetch apk via hash if given
        if is_hash:
            # get apk from the apk storage
            prefetched_apk = self.__get_apk_from_storage(apk_zipfile_or_hash, apk=fast_apk)
            if prefetched_apk is not None:
                # store in prefetch pool
                apk_prefetch_pool[apk_zipfile_or_hash] = prefetched_apk
                log.info("prefetched: %s, size apk cache: %d",
                         prefetched_apk.short_description(), len(apk_prefetch_pool))

    # abort if file not in db!
    except (NoFile, DatabaseOpenError, DatabaseLoadException) as e:
        log.exception(e)
def run(self, androscripts, min_script_needs, script_hashes, apk_zipfile_or_hash, is_hash = True, fast_apk = None):
    '''
    Analyze the apk with the given scripts and store the results.

    Parameters
    ----------
    androscripts : list<str>
        List of package names.
    script_hashes : list<str>
        If given, set the hash for the `AndroScript`s
    min_script_needs : tuple<bool>
        See :py:method:`ScriptUtil.get_maximal_script_options`.
    apk_zipfile_or_hash : str
        The raw content of the .apk file (zipfile) or the hash of it (sha256, id in db).
    is_hash : bool, optional (default is True)
        Determines if `apk_zipfile_or_hash` is a hash (id).
    fast_apk : FastApk, optional (default is None)
        Holds the meta infos for the apk.

    Returns
    -------
    tuple<tuple<str, bool>>
        First component is the id of the entry
        and the second a boolean indication if the result has been stored in gridfs.
    ()
        If the apk could not be opened, analyzed or nothing has been stored.
    '''
    try:
        # keep the call arguments as retry arguments
        self.__retry_arguments = androscripts, min_script_needs, script_hashes, apk_zipfile_or_hash, is_hash, fast_apk

        eandro_apk = None
        validate_hashes = settings.script_hash_validation_enabled()

        # open database/apk storage if not already done
        self.__open_db()
        self.__open_apk_storage()

        # setup scripts
        if validate_hashes:
            # validate sent hashes with local script hashes
            self.__setup_scripts_hash_validation(androscripts, script_hashes)
        else:
            # reuse if possible
            self.__setup_scripts_reuse(androscripts, script_hashes)

        # open apk
        if is_hash:
            # look into the prefetched apk pool first
            eandro_apk = apk_prefetch_pool.get(apk_zipfile_or_hash, None)
            # not prefetched -> get it from the storage
            if eandro_apk is None:
                eandro_apk = self.__get_apk_from_storage_retry(apk_zipfile_or_hash, apk = fast_apk)
        else:
            log.info("opening apk via raw data ... ")
            eandro_apk = AnalyzeUtil.open_apk(apk_or_path = apk_zipfile_or_hash, apk = fast_apk, raw = True)

        # if None, the apk could not be opened
        if eandro_apk is None:
            return ()

        result = AnalyzeUtil.analyze_apk(eandro_apk, self.androscripts, min_script_needs,
                                         propagate_error = False,
                                         reset_scripts = not validate_hashes)
        if result is None:
            return ()

        fastapk, script_results = result
        log.info("analyzed %s", fastapk.short_description())

        # a falsy value (e.g. None) is mapped to the empty tuple
        storage_results = self.__store_results(fastapk, script_results)
        if not storage_results:
            return ()
        return tuple(storage_results)

    except SoftTimeLimitExceeded:
        log.warn("Task %s exceeded it's soft time limit!", self)
        raise
    except ScriptHashValidationError:
        raise
    finally:
        # the apk is not needed in the pool any longer
        if is_hash and apk_zipfile_or_hash in apk_prefetch_pool:
            del apk_prefetch_pool[apk_zipfile_or_hash]
def _analyze(self):
    '''
    See doc of :py:method:`.BaseAnalyzer.analyze`.

    Publish one analyze task per apk, wait for the results and revoke
    the tasks if the analysis is interrupted or fails.

    Returns
    -------
    int
        0 if a `NetworkError` occurred during the worker/network check
        or a `DatabaseOpenError` was raised.
        Otherwise the return value of `stop_analysis_view()`
        (presumably the number of analyzed apks -- verify in `stop_analysis_view`).
    '''

    # try to get registered workers
    # if the network fails at this point -> stop analysis
    try:
        clilog.info(CeleryUtil.get_workers_and_check_network())
    except NetworkError as e:
        log.critical(e)
        return 0

    # storage objects
    storage = self.storage

    clilog.info("Number of apks to analyze: %d", self._cnt_apks)

    try:
        # get analyze task
        analyze_task = tasks[CeleryConstants.get_analyze_task_name()]

        # create storage
        storage.create_or_open_sub_storages()

        # send tasks
        start = time()

        # apk generator over .apk or apk hashes
        apk_gen = AnalyzeUtil.apk_id_or_raw_data_gen(
            self.apks, force_raw_data=self.serialize_apks)

        clilog.info("Task publishing progress:")

        # send and serialize .apks
        # if analysis via path serialize them!
        if self.serialize_apks:
            log.info("sending .apks to message broker")
            # the GroupResult is available from the start and
            # filled task by task while publishing
            self.group_result = group_result = GroupResult(results=[])

            for args in self.send_apk_args_generator(apk_gen):
                task = analyze_task.delay(*args)
                group_result.add(task)

        # send only apk id and let fetch via mongodb
        else:
            log.info("sending ids of apks")

            task_group = group(
                (analyze_task.s(*args) for args in self.send_id_args_generator(apk_gen)))

            # publish tasks
            # `self.group_result` is only assigned after the call returned
            self.group_result = task_group()

        log.info("sending took %ss", (time() - start))
        sys.stderr.write("\nAnalysis progress:\n")

        # start showing analysis progress
        self.analyze_stats_view.start()

        # wait for results
        log.debug("joining on ResultGroup ... ")

        # setup callback
        callback_func = self.get_callback_func(self.success_handler, self.error_handler)
        # propagate=False: results are passed to the callback
        # NOTE(review): exact semantics depend on `CeleryUtil.join_native` -- confirm
        CeleryUtil.join_native(self.group_result, propagate=False, callback=callback_func)

        clilog.info("\nanalysis done ... ")
        log.info("distributed analysis took %ss", (time() - start))

        return self.stop_analysis_view()
    except DatabaseOpenError as e:
        log.critical(e)
        return 0

    # KeyboardInterrupt is listed explicitly as it shall be handled like any other error
    except (KeyboardInterrupt, Exception) as e:
        # a user interrupt is expected -> only log real errors
        if not isinstance(e, KeyboardInterrupt):
            log.exception(e)
        log.warn(
            "Interrupting distributed analysis ... Please wait a moment!")
        log.warn("revoking tasks on all workers ...")

        if celerysettings.CELERY_TASK_REVOCATION_ENABLED:
            # revoke tasks
            if self.group_result is None:
                # revoke via task ids
                log.debug("revoking while publishing tasks ...")
                self.task_collection.revoke_all(terminate=True, signal='SIGKILL')
            else:
                # revoke via GroupResult if yet available/created
                # first available after all tasks have been sent
                self.group_result.revoke(terminate=True, signal='SIGKILL')
            log.warn("revoked tasks and killed workers ...")

        # return the result of stopping the analysis view
        return self.stop_analysis_view()
def __init__(
        self,
        # import db stuff
        import_db_name,
        # file system stuff
        store_root_dir=None,
        # result db stuff
        result_db_name=None,
        result_db_addr=None,
        result_db_port=None,
        # auth
        result_db_username=None,
        result_db_passwd=None,
        # result db ssl stuff
        result_db_use_ssl=False,
        ssl_ca_cert=None,
        # set an apk storage
        distributed_apk_storage_factory=None,
):
    """
    Remember all parameters needed to create the storages.
    No storage is created here, all of them start out as None.

    Parameters
    ----------
    import_db_name : str
        Name of the database to use.
    store_root_dir: str, optional (default is None)
        Holds the path under which results will be stored.
        If no path is given, nothing will be stored in the file system at all.
    result_db_name : str, optional (default is "res")
        The name of the database to use.
        Will be created if not already existing.
    result_db_addr : str, optional (default is '127.0.0.1')
        Address of mongodb database server.
    result_db_port : int, optional (default is 27017)
        Port of mongodb database server.
    result_db_username : str, optional (default is None)
        No authentication at all.
    result_db_passwd : str, optional (default is None)
        No authentication at all.
    result_db_use_ssl : bool, optional (default is False)
        Use ssl for the connection.
    ssl_ca_cert : str, optional (default is None)
        The CA certificate.
    distributed_apk_storage_factory : function, optional (default is None)
        A function returning an object implementing the `ApkCopyInterface`.
        Use the function to create the storage only on demand.
    """
    # storages are created on demand via the getters
    self.__apk_distributed_storage = None
    self.__import_db_storage = None
    self.__fs_storage = None
    self.__result_db_storage = None
    self.__apk_storage = None

    # everything needed for the on-demand creation of the storages
    self.__import_db_name = import_db_name
    self.__store_root_dir = store_root_dir
    self.__apk_storage_factory = distributed_apk_storage_factory

    # result db connection parameters
    self.__result_db_name = result_db_name
    self.__result_db_addr = result_db_addr
    self.__result_db_port = result_db_port
    self.__result_db_use_ssl = result_db_use_ssl
    self.__result_db_ca_cert = ssl_ca_cert

    # auth: credentials are kept for the lazy creation of the database,
    # but don't forget to delete them after db creation!
    self.__username = result_db_username
    self.__passwd = result_db_passwd

    if self.fs_storage_disabled():
        log.info("File system result writing disabled!")
def _analyze(self):
    '''
    See doc of :py:method:`.BaseAnalyzer.analyze`.

    Start the worker processes, feed the work queue with the apks and
    block until the queue has been processed.

    A first `KeyboardInterrupt` triggers a "hot shutdown": the work queue
    is cleared and the workers are joined. A second one during the hot
    shutdown triggers a "cold shutdown": SIGINT is sent to every worker.

    Returns
    -------
    int
        The value of `cnt_analyzed_apks`.
    '''
    # bound before the try block so that the interrupt handlers can always use it
    work_queue = self.work_queue

    try:
        # create worker pool
        log.debug("starting %s workers ...", self.concurrency)
        for _ in range(self.concurrency):
            p = Worker(self.script_list, self.script_hashes, self.min_script_needs,
                       work_queue, self.storage,
                       self.cnt_analyzed_apks, self.analyzed_apks, self.storage_results)
            self.workers.append(p)
            p.daemon = True

        # start workers
        for p in self.workers:
            p.start()

        # queue has size limit -> start workers first then enqueue items
        log.info("Loading apk paths into work queue ...")
        for apk_stuff in AnalyzeUtil.apk_gen(self.apks_or_paths):
            # task is apk with all scripts
            work_queue.put(apk_stuff)

        # signal end-of-work, one sentinel per worker
        for _ in range(self.concurrency):
            work_queue.put(STOP_SENTINEL)

        # progress view for cli
        av = AnalysisStatsView(self.cnt_analyzed_apks, self._cnt_apks, self.analyzed_apks)
        av.daemon = True
        av.start()

        # block until workers finished
        work_queue.join()
        av.terminate()
        log.debug("joined on work queue ...")

        return self.cnt_analyzed_apks.value

    # try hot shutdown first
    except KeyboardInterrupt:
        log.warn("Hot shutdown ... ")
        try:
            log.warn("clearing work queue ... ")
            Util.clear_queue(work_queue)
            log.warn("cleared work queue ... ")

            # signal end-of-work, one sentinel per worker
            for _ in range(self.concurrency):
                work_queue.put(STOP_SENTINEL)

            for worker in self.workers:
                worker.join()
            log.warn("waited for all workers ... ")

            return self.cnt_analyzed_apks.value

        # if user really wants make a cold shutdown -> kill processes
        except KeyboardInterrupt:
            log.warn("Cold shutdown ... ")
            log.warn("Hard shutdown wanted! Killing all workers!")

            # kill processes via SIGINT -> send CTRL-C
            for w in self.workers:
                try:
                    os.kill(w.pid, signal.SIGINT)
                # Best effort: a worker may be gone already or never started.
                # `Exception` (instead of a bare except) keeps a further
                # KeyboardInterrupt/SystemExit from being swallowed.
                except Exception as e:
                    log.debug("could not send SIGINT to worker (pid: %s): %s", w.pid, e)

            return self.cnt_analyzed_apks.value
def create_analyzer(storage, script_list, apks_or_paths = None,
                   mode = ANALYZE_MODE_PARALLEL, concurrency = None,
                   serialize_apks = True
                   ):
    '''
    Create the analyzer only.

    Parameters
    ----------
    storage : RedundantStorage
        The store to use.
    script_list : list<str>
        List of paths to scripts (complete filename with extension).
    apks_or_paths: list<str> or list<Apk>, optional (default is None)
        List of `Apk` or paths to the apks which shall be analyzed with the given scripts
        If you analyze from paths the `import_date` is not set!
    mode : str, optional (default is `ANALYZE_MODE_PARALLEL`)
        Do an parallel analysis by default. Choose between :
        `ANALYZE_MODE_NON_PARALLEL`, `ANALYZE_MODE_PARALLEL`, `ANALYZE_MODE_DISTRIBUTED`.
    concurrency : int, optional (default is number of cpu cores)
        Number of workers to spawn.
    serialize_apks : bool, optional (default is True)
        If true, serialize .apk .
        Otherwise id (hash) of the apk will be send and fetched by the worker from the result db.
        Be sure to import the apks to the result db first!

    Returns
    -------
    The analyzer matching `mode`.
    None
        If no scripts or no apks were supplied, `mode` is unknown
        or an error has been logged.
    '''
    from androlyze.model.script import ScriptUtil
    from androlyze.analyze.exception import AndroScriptError

    try:
        # list<type<AndroScript>>
        androscript_list = ScriptUtil.import_scripts(script_list)
        instantiated_scripts = sorted(ScriptUtil.instantiate_scripts(androscript_list, script_paths = script_list))

        if not instantiated_scripts:
            log.warn("No scripts supplied!")
            return

        # the hashes allow setting the hash directly next time the script is instantiated
        script_hashes = [script.hash for script in instantiated_scripts]
        min_script_needs = ScriptUtil.get_minimum_script_options(instantiated_scripts)

        # log infos about scripts
        clilog.info('Loaded scripts:\n%s', '\n'.join((str(script) for script in instantiated_scripts)))
        log.info(ScriptUtil.androscript_options_descr(instantiated_scripts))

        # without apks there is nothing to create
        if not apks_or_paths:
            return None

        # arguments for BaseAnalyzer
        base_args = storage, androscript_list, script_hashes, min_script_needs, apks_or_paths

        log.info("Mode: %s", mode)

        # normal analyzer
        if mode == ANALYZE_MODE_NON_PARALLEL:
            from androlyze.analyze.Analyzer import Analyzer
            return Analyzer(*base_args)

        # parallel analyzer
        if mode == ANALYZE_MODE_PARALLEL:
            from androlyze.analyze.parallel.ParallelAnalyzer import ParallelAnalyzer
            return ParallelAnalyzer(*base_args, concurrency = concurrency)

        # distributed analyzer
        if mode == ANALYZE_MODE_DISTRIBUTED:
            from androlyze.analyze.distributed.DistributedAnalyzer import DistributedAnalyzer
            return DistributedAnalyzer(*base_args, concurrency = concurrency, serialize_apks = serialize_apks)

        return None

    except ApkImportError as e:
        log.warn(e)
    except IOError as e:
        # a script file could not be read -> exit
        log.warn(AndroScriptError(e.filename, caused_by = e))
        sys.exit(1)
    except ImportError as e:
        log.exception(e)
    except Exception as e:
        log.exception(e)
def run(self, androscripts, min_script_needs, script_hashes, apk_zipfile_or_hash,
        is_hash=True, fast_apk=None):
    '''
    Do the analysis on the apk with the given scripts.

    Parameters
    ----------
    androscripts : list<str>
        List of package names.
    script_hashes : list<str>
        If given, set the hash for the `AndroScript`s
    min_script_needs : tuple<bool>
        See :py:method:`ScriptUtil.get_maximal_script_options`.
    apk_zipfile_or_hash : str
        The raw contents of the .apk file (zipfile) or the hash of it (sha256, id in db).
    is_hash : bool, optional (default is True)
        Determines if `apk_zipfile_or_hash` is a hash (id).
    fast_apk : FastApk, optional (default is None)
        Holds the meta infos for the apk.

    Returns
    -------
    tuple<tuple<str, bool>>
        First component is the id of the entry
        and the second a boolean indication if the result has been stored in gridfs.
    ()
        If there is no apk, no analysis result or no storage result.
    '''
    no_results = ()
    try:
        # the call arguments are kept as retry arguments
        self.__retry_arguments = androscripts, min_script_needs, script_hashes, apk_zipfile_or_hash, is_hash, fast_apk

        eandro_apk = None
        hash_validation_enabled = settings.script_hash_validation_enabled()

        # open database/apk storage if not already done
        self.__open_db()
        self.__open_apk_storage()

        # setup scripts: validate the sent hashes with the local ones
        # or reuse the loaded scripts if possible
        if hash_validation_enabled:
            self.__setup_scripts_hash_validation(androscripts, script_hashes)
        else:
            self.__setup_scripts_reuse(androscripts, script_hashes)

        # open apk
        if not is_hash:
            log.info("opening apk via raw data ... ")
            eandro_apk = AnalyzeUtil.open_apk(
                apk_or_path=apk_zipfile_or_hash, apk=fast_apk, raw=True)
        else:
            # get apk from prefetched apk pool
            eandro_apk = apk_prefetch_pool.get(apk_zipfile_or_hash, None)
            if eandro_apk is None:
                # could not prefetch
                eandro_apk = self.__get_apk_from_storage_retry(
                    apk_zipfile_or_hash, apk=fast_apk)

        # no apk -> nothing to analyze
        if eandro_apk is None:
            return no_results

        result = AnalyzeUtil.analyze_apk(
            eandro_apk, self.androscripts, min_script_needs,
            propagate_error=False,
            reset_scripts=not hash_validation_enabled)
        if result is None:
            return no_results

        fastapk, script_results = result
        log.info("analyzed %s", fastapk.short_description())

        storage_results = self.__store_results(fastapk, script_results)
        # can be None if an error occurred
        if storage_results:
            return tuple(storage_results)
        return no_results

    except SoftTimeLimitExceeded:
        log.warn("Task %s exceeded it's soft time limit!", self)
        raise
    except ScriptHashValidationError:
        raise
    finally:
        # delete from pool -> the apk is not needed there anymore
        if is_hash and apk_zipfile_or_hash in apk_prefetch_pool:
            del apk_prefetch_pool[apk_zipfile_or_hash]
def __init__(self, db_name = None, dest_addr = None, dest_port = None, # auth username = None, passwd = None, # ssl use_ssl = False, ssl_ca_certs = None, ): ''' Create (if not existing) and open the database and collections. Parameters ---------- db_name : str, optional (default is "res") The name of the database to use. Will be created if not already existing. dest_addr : str, optional (default is '127.0.0.1') Address of mongodb database server. dest_port : int, optional (default is 27017) Port of mongodb database server. username : str, optional (default is None) No authentication at all. passwd : str, optional (default is None) No authentication at all. use_ssl : bool, optional (default is False) Use ssl for the connection. ssl_ca_certs : str, optional (default is None) The CA certificate. Raises ------ DatabaseOpenError ''' # db name not allowed if db_name == APK_DB_NAME: raise DatabaseOpenError(db_name, msg = 'Database name "%s" reserved for apk storage!' % db_name), None, sys.exc_info()[2] # set default values if db_name is None: db_name = 'res' if dest_addr is None: dest_addr = '127.0.0.1' if dest_port is None: dest_port = 27017 try: self.__db_name = db_name self.__dest_addr = dest_addr self.__dest_port = dest_port self.__use_ssl = use_ssl # only pass ssl parameters if ssl enabled ssl_params = dict(ssl = use_ssl, ssl_cert_reqs = ssl.CERT_NONE) if use_ssl else {} # set None cause if connection cannot be initiated, conn var will not in scope self.conn = None self.__conn = conn = pymongo.MongoClient(host = dest_addr, port = dest_port, **ssl_params) # authentication is per database! # do auth before probable db creation etc. if None not in (username, passwd): # authenticate if credentials given log.debug("authenticating with mongodb ...") conn["admin"].authenticate(username, passwd) else: log.debug("not authenticating with mongodb ... 
no credentials supplied!") self.__db = conn[self.db_name] # apk db self.__apk_db = conn[APK_DB_NAME] self.__apk_coll = gridfs.GridFS(self.__apk_db, GRIDFS_COLLS_PREFIX) # create/open collections self.__res_coll = self._open_res_coll() self.__files_coll = self.__db[GRIDFS_COLLS_PREFIX][GRIDFS_FILES] # grid fs for binary files, supports files > 16 mb self.__grid_fs = self._open_gridfs() # create indexes self._create_idx_for_colls() log.info("Opened database: %s", self) log.debug("CA certificate: %s", ssl_ca_certs) except PyMongoError as e: raise DatabaseOpenError(str(self), caused_by = e), None, sys.exc_info()[2]
def __init__(self,
             # import db stuff
             import_db_name,
             # file system stuff
             store_root_dir=None,
             # result db stuff
             result_db_name=None, result_db_addr=None, result_db_port=None,
             # auth
             result_db_username=None, result_db_passwd=None,
             # result db ssl stuff
             result_db_use_ssl=False, ssl_ca_cert=None,
             # set an apk storage
             distributed_apk_storage_factory=None
             ):
    '''
    Remember the settings of all storages without opening any of them.

    The constructor only stores its arguments and resets the storage
    slots to None, so that the storages can be created on demand via the getters.

    Parameters
    ----------
    import_db_name : str
        Name of the import database.
    store_root_dir : str, optional (default is None)
        Root path for results written to the file system.
        Without a path nothing will be stored in the file system at all.
    result_db_name : str, optional (default is None)
        Name of the result database. Will be created if not already existing.
    result_db_addr : str, optional (default is None)
        Address of the mongodb database server.
    result_db_port : int, optional (default is None)
        Port of the mongodb database server.
    result_db_username : str, optional (default is None)
        None means no authentication at all.
    result_db_passwd : str, optional (default is None)
        None means no authentication at all.
    result_db_use_ssl : bool, optional (default is False)
        Use ssl for the result database connection.
    ssl_ca_cert : str, optional (default is None)
        The CA certificate.
    distributed_apk_storage_factory : function, optional (default is None)
        A function returning an object implementing the `ApkCopyInterface`.
        Kept as a factory so the storage is only created on demand.
    '''
    # nothing is opened yet -> every storage slot starts out empty
    self.__import_db_storage = None
    self.__fs_storage = None
    self.__result_db_storage = None
    self.__apk_storage = None
    self.__apk_distributed_storage = None

    # import db and file system settings
    self.__import_db_name = import_db_name
    self.__store_root_dir = store_root_dir

    # result db connection settings
    self.__result_db_name = result_db_name
    self.__result_db_addr = result_db_addr
    self.__result_db_port = result_db_port
    self.__result_db_use_ssl = result_db_use_ssl
    self.__result_db_ca_cert = ssl_ca_cert

    # credentials are kept for the lazy creation of the result database;
    # don't forget to delete them after the db has been created!
    self.__username = result_db_username
    self.__passwd = result_db_passwd

    # factory for the distributed apk storage
    self.__apk_storage_factory = distributed_apk_storage_factory

    if self.fs_storage_disabled():
        log.info('File system result writing disabled!')
def _analyze(self):
    '''
    Publish the analysis tasks to the workers and wait for their results.

    See doc of :py:method:`.BaseAnalyzer.analyze`.

    Returns
    -------
    int
        0 if the worker/network check fails or the storage cannot be opened.
        Otherwise whatever `stop_analysis_view` returns
        (presumably the number of analyzed apks).
    '''
    # no workers or no network -> no analysis at all
    try:
        clilog.info(CeleryUtil.get_workers_and_check_network())
    except NetworkError as network_err:
        log.critical(network_err)
        return 0

    sub_storages = self.storage

    clilog.info("Number of apks to analyze: %d", self._cnt_apks)

    try:
        # look up the celery task that performs the analysis
        analyze_task = tasks[CeleryConstants.get_analyze_task_name()]

        sub_storages.create_or_open_sub_storages()

        # reference point for both timing log messages
        began = time()

        # yields either the raw .apk data or only the apk hashes
        apk_gen = AnalyzeUtil.apk_id_or_raw_data_gen(self.apks, force_raw_data=self.serialize_apks)

        clilog.info("Task publishing progress:")

        if not self.serialize_apks:
            # workers fetch the .apk via mongodb, only the id is sent
            log.info("sending ids of apks")
            signatures = group((analyze_task.s(*args) for args in self.send_id_args_generator(apk_gen)))
            # calling the group publishes the tasks
            self.group_result = signatures()
        else:
            # analysis via path: the .apks are serialized and sent along
            log.info("sending .apks to message broker")
            self.group_result = published = GroupResult(results=[])
            for args in self.send_apk_args_generator(apk_gen):
                published.add(analyze_task.delay(*args))

        log.info("sending took %ss", (time() - began))

        sys.stderr.write("\nAnalysis progress:\n")

        # progress view runs while we block on the results
        self.analyze_stats_view.start()

        log.debug("joining on ResultGroup ... ")

        # success/error handlers are invoked through the join callback
        CeleryUtil.join_native(
            self.group_result,
            propagate=False,
            callback=self.get_callback_func(self.success_handler, self.error_handler))

        clilog.info("\nanalysis done ... ")
        log.info("distributed analysis took %ss", (time() - began))

        return self.stop_analysis_view()

    except DatabaseOpenError as db_err:
        log.critical(db_err)
        return 0

    except (KeyboardInterrupt, Exception) as err:
        # a user interrupt is expected, anything else deserves a traceback
        if not isinstance(err, KeyboardInterrupt):
            log.exception(err)

        log.warn("Interrupting distributed analysis ... Please wait a moment!")
        log.warn("revoking tasks on all workers ...")

        if celerysettings.CELERY_TASK_REVOCATION_ENABLED:
            if self.group_result is not None:
                # the GroupResult is first available after all tasks have been sent
                self.group_result.revoke(terminate=True, signal='SIGKILL')
            else:
                # interrupted while still publishing -> revoke via task ids
                log.debug("revoking while publishing tasks ...")
                self.task_collection.revoke_all(terminate=True, signal='SIGKILL')
            log.warn("revoked tasks and killed workers ...")

        # return number of analyzed apks
        return self.stop_analysis_view()