def import_from_flo(self, file_like_object, import_path_str = "file-like object", copy2disk = False, copy2mongodb = False, update = False, tag = None): ''' Import an apk from a `file_like_object` if not already in the storage. Will also set the path (absolute) of the returned `Apk` (the directory to which it got imported) at least if `copy2disk` is true. Also sets the import date and tag. Parameters ---------- file_like_object import_path_str : str, optional (default is "file-like object") Optional string which will be passed to the Exceptions if they get raised. Describes from which the import failed. copy2disk : bool, optional (default is False) If true also import the apk file (copy it) copy2mongodb : bool, optional (default is False) Also import into MongoDB. Needed for the distributed analysis. update : bool, optional (default is False) Update apks that have already been imported. tag : str, optional (default is None) Tag the apks. Raises ------ ApkImportError Returns ------- Apk If no error occurred. ''' try: apk = FastApk.fast_load_from_io(file_like_object, import_path_str, calculate_hash = True) storage = self.storage def set_apk_meta(apk): apk.import_date = datetime.utcnow() apk.tag = tag # set apk meta set_apk_meta(apk) # set import path as new path for apk file # needed to have the correct path when creating entry ! # copy to disk and/or db _id, file_path = storage.copy_apk(apk, file_like_object, copy2db = copy2mongodb, copy2fs = copy2disk) # set path where file has been copied to # otherwise use supplied path if copy2disk: apk.path = file_path # create entry in storage storage.create_entry_for_apk(apk, update, tag) return apk except (StorageException, IOError) as e: raise ApkImportError(e), None, sys.exc_info()[2]
def store_result_dict(self, res_dict):
    ''' Store the analysis results contained in `res_dict`.

    All information needed for the storage is taken from the dict itself.

    Parameters
    ----------
    res_dict : dict
        See `ResultObject.description_dict`
    '''
    # rebuild the apk meta object and the script result from the dict
    apk = FastApk.load_from_result_dict(res_dict)
    ascript = AndroScript.load_from_result_dict(res_dict, apk)

    try:
        self.create_entry_for_apk(apk, update = True)
        self.store_result_for_apk(apk, ascript)
    except FileSysStoreException as e:
        # best-effort: storage problems are logged, not propagated
        log.warn(e)
def get_apk(self, _hash, **kwargs): ''' Get the `EAndroApk` from `_hash`. Parameters ---------- _hash : str Hash of the .apk (sha256) Raises ------ DatabaseLoadException NoFile If the file is not present. Returns ------- EAndroApk Apk constructed from raw data and meta infos. ''' try: gridfs = self.__apk_coll log.info("getting apk: %s from mongodb ...", _hash) gridfs_obj = gridfs.get(_hash) # get raw .apk apk_zipfile = gridfs_obj.read() # get apk meta infos apk_meta = gridfs_obj.metadata package_name, version_name, path, _hash, import_date, tag = apk_meta[RESOBJ_APK_META_PACKAGE_NAME], apk_meta[RESOBJ_APK_META_VERSION_NAME], apk_meta[RESOBJ_APK_META_PATH], apk_meta[RESOBJ_APK_META_HASH], apk_meta[RESOBJ_APK_META_IMPORT_DATE], apk_meta[RESOBJ_APK_META_TAG] # use to hold apk meta infos fast_apk = FastApk(package_name, version_name, path, _hash, import_date, tag) eandro_apk = AnalyzeUtil.open_apk(apk_zipfile, fast_apk, raw = True) log.info("got apk") return eandro_apk except NoFile: raise except PyMongoError as e: raise DatabaseLoadException(self, content = "Apk (hash=%s)" % _hash, caused_by = e), None, sys.exc_info()[2]
def run(self):
    ''' Worker loop: consume work items from `self.work_queue` until the
    stop sentinel is read, analyze each apk and collect the results. '''
    work_queue = self.work_queue
    try:
        # iter(callable, sentinel) keeps calling get() until it returns STOP_SENTINEL
        for work in iter(work_queue.get, STOP_SENTINEL):
            try:
                # work item layout: (path, apk-or-None, _) -- third field unused here
                apk_path, _apk, _ = work

                eandro_apk = AnalyzeUtil.open_apk(apk_path, apk=_apk)

                # do the analysis
                res = self.analyze_apk(eandro_apk)

                # remember yet analyzed APKs
                if eandro_apk:
                    self.analyzed_apks.put(FastApk.load_from_eandroapk(eandro_apk))

                # collect results
                if res is not None:
                    self.__store_results([res])
                else:
                    # increment analyzed apks counter
                    self.add_analyzed_apks_sm(1)
            except KeyboardInterrupt as e:
                # must escape the broad handler below so the outer loop stops
                raise e
            except Exception as e:
                # any other per-item failure is logged; the worker keeps running
                log.exception(e)
            finally:
                # signal one task done -- runs even on error/interrupt so the
                # queue's join() accounting stays correct
                work_queue.task_done()

        # signal sentinel read (the sentinel itself was also get()'d)
        work_queue.task_done()
        work_queue.close()
    # be silent on shutdown via Ctrl-C
    except KeyboardInterrupt:
        pass
def run(self):
    ''' Worker loop: read work items from `self.work_queue` until the stop
    sentinel arrives, analyze each apk and store the results. '''
    work_queue = self.work_queue
    try:
        # repeatedly call work_queue.get() until it returns STOP_SENTINEL
        for work in iter(work_queue.get, STOP_SENTINEL):
            try:
                # unpack the work item: (path, apk-or-None, _); third field unused
                apk_path, _apk, _ = work

                eandro_apk = AnalyzeUtil.open_apk(apk_path, apk=_apk)

                # do the analysis
                res = self.analyze_apk(eandro_apk)

                # remember yet analyzed APKs
                if eandro_apk:
                    self.analyzed_apks.put(FastApk.load_from_eandroapk(eandro_apk))

                # collect results
                if res is not None:
                    self.__store_results([res])
                else:
                    # increment analyzed apks counter
                    self.add_analyzed_apks_sm(1)
            except KeyboardInterrupt as e:
                # re-raise so it is not swallowed by the generic handler below
                raise e
            except Exception as e:
                # per-item errors are logged; the worker continues with the next item
                log.exception(e)
            finally:
                # signal one task done -- always, so queue join() accounting is correct
                work_queue.task_done()

        # signal sentinel read (the sentinel was consumed by get() too)
        work_queue.task_done()
        work_queue.close()
    # be silent on Ctrl-C
    except KeyboardInterrupt:
        pass
def analyze_apk_ana_objs(ana_objs, time_s, eandro_apk, scripts, propagate_error=False, reset_scripts=True):
    ''' Analyze the `eandro_apk` with the given `scripts` assuming each
    `AndroScript` needs at least `min_script_needs`.

    Be sure that you reset the `scripts`!

    Parameters
    ----------
    eandro_apk : EAndroApk
        The apk.
    scripts : iterable<AndroScript>
        The scripts to use for the analysis.
    propagate_error : bool, optional (default is False)
        If true propagate errors.
    reset_scripts : bool, optional (default is True)
        If given, reset the `AndroScript` before analyzing.

    Returns
    -------
    list<FastApk, list<AndroScript>>
        Uses `FastApk` to only store the meta information, not the apk data!
    None
        If error happened.
    '''
    # local import -- presumably avoids a circular import; confirm against package layout
    from androlyze.analyze.exception import AndroScriptError

    try:
        # reset scripts
        if reset_scripts:
            for s in scripts:
                s.reset()

        if eandro_apk is not None:
            fastapk = None
            script_results = []
            for s in scripts:
                try:
                    result_obj = s.analyze(eandro_apk, *ana_objs)

                    # we only need the meta infos of the apk
                    if eandro_apk is not None:
                        fastapk = FastApk.load_from_eandroapk(eandro_apk)

                    # set androguard analysis time if script wants stats
                    s.add_apk_androguard_analyze_time(time_s)

                    # link result to apk meta object
                    if isinstance(result_obj, ResultObject):
                        result_obj.set_apk(fastapk)

                    script_results.append(s)
                except Exception as e:
                    # per-script failure: propagate or just log, depending on flag
                    if propagate_error:
                        raise
                    else:
                        log.exception(AndroScriptError(s, e))

            if fastapk is not None:
                # use fastapk to only store the meta information, not the apk data!
                return [fastapk, script_results]

    # interrupt analysis if analysis objects could not be created!
    # (falls through and implicitly returns None)
    except DexError as e:
        log.exception(e)
def get_imported_apks(self, hashes=None, package_names=None, tags=None, **kwargs):
    ''' Generator over the imported apks (as `FastApk`), optionally filtered.

    Only the first supplied filter is applied, in this precedence:
    `hashes`, then `package_names`, then `tags`. With no filter, all
    imported apks are yielded.

    Other Parameters
    ----------------
    order_by : str, optional (default is TABLE_APK_IMPORT_KEY_PACKAGE_NAME)
        Sort key; must be one of `TABLE_APK_IMPORT_KEYS`.
    ascending : bool, optional (default is True)
        Sort direction.

    Raises
    ------
    ValueError
        If `order_by` is not a known column.
    ImportQueryError
        On sqlite or key errors (original traceback preserved).
    '''
    order_by = kwargs.get('order_by', TABLE_APK_IMPORT_KEY_PACKAGE_NAME)
    # prevent sql injection: order_by is interpolated into the SQL below,
    # so it must come from the known column whitelist
    if not order_by in TABLE_APK_IMPORT_KEYS:
        raise ValueError("Sort key has to be in %s, is: %s" % (TABLE_APK_IMPORT_KEYS, order_by))

    ascending = kwargs.get("ascending", True)
    sort_direction = 'ASC' if ascending else 'DESC'

    try:
        SQL_STMT = 'SELECT * FROM %s ' % TABLE_APK_IMPORT

        # create temporary database to store many values and have them later in the IN clause available
        with self.conn as _conn:
            c = _conn.cursor()
            c.execute("DROP TABLE IF EXISTS data_helper")
            c.execute("CREATE TEMPORARY TABLE data_helper (value TEXT)")

            INSERT_HELPER_STMT = "INSERT INTO data_helper VALUES (?)"
            DYN_IN_STMT = 'WHERE %s IN (SELECT * FROM data_helper)'

            # pick exactly one filter column (precedence: hashes > package_names > tags)
            args = ()
            if hashes is not None:
                args = tuple(hashes)
                SQL_STMT += DYN_IN_STMT % TABLE_APK_IMPORT_KEY_HASH
            elif package_names is not None:
                args = tuple(package_names)
                SQL_STMT += DYN_IN_STMT % TABLE_APK_IMPORT_KEY_PACKAGE_NAME
            elif tags is not None:
                args = tuple(tags)
                SQL_STMT += DYN_IN_STMT % TABLE_APK_IMPORT_KEY_TAG

            # insert values into temporary table but only if `hashes` or `package_names` has been supplied
            # otherwise return all apks
            if args:
                # executemany needs iterable<tuple>
                INSERT_ARGS = ((a, ) for a in args)
                c.executemany(INSERT_HELPER_STMT, INSERT_ARGS)

            # sort by the requested key, package name as secondary key
            SQL_STMT += ' ORDER BY %s COLLATE NOCASE %s, %s' % (
                order_by, sort_direction, TABLE_APK_IMPORT_KEY_PACKAGE_NAME)

            # get apks
            c = self.conn.cursor().execute(SQL_STMT)

            # treat cursor as iterator; rows support key access by column name
            for apk_dict in c:
                yield FastApk(
                    apk_dict[TABLE_APK_IMPORT_KEY_PACKAGE_NAME],
                    apk_dict[TABLE_APK_IMPORT_KEY_VERSION_NAME],
                    path=apk_dict[TABLE_APK_IMPORT_KEY_PATH],
                    _hash=apk_dict[TABLE_APK_IMPORT_KEY_HASH],
                    import_date=apk_dict[TABLE_APK_IMPORT_KEY_IMPORT_DATE],
                    tag=apk_dict[TABLE_APK_IMPORT_KEY_TAG],
                    size_app_code=apk_dict[TABLE_APK_IMPORT_KEY_SIZE_APP_CODE],
                    build_date=apk_dict.get(TABLE_APK_IMPORT_KEY_BUILD_DATE))
    except (sqlite3.Error, KeyError) as e:
        # describe the queried data in the raised error
        data = "all apks"
        if hashes is not None:
            data = ', '.join(hashes)
        elif package_names is not None:
            data = ', '.join(package_names)
        # python 2 re-raise keeping the original traceback
        raise ImportQueryError(DatabaseLoadException(
            self, data, e)), None, sys.exc_info()[2]
def import_from_flo(self, file_like_object, import_path_str="file-like object", copy2disk=False, copy2mongodb=False, update=False, tag=None):
    ''' Import an apk from a `file_like_object` if not already in the storage.

    Will also set the path (absolute) of the returned `Apk` (the directory
    to which it got imported) at least if `copy2disk` is true.
    Also sets the import date and tag.

    Parameters
    ----------
    file_like_object
    import_path_str : str, optional (default is "file-like object")
        Optional string which will be passed to the Exceptions if they get
        raised. Describes where the import came from.
    copy2disk : bool, optional (default is False)
        If true also import the apk file (copy it)
    copy2mongodb : bool, optional (default is False)
        Also import into MongoDB. Needed for the distributed analysis.
    update : bool, optional (default is False)
        Update apks that have already been imported.
    tag : str, optional (default is None)
        Tag the apks.

    Raises
    ------
    ApkImportError

    Returns
    -------
    Apk
        If no error occurred.
    '''
    try:
        # build the apk object from the stream; hash is computed for identification
        apk = FastApk.fast_load_from_io(file_like_object, import_path_str, calculate_hash=True)

        storage = self.storage

        def set_apk_meta(apk):
            # stamp import time (UTC) and tag
            apk.import_date = datetime.utcnow()
            apk.tag = tag

        # set apk meta
        set_apk_meta(apk)

        # set import path as new path for apk file
        # needed to have the correct path when creating entry !

        # copy to disk and/or db
        _id, file_path = storage.copy_apk(apk, file_like_object, copy2db=copy2mongodb, copy2fs=copy2disk)

        # set path where file has been copied to
        # otherwise use supplied path
        if copy2disk:
            apk.path = file_path

        # create entry in storage
        storage.create_entry_for_apk(apk, update, tag)

        return apk
    except (StorageException, IOError) as e:
        # python 2 re-raise keeping the original traceback
        raise ApkImportError(e), None, sys.exc_info()[2]
def analyze_apk_ana_objs(ana_objs, time_s, eandro_apk, scripts, propagate_error = False, reset_scripts = True):
    ''' Analyze the `eandro_apk` with the given `scripts` assuming each
    `AndroScript` needs at least `min_script_needs`.

    Be sure that you reset the `scripts`!

    Parameters
    ----------
    eandro_apk : EAndroApk
        The apk.
    scripts : iterable<AndroScript>
        The scripts to use for the analysis.
    propagate_error : bool, optional (default is False)
        If true propagate errors.
    reset_scripts : bool, optional (default is True)
        If given, reset the `AndroScript` before analyzing.

    Returns
    -------
    list<FastApk, list<AndroScript>>
        Uses `FastApk` to only store the meta information, not the apk data!
    None
        If error happened.
    '''
    # local import -- presumably avoids a circular import; confirm against package layout
    from androlyze.analyze.exception import AndroScriptError

    try:
        # reset scripts
        if reset_scripts:
            for s in scripts:
                s.reset()

        if eandro_apk is not None:
            fastapk = None
            script_results = []
            for s in scripts:
                try:
                    result_obj = s.analyze(eandro_apk, *ana_objs)

                    # we only need the meta infos of the apk
                    if eandro_apk is not None:
                        fastapk = FastApk.load_from_eandroapk(eandro_apk)

                    # set androguard analysis time if script wants stats
                    s.add_apk_androguard_analyze_time(time_s)

                    # link result to apk meta object
                    if isinstance(result_obj, ResultObject):
                        result_obj.set_apk(fastapk)

                    script_results.append(s)
                except Exception as e:
                    # per-script failure: propagate or just log, depending on flag
                    if propagate_error:
                        raise
                    else:
                        log.exception(AndroScriptError(s, e))

            if fastapk is not None:
                # use fastapk to only store the meta information, not the apk data!
                return [fastapk, script_results]

    # interrupt analysis if analysis objects could not be created!
    # (falls through and implicitly returns None)
    except DexError as e:
        log.exception(e)
def fetch_results_from_mongodb(self, rds, results, wait_for_db = True,
                               # progress
                               nice_progess = False,
                               synced_entries = None, total_sync_entries = None):
    ''' Fetch some results from the result database and write them to disk.

    If data cannot be loaded from db, try until it can be.

    Parameters
    ----------
    rds : ResultDatabaseStorage
        The database to query for the results.
    results : list< tuple<id, gridfs (bool)> >
        Define which results shall be fetched.
    wait_for_db : bool, optional (default is True)
        Wait until data could be fetched from db.
    nice_progess : bool, optional (default is False)
        If enabled update show some nice progress bar on the cli.
    synced_entries : multiprocessing.Value<int>, optional (default is None)
        If supplied store number of already synced entries.
    total_sync_entries : multiprocessing.Value<int>, optional (default is None)
        If supplied store number of total entries to sync.

    Raises
    ------
    DatabaseLoadException
        If `wait_for_db` is False and an error occurred.
    '''
    # retry in ... seconds
    DATABASE_RETRY_TIME = 5

    # if true assume both counts are shared memory (Value)
    use_shared_memory = synced_entries is not None and total_sync_entries is not None

    if results is not None:
        results_stored = False
        # retry loop: repeats the whole fetch+store until it succeeds once
        while not results_stored:
            try:
                # get ids, split by storage location
                non_gridfs_ids, gridfs_ids = MongoUtil.split_result_ids(results)

                # counts
                cnt_non_gridfs_ids = len(non_gridfs_ids)
                cnt_gridfs_ids = len(gridfs_ids)

                if use_shared_memory:
                    total_sync_entries.value = cnt_gridfs_ids + cnt_non_gridfs_ids

                # gridfs raw data as well as metadata
                gridfs_entries_raw = []
                if gridfs_ids:
                    gridfs_entries_raw = rds.get_results_for_ids(gridfs_ids, non_document = True, non_document_raw = True)

                # regular documents (non gridfs)
                non_gridfs_entries = []
                if non_gridfs_ids:
                    non_gridfs_entries = rds.get_results_for_ids(non_gridfs_ids, non_document = False, non_document_raw = True)

                if not nice_progess:
                    log.debug("fetching %d non-documents (gridfs) ... ", cnt_gridfs_ids)

                # first pass: gridfs (binary) entries
                for i, gridfs_entry_raw in enumerate(gridfs_entries_raw, 1):
                    # get our stored metadata (for script and apk)
                    gridfs_entry_meta = gridfs_entry_raw.metadata

                    if not nice_progess:
                        log.debug("getting results for %s", gridfs_entry_meta[RESOBJ_APK_META][RESOBJ_APK_META_PACKAGE_NAME])
                    else:
                        Util.print_dyn_progress(Util.format_progress(i, cnt_gridfs_ids))

                    # use apk to extract data from dict
                    fastapk = FastApk.load_from_result_dict(gridfs_entry_meta)

                    # get filename
                    file_name = gridfs_entry_raw.filename

                    # write results to disk; per-entry store errors are logged, not fatal
                    try:
                        self.store_custom_data(fastapk.package_name, fastapk.version_name, fastapk.hash, file_name, gridfs_entry_raw.read())
                    except FileSysStoreException as e:
                        log.exception(e)

                    # update shared memory progress indicator
                    if use_shared_memory:
                        with synced_entries.get_lock():
                            synced_entries.value += 1

                if not nice_progess:
                    log.debug("fetching %d documents (non-gridfs) ... ", cnt_non_gridfs_ids)

                # second pass: plain document entries
                for i, non_gridfs_entry in enumerate(non_gridfs_entries, 1):
                    if not nice_progess:
                        clilog.debug("getting results for %s" % non_gridfs_entry[RESOBJ_APK_META][RESOBJ_APK_META_PACKAGE_NAME])
                    else:
                        Util.print_dyn_progress(Util.format_progress(i, cnt_non_gridfs_ids))

                    # write results to disk
                    self.store_result_dict(non_gridfs_entry)

                    # update shared memory progress indicator
                    if use_shared_memory:
                        with synced_entries.get_lock():
                            synced_entries.value += 1

                # if not wait for db wanted stop here
                # NOTE(review): the `or not wait_for_db` is redundant -- the
                # expression is always True, ending the retry loop after one success
                results_stored = True or not wait_for_db

            except (DatabaseLoadException, PyMongoError) as e:
                # without wait_for_db, errors propagate to the caller
                if not wait_for_db:
                    raise
                # otherwise log and retry after a fixed delay
                log.warn(e)
                Util.log_will_retry(DATABASE_RETRY_TIME, exc = e)
                sleep(DATABASE_RETRY_TIME)
''' if key in res_dict or register_key: func(res_dict, key, value) else: raise KeyNotRegisteredError(key, *categories) if __name__ == '__main__': from androlyze.model.android.apk.FastApk import FastApk from datetime import datetime # we will link the `ResultObject` to an `Apk` to see it's meta information # but we don't need to link against any `Apk` ! apk = FastApk("com.foo.bar", "1.0", "/", "some hash", datetime.utcnow(), tag="exploitable") res = ResultObject(apk) res.register_bool_keys(["check1", "check2"]) # register enumeration keys res.register_enum_keys( ["activities", "content providers", "broadcast receivers", "services"], "components") # this shows how you can abstract categories, in this example into a tuple # the important point is that you need to unpack it with "*" ! ROOT_CAT = ("apkinfo", "listings") res.register_keys(["files"], *ROOT_CAT)