def fmt_metadata(metadata):
    '''
    Escape the dictionary suitable for boto.

    Two passes are made through `Util.escape_dict`:

    1. Values: `None` is replaced by the string 'N/A',
       every other value is kept as it is.
    2. Keys: each run of whitespace is replaced by a single underscore.

    Parameters
    ----------
    metadata : dict

    Returns
    -------
    The escaped metadata as returned by `Util.escape_dict`.
    '''
    def fmt(v):
        # placeholder used for values that are not set
        fallback = 'N/A'
        if v is None:
            return fallback
        return v

    # first pass: only the values are touched
    metadata = Util.escape_dict(metadata, fmt, escape_keys = False, escape_values = True)
    # second pass: only the keys are touched.
    # Raw string, because "\s" is not a valid escape sequence in a normal string literal.
    metadata = Util.escape_dict(metadata, lambda v: re.sub(r"\s+", "_", v), escape_keys = True, escape_values = False)
    return metadata
def fmt_metadata(metadata):
    '''
    Escape the dictionary suitable for boto.

    The values are processed first (`None` becomes the string 'N/A'),
    afterwards every run of whitespace inside the keys is replaced by
    a single underscore. Both steps are delegated to `Util.escape_dict`.

    Parameters
    ----------
    metadata : dict

    Returns
    -------
    The escaped metadata as returned by `Util.escape_dict`.
    '''
    def fmt(v):
        # placeholder used for values that are not set
        fallback = 'N/A'
        if v is None:
            return fallback
        return v

    # values only
    metadata = Util.escape_dict(metadata, fmt,
                                escape_keys=False, escape_values=True)
    # keys only; the pattern is a raw string because "\s" is not a valid
    # escape sequence in a normal string literal
    metadata = Util.escape_dict(metadata, lambda v: re.sub(r"\s+", "_", v),
                                escape_keys=True, escape_values=False)
    return metadata
def get_apk(self, _hash, apk = None, **kwargs): ''' Get the `EAndroApk` from `_hash`. Parameters ---------- _hash : str Hash of the .apk (sha256) apk : Apk Carries metainformation needed to build the whole path to the element in S3. Raises ------ S3StorageLoadException Returns ------- EAndroApk Apk constructed from raw data and meta infos. ''' # use to hold apk meta infos #fast_apk = FastApk(package_name, version_name, path, _hash, import_date, tag) try: apk_raw = BytesIO() if apk is None: raise S3StorageLoadException(self, content = "Apk:%s" % apk.short_description(), caused_by = RuntimeError("No APK metainformation given!")), None, sys.exc_info()[2] _id = Util.get_apk_path_incl_filename(apk) self.bucket_get(_id).get_contents_to_file(apk_raw) apk_raw.seek(0) eandro_apk = AnalyzeUtil.open_apk(apk_raw.read(), None, raw = True) return eandro_apk except (BotoClientError, S3ResponseError) as e: raise S3StorageLoadException(self, content = "Apk:%s" % apk.short_description(), caused_by = e), None, sys.exc_info()[2]
def get_apk_sub_path(self, apk):
    '''
    Build the sub path under which the `apk` is organized.

    The layout is:

    ...
    |-> package
      |-> version
        |-> sha256

    Parameters
    ----------
    apk: Apk
        Supplies `package_name`, `version_name` and `hash`.

    Returns
    -------
    str: path

    Raises
    ------
    CouldNotOpenApk
        If the APK could not be opened
    '''
    # Reading `apk.hash` may calculate the hash from the file,
    # which is where CouldNotOpenApk can come from.
    return StorageUtil.get_apk_path(apk.package_name, apk.version_name, apk.hash)
def get_s3_id(apk):
    '''
    Get the id under which the `apk` is addressed in the S3 storage.

    The id is whatever `Util.get_apk_path_incl_filename` builds for the `apk`.

    Parameters
    ----------
    apk : Apk

    Returns
    -------
    str
        The id for the apk in the S3 storage
    '''
    s3_id = Util.get_apk_path_incl_filename(apk)
    return s3_id
def escape_keys(_dict):
    '''
    Return a version of `_dict` whose keys are escaped for insertion into mongodb.

    Only the keys are passed through `escape_key`, the values are not escaped.
    The escaping is not done in-place: a deepcopy of the `_dict` is made.

    Parameters
    ----------
    _dict : dict

    Returns
    -------
    dict
    '''
    escaped = Util.escape_dict(_dict, escape_key, escape_values = False, escape_keys = True)
    return escaped
def get_apk_res_path_all_args(self, package_name, version_name, _hash):
    '''
    Build the path of the result storage for an apk.

    The path consists of the store root directory, the directory name for
    the apk results and the apk specific sub path.

    Parameters
    ----------
    package_name : str
        Package name of the apk.
        Unique apk identifier (at least in the store)
    version_name : str
        Version name
    _hash : str
        The hash of the apk.

    Returns
    -------
    str: path
    '''
    root_dir = self.store_root_dir
    res_dirname = self.APK_RES_DIRNAME
    apk_sub_path = StorageUtil.get_apk_path(package_name, version_name, _hash)
    return join(root_dir, res_dirname, apk_sub_path)
def action_delete_apks_import(storage, delete_apk = False, hashes = None, package_names = None, tags = None, select_whole_db = False):
    '''
    Delete entries from the import storage (database and/or file system).

    The apks are selected with `action_query_import_db` and then handed one by one
    to `storage.delete_entry_for_apk`. A `StorageException` is logged, not propagated.

    Parameters
    ----------
    storage : RedundantStorage
        The store to use.
    delete_apk : boolean, optional (default is False)
        Forwarded to `storage.delete_entry_for_apk`.
        The log message announces a deletion from the file system too if set.
    hashes : iterable<str>, optional (default is None)
    package_names : iterable<str>, optional (default is None)
    tags : iterable<str>, optional (default is None)
    select_whole_db : boolean, optional (default is False)
        If true, no selection criterion is required. Be careful!
        NOTE(review): `hashes`, `package_names` and `tags` are still forwarded
        to the query in this case - confirm that this is intended.

    Raises
    ------
    ValueError
        If `select_whole_db` is false and neither `hashes` nor `package_names` nor `tags` are given.
    '''
    try:
        if not select_whole_db:
            # Without explicit permission for the whole database, at least one criterion is needed
            criteria = Util.filter_not_none((hashes, package_names, tags))
            if not len(criteria) > 0:
                raise ValueError('''Neither hashes nor package names nor tags specified! If you wan't do use the whole database, set `select_whole_db` to true. ''')

        apks = action_query_import_db(storage, COMMAND_QUERY_APKS, hashes, package_names, tags)

        # use list, otherwise we have duplicates due to the generator
        for apk in list(apks):
            if not delete_apk:
                clilog.info("Will delete %s from database: %s ", apk.short_description(), storage.import_db_storage)
            else:
                clilog.info("Will delete from database and file system: \n%s ", apk.detailed_description())
            storage.delete_entry_for_apk(apk, delete_apk)

    except StorageException as e:
        log.warn(e)
def get_apk(self, _hash, apk=None, **kwargs): ''' Get the `EAndroApk` from `_hash`. Parameters ---------- _hash : str Hash of the .apk (sha256) apk : Apk Carries metainformation needed to build the whole path to the element in S3. Raises ------ S3StorageLoadException Returns ------- EAndroApk Apk constructed from raw data and meta infos. ''' # use to hold apk meta infos #fast_apk = FastApk(package_name, version_name, path, _hash, import_date, tag) try: apk_raw = BytesIO() if apk is None: raise S3StorageLoadException( self, content="Apk:%s" % apk.short_description(), caused_by=RuntimeError("No APK metainformation given!" )), None, sys.exc_info()[2] _id = Util.get_apk_path_incl_filename(apk) self.bucket_get(_id).get_contents_to_file(apk_raw) apk_raw.seek(0) eandro_apk = AnalyzeUtil.open_apk(apk_raw.read(), None, raw=True) return eandro_apk except (BotoClientError, S3ResponseError) as e: raise S3StorageLoadException(self, content="Apk:%s" % apk.short_description(), caused_by=e), None, sys.exc_info()[2]
def action_import_apks(storage, apk_paths,
                       copy_apk = False, copy_to_mongodb = False,
                       update = False, tag = None,
                       # shared memory
                       cnt_imported_apks = None, total_apk_count = None, import_finished = None,
                       # concurrent settings
                       concurrency = None
                       ):
    '''
    Import the apks from the `apk_paths` and create the file system structure
    where the results will be kept, specified by `storage`.

    The paths are split into `concurrency` parts and each part is imported
    by its own process. The function returns after all processes have been joined.

    Parameters
    ----------
    storage : RedundantStorage
        The store to use.
    apk_paths : iterable<str>
        The apk files and/or directories.
    copy_apk : bool
        Import the apk file to the `import_dir` (copy it).
    copy_to_mongodb : bool, optional (default is False)
        Also import into MongoDB. Useful for the distributed analysis.
    update : bool
        Update apks that have already been imported.
    tag : str, optional (default is None)
        Some tag
    cnt_imported_apks : multiprocessing.Value<int>, optional (default is None)
        If given, use for progress updating.
        Otherwise a new counter is created for internal use.
    total_apk_count : multiprocessing.Value<int>, optional (default is None)
        If given, use for total count of apks.
    import_finished : multiprocessing.Value<byte>, optional (default is None)
        If given, use to signal that import has been completed.
    concurrency : int, optional (default is number of cpus)
        Number of processes to use for the import.
    '''
    from androlyze.loader.ApkImporter import ApkImporter

    # get single paths to apks so we get the correct total count of apks
    clilog.info("looking for apks in given paths ... ")
    apk_paths = ApkImporter.get_apks_from_list_or_dir(apk_paths)

    if total_apk_count is not None:
        # may be time consuming for recursive lookup
        apk_paths, total_apk_count.value = Util.count_iterable_n_clone(apk_paths)

    # create count if not given
    if cnt_imported_apks is None:
        cnt_imported_apks = Value('i', 0, lock = RLock())

    # set concurrency
    if concurrency is None:
        concurrency = cpu_count()
    log.warn("Using %d processes", concurrency)

    clilog.info("Storage dir is %s", storage.fs_storage.store_root_dir)
    if copy_apk:
        clilog.info("Copying APKs to %s ...", storage.fs_storage.store_root_dir)

    def import_apks(apk_paths):
        ''' Worker: import the given part of the paths and count every imported apk '''
        apk_importer = ApkImporter(apk_paths, storage)
        for apk in apk_importer.import_apks(copy_apk = copy_apk, copy_to_mongodb = copy_to_mongodb,
                                            update = update, tag = tag):
            clilog.info("imported %s", apk.short_description())

            # the shared counter always exists at this point (created above if not given)
            with cnt_imported_apks.get_lock():
                cnt_imported_apks.value += 1

    pool = []

    # don't convert generator to list if only 1 process wanted
    apk_paths = [apk_paths] if concurrency == 1 else Util.split_n_uniform_distri(list(apk_paths), concurrency)

    # start parallel import
    # multiprocessing's pool causes pickle errors
    for i in range(concurrency):
        p = Process(target = import_apks, args = (apk_paths[i], ))
        log.debug("starting process %s", p)
        pool.append(p)
        p.start()

    for it in pool:
        it.join()
        # log the process that has actually been joined, after the join returned
        log.debug("joined on process %s", it)

    apks_imported = cnt_imported_apks.value != 0
    # show some message that no APK has been imported
    if not apks_imported:
        log.warn("No .apk file has been imported! This means no .apk file has been found or they already have been imported.")
    else:
        clilog.info("done")

    # because not all apks may be importable, we cannot use the count to signal that the import is done
    if import_finished is not None:
        import_finished.value = 1

    clilog.info("Imported %d apks", cnt_imported_apks.value)
from androlyze.storage import Util from androlyze.storage.exception import StorageException from androlyze.storage.resultdb import MongoUtil from androlyze.util import Util from androlyze.Constants import * __all__ = [] __author__ = u"Nils Tobias Schmidt, Lars Baumgärtner" __copyright__ = PROJECT_COPYRIGHT __license__ = PROJECT_LICENSE __version__ = PROJECT_VERSION __email__ = "{schmidt89,lbaumgaertner}@informatik.uni-marburg.de" try: # load androguard Util.set_androguard_path(settings.singleton) # import namespace of androguards androlyze.py module imp.load_source("androlyze", "%s/androlyze.py" % settings.singleton[(SECTION_ANDROGUARD, KEY_ANDROGUARD_PATH)]) from androlyze import * except Exception as e: log.error(e) ############################################################ #---Import ############################################################ def action_import_apks(storage, apk_paths, copy_apk = False, copy_to_mongodb = False, update = False, tag = None, # shared memory cnt_imported_apks = None, total_apk_count = None, import_finished = None,