def get_apk(self, _hash, apk = None, **kwargs): ''' Get the `EAndroApk` from `_hash`. Parameters ---------- _hash : str Hash of the .apk (sha256) apk : Apk Carries metainformation needed to build the whole path to the element in S3. Raises ------ S3StorageLoadException Returns ------- EAndroApk Apk constructed from raw data and meta infos. ''' # use to hold apk meta infos #fast_apk = FastApk(package_name, version_name, path, _hash, import_date, tag) try: apk_raw = BytesIO() if apk is None: raise S3StorageLoadException(self, content = "Apk:%s" % apk.short_description(), caused_by = RuntimeError("No APK metainformation given!")), None, sys.exc_info()[2] _id = Util.get_apk_path_incl_filename(apk) self.bucket_get(_id).get_contents_to_file(apk_raw) apk_raw.seek(0) eandro_apk = AnalyzeUtil.open_apk(apk_raw.read(), None, raw = True) return eandro_apk except (BotoClientError, S3ResponseError) as e: raise S3StorageLoadException(self, content = "Apk:%s" % apk.short_description(), caused_by = e), None, sys.exc_info()[2]
def get_apk(self, _hash, **kwargs): ''' Get the `EAndroApk` from `_hash`. Parameters ---------- _hash : str Hash of the .apk (sha256) Raises ------ DatabaseLoadException NoFile If the file is not present. Returns ------- EAndroApk Apk constructed from raw data and meta infos. ''' try: gridfs = self.__apk_coll log.info("getting apk: %s from mongodb ...", _hash) gridfs_obj = gridfs.get(_hash) # get raw .apk apk_zipfile = gridfs_obj.read() # get apk meta infos apk_meta = gridfs_obj.metadata package_name, version_name, path, _hash, import_date, tag = apk_meta[RESOBJ_APK_META_PACKAGE_NAME], apk_meta[RESOBJ_APK_META_VERSION_NAME], apk_meta[RESOBJ_APK_META_PATH], apk_meta[RESOBJ_APK_META_HASH], apk_meta[RESOBJ_APK_META_IMPORT_DATE], apk_meta[RESOBJ_APK_META_TAG] # use to hold apk meta infos fast_apk = FastApk(package_name, version_name, path, _hash, import_date, tag) eandro_apk = AnalyzeUtil.open_apk(apk_zipfile, fast_apk, raw = True) log.info("got apk") return eandro_apk except NoFile: raise except PyMongoError as e: raise DatabaseLoadException(self, content = "Apk (hash=%s)" % _hash, caused_by = e), None, sys.exc_info()[2]
def get_apk(self, _hash, apk=None, **kwargs): ''' Get the `EAndroApk` from `_hash`. Parameters ---------- _hash : str Hash of the .apk (sha256) apk : Apk Carries metainformation needed to build the whole path to the element in S3. Raises ------ S3StorageLoadException Returns ------- EAndroApk Apk constructed from raw data and meta infos. ''' # use to hold apk meta infos #fast_apk = FastApk(package_name, version_name, path, _hash, import_date, tag) try: apk_raw = BytesIO() if apk is None: raise S3StorageLoadException( self, content="Apk:%s" % apk.short_description(), caused_by=RuntimeError("No APK metainformation given!" )), None, sys.exc_info()[2] _id = Util.get_apk_path_incl_filename(apk) self.bucket_get(_id).get_contents_to_file(apk_raw) apk_raw.seek(0) eandro_apk = AnalyzeUtil.open_apk(apk_raw.read(), None, raw=True) return eandro_apk except (BotoClientError, S3ResponseError) as e: raise S3StorageLoadException(self, content="Apk:%s" % apk.short_description(), caused_by=e), None, sys.exc_info()[2]
def run(self):
    '''
    Process work items from `self.work_queue` until `STOP_SENTINEL` is read.

    Each work item is a 3-tuple whose first two components are the apk path
    and the apk meta object passed to `AnalyzeUtil.open_apk`.
    Errors of a single item are logged and do not stop the loop.
    A `KeyboardInterrupt` ends the method silently.
    '''
    jobs = self.work_queue
    try:
        for job in iter(jobs.get, STOP_SENTINEL):
            try:
                path, apk_meta, _unused = job
                opened_apk = AnalyzeUtil.open_apk(path, apk=apk_meta)

                # run the analysis
                analysis_result = self.analyze_apk(opened_apk)

                # keep track of the apks that have been analyzed so far
                if opened_apk:
                    analyzed = FastApk.load_from_eandroapk(opened_apk)
                    self.analyzed_apks.put(analyzed)

                if analysis_result is None:
                    # nothing to store, only count the apk
                    self.add_analyzed_apks_sm(1)
                else:
                    self.__store_results([analysis_result])
            except KeyboardInterrupt:
                raise
            except Exception as err:
                log.exception(err)
            finally:
                # one task has been taken from the queue -> mark it done
                jobs.task_done()

        # the sentinel has been taken from the queue, too
        jobs.task_done()
        jobs.close()
    except KeyboardInterrupt:
        # be silent
        pass
def run(self):
    '''
    Worker loop: take work items from `self.work_queue` and analyze them
    until `STOP_SENTINEL` is read.

    A work item is unpacked into three components; the first two are the apk
    path and the apk meta object handed to `AnalyzeUtil.open_apk`.
    Exceptions raised for a single item are logged, the loop continues.
    A `KeyboardInterrupt` terminates the method without an error.
    '''
    queue = self.work_queue
    try:
        for item in iter(queue.get, STOP_SENTINEL):
            try:
                location, meta_apk, _ignored = item
                loaded_apk = AnalyzeUtil.open_apk(location, apk=meta_apk)

                # do the analysis
                outcome = self.analyze_apk(loaded_apk)

                # remember the apks analyzed so far
                if loaded_apk:
                    self.analyzed_apks.put(
                        FastApk.load_from_eandroapk(loaded_apk))

                # collect the result, or only increment the counter
                if outcome is None:
                    self.add_analyzed_apks_sm(1)
                else:
                    self.__store_results([outcome])
            except KeyboardInterrupt:
                raise
            except Exception as error:
                log.exception(error)
            finally:
                # signal that this item is done
                queue.task_done()

        # signal that the sentinel has been read
        queue.task_done()
        queue.close()
    except KeyboardInterrupt:
        # be silent
        pass
def run(self, androscripts, min_script_needs, script_hashes, apk_zipfile_or_hash, is_hash=True, fast_apk=None):
    '''
    Do the analysis on the apk with the given scripts.

    Parameters
    ----------
    androscripts : list<str>
        List of package names.
    min_script_needs : tuple<bool>
        See :py:method:`ScriptUtil.get_maximal_script_options`.
    script_hashes : list<str>
        If given, set the hash for the `AndroScript`s
    apk_zipfile_or_hash : str
        The raw content of the .apk file (zipfile) or the hash of it (sha256, id in db).
    is_hash : bool, optional (default is True)
        Determines if `apk_zipfile_or_hash` is a hash (id).
    fast_apk : FastApk, optional (default is None)
        Holds the meta infos for the apk.

    Returns
    -------
    tuple<tuple<str, bool>>
        First component is the id of the entry
        and the second a boolean indication if the result has been stored in gridfs.
    ()
        If the apk could not be opened, the analysis returned nothing
        or no storage results are available.
    '''
    try:
        # arguments of this call, kept for a retry of the method
        self.__retry_arguments = (
            androscripts, min_script_needs, script_hashes,
            apk_zipfile_or_hash, is_hash, fast_apk,
        )

        validate_hashes = settings.script_hash_validation_enabled()

        # open database/apk storage if not already done
        # reschedule job if connection/open error
        self.__open_db()
        self.__open_apk_storage()

        # setup scripts: validate the sent hashes against the local script
        # hashes, or reuse the scripts if possible
        if validate_hashes:
            setup_scripts = self.__setup_scripts_hash_validation
        else:
            setup_scripts = self.__setup_scripts_reuse
        setup_scripts(androscripts, script_hashes)

        # open apk
        if is_hash:
            # try the prefetched apk pool first, fall back to the storage
            opened_apk = apk_prefetch_pool.get(apk_zipfile_or_hash, None)
            if opened_apk is None:
                opened_apk = self.__get_apk_from_storage_retry(
                    apk_zipfile_or_hash, apk=fast_apk)
        else:
            log.info("opening apk via raw data ... \n")
            opened_apk = AnalyzeUtil.open_apk(
                apk_or_path=apk_zipfile_or_hash, apk=fast_apk, raw=True)

        # if None, could not be opened and error has been logged
        if opened_apk is None:
            return ()

        analysis = AnalyzeUtil.analyze_apk(
            opened_apk, self.androscripts, min_script_needs,
            propagate_error=False,
            reset_scripts=not validate_hashes)
        if analysis is None:
            return ()

        analyzed_apk, script_results = analysis
        log.info("analyzed %s", analyzed_apk.short_description())

        # can be None if an error occurred
        stored = self.__store_results(analyzed_apk, script_results)
        if stored:
            return tuple(stored)
        return ()

    except SoftTimeLimitExceeded:
        log.warn("Task %s exceeded it's soft time limit!", self)
        raise
    except ScriptHashValidationError:
        raise
    finally:
        # delete from pool -> we don't need it anymore in the pool
        if is_hash and apk_zipfile_or_hash in apk_prefetch_pool:
            del apk_prefetch_pool[apk_zipfile_or_hash]
def androguard_load_from_io(file_like_object=None, apk_file_path=None, calculate_hash=True): ''' Load a FastApk from file-like object or path by using `androgaurd`. Parameters ---------- file_like_object : file-like-object, optional (default is None) A file-like obj that points to the apk. If non given, try to open a file_like_object from the given `apk_file_path`. apk_file_path: str, optional (default is "not set") Path of apk calculate_hash : bool If true calculate the hash. This means the file has be to loaded completely into memory. If False, the hash will be calculated the first time it gets retrieved. Returns ------- apk: FastApk Raises ------ CouldNotOpenApk If the apk file could not be opened ''' # prevent circular import from androlyze.analyze import AnalyzeUtil # apk will be loaded from `flo` variable flo = file_like_object # indicates if file has been opened from path file_open_from_path = False # no file_like_object given, open file from path if file_like_object is None and isinstance(apk_file_path, str): try: flo = open(apk_file_path, "rb") file_open_from_path = True except IOError as e: flo.close() raise CouldNotOpenApk(apk_file_path, e), None, sys.exc_info()[2] # if file path not set, show at least that it's not seen in the exceptions if apk_file_path is None: apk_file_path = "not set" # load apk into memory and calculate hash if option set flo.seek(0) _hash = None data = flo.read() if calculate_hash: # calculate hash _hash = Util.sha256(data) flo.seek(0) apk = AnalyzeUtil.open_apk(data, raw=True, path=apk_file_path) if file_open_from_path: flo.close() if apk is not None: try: return FastApk.load_from_eandroapk(apk) except KeyError: pass # could not open apk -> raise error raise CouldNotOpenApk(file_path=apk_file_path)
def androguard_load_from_io(file_like_object = None, apk_file_path = None, calculate_hash = True): ''' Load a FastApk from file-like object or path by using `androgaurd`. Parameters ---------- file_like_object : file-like-object, optional (default is None) A file-like obj that points to the apk. If non given, try to open a file_like_object from the given `apk_file_path`. apk_file_path: str, optional (default is "not set") Path of apk calculate_hash : bool If true calculate the hash. This means the file has be to loaded completely into memory. If False, the hash will be calculated the first time it gets retrieved. Returns ------- apk: FastApk Raises ------ CouldNotOpenApk If the apk file could not be opened ''' # prevent circular import from androlyze.analyze import AnalyzeUtil # apk will be loaded from `flo` variable flo = file_like_object # indicates if file has been opened from path file_open_from_path = False # no file_like_object given, open file from path if file_like_object is None and isinstance(apk_file_path, str): try: flo = open(apk_file_path, "rb") file_open_from_path = True except IOError as e: flo.close() raise CouldNotOpenApk(apk_file_path, e), None, sys.exc_info()[2] # if file path not set, show at least that it's not seen in the exceptions if apk_file_path is None: apk_file_path = "not set" # load apk into memory and calculate hash if option set flo.seek(0) _hash = None data = flo.read() if calculate_hash: # calculate hash _hash = Util.sha256(data) flo.seek(0) apk = AnalyzeUtil.open_apk(data, raw = True, path = apk_file_path) if file_open_from_path: flo.close() if apk is not None: try: return FastApk.load_from_eandroapk(apk) except KeyError: pass # could not open apk -> raise error raise CouldNotOpenApk(file_path = apk_file_path)
def run(self, androscripts, min_script_needs, script_hashes, apk_zipfile_or_hash, is_hash=True, fast_apk=None):
    '''
    Do the analysis on the apk with the given scripts.

    Parameters
    ----------
    androscripts : list<str>
        List of package names.
    min_script_needs : tuple<bool>
        See :py:method:`ScriptUtil.get_maximal_script_options`.
    script_hashes : list<str>
        If given, set the hash for the `AndroScript`s
    apk_zipfile_or_hash : str
        The raw content of the .apk file (zipfile) or the hash of it (sha256, id in db).
    is_hash : bool, optional (default is True)
        Determines if `apk_zipfile_or_hash` is a hash (id).
    fast_apk : FastApk, optional (default is None)
        Holds the meta infos for the apk.

    Returns
    -------
    tuple<tuple<str, bool>>
        First component is the id of the entry
        and the second a boolean indication if the result has been stored in gridfs.
    ()
        If the apk could not be opened, the analysis returned nothing
        or no storage results are available.
    '''
    try:
        # keep the call arguments, they are needed to retry the method
        retry_args = (androscripts, min_script_needs, script_hashes,
                      apk_zipfile_or_hash, is_hash, fast_apk)
        self.__retry_arguments = retry_args

        hash_validation = settings.script_hash_validation_enabled()

        # open database/apk storage if not already done
        # reschedule job if connection/open error
        self.__open_db()
        self.__open_apk_storage()

        # setup scripts
        if not hash_validation:
            # reuse if possible
            self.__setup_scripts_reuse(androscripts, script_hashes)
        else:
            # validate sent hashes with local script hashes
            self.__setup_scripts_hash_validation(androscripts, script_hashes)

        # open apk
        if is_hash:
            # get apk from prefetched apk pool
            apk_obj = apk_prefetch_pool.get(apk_zipfile_or_hash, None)
            if apk_obj is None:
                # could not prefetch -> load from the storage
                apk_obj = self.__get_apk_from_storage_retry(
                    apk_zipfile_or_hash, apk=fast_apk)
        else:
            log.info("opening apk via raw data ... \n")
            apk_obj = AnalyzeUtil.open_apk(
                apk_or_path=apk_zipfile_or_hash, apk=fast_apk, raw=True)

        stored_results = None
        # if None, could not be opened and error has been logged
        if apk_obj is not None:
            analysis_result = AnalyzeUtil.analyze_apk(
                apk_obj, self.androscripts, min_script_needs,
                propagate_error=False,
                reset_scripts=not hash_validation)

            if analysis_result is not None:
                fastapk, script_results = analysis_result
                log.info("analyzed %s", fastapk.short_description())
                # can be None if an error occurred
                stored_results = self.__store_results(fastapk, script_results)

        return tuple(stored_results) if stored_results else ()

    except SoftTimeLimitExceeded:
        log.warn("Task %s exceeded it's soft time limit!", self)
        raise
    except ScriptHashValidationError:
        raise
    finally:
        # delete from pool -> we don't need it anymore in the pool
        if is_hash and apk_zipfile_or_hash in apk_prefetch_pool:
            del apk_prefetch_pool[apk_zipfile_or_hash]