def run(self): """Run androguard to extract static android information @return: list of static features """ self.key = "apkinfo" apkinfo = {} if "file" not in self.task["category"] or not HAVE_ANDROGUARD: return f = File(self.task["target"]) if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type(): if not os.path.exists(self.file_path): raise CuckooProcessingError( "Sample file doesn't exist: \"%s\"" % self.file_path) try: a = APK(self.file_path) if a.is_valid_APK(): manifest = {} apkinfo["files"] = self._apk_files(a) manifest["package"] = a.get_package() # manifest["permissions"]=a.get_details_permissions_new() manifest["main_activity"] = a.get_main_activity() manifest["activities"] = a.get_activities() manifest["services"] = a.get_services() manifest["receivers"] = a.get_receivers() # manifest["receivers_actions"]=a.get__extended_receivers() manifest["providers"] = a.get_providers() manifest["libraries"] = a.get_libraries() apkinfo["manifest"] = manifest # apkinfo["certificate"] = a.get_certificate() static_calls = {} if self.check_size(apkinfo["files"]): vm = DalvikVMFormat(a.get_dex()) vmx = uVMAnalysis(vm) static_calls["all_methods"] = self.get_methods(vmx) static_calls[ "is_native_code"] = analysis.is_native_code(vmx) static_calls["is_dynamic_code"] = analysis.is_dyn_code( vmx) static_calls[ "is_reflection_code"] = analysis.is_reflection_code( vmx) # static_calls["dynamic_method_calls"]= analysis.get_show_DynCode(vmx) # static_calls["reflection_method_calls"]= analysis.get_show_ReflectionCode(vmx) # static_calls["permissions_method_calls"]= analysis.get_show_Permissions(vmx) # static_calls["crypto_method_calls"]= analysis.get_show_CryptoCode(vmx) # static_calls["native_method_calls"]= analysis.get_show_NativeMethods(vmx) else: log.warning("Dex size bigger than: %s", self.options.decompilation_threshold) apkinfo["static_method_calls"] = static_calls except (IOError, OSError, BadZipfile) as e: raise CuckooProcessingError("Error opening file %s" % e) return apkinfo
def run(self): """Run androguard to extract static android information @return: list of static features """ self.key = "apkinfo" apkinfo = {} if "file" not in self.task["category"]: return from androguard.core.bytecodes.apk import APK from androguard.core.bytecodes.dvm import DalvikVMFormat from androguard.core.analysis.analysis import uVMAnalysis from androguard.core.analysis import analysis f = File(self.task["target"]) if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type(): if not os.path.exists(self.file_path): raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path) try: a = APK(self.file_path) if a.is_valid_APK(): manifest = {} apkinfo["files"] = self._apk_files(a) manifest["package"] = a.get_package() # manifest["permissions"]=a.get_details_permissions_new() manifest["main_activity"] = a.get_main_activity() manifest["activities"] = a.get_activities() manifest["services"] = a.get_services() manifest["receivers"] = a.get_receivers() # manifest["receivers_actions"]=a.get__extended_receivers() manifest["providers"] = a.get_providers() manifest["libraries"] = a.get_libraries() apkinfo["manifest"] = manifest # apkinfo["certificate"] = a.get_certificate() static_calls = {} if self.check_size(apkinfo["files"]): vm = DalvikVMFormat(a.get_dex()) vmx = uVMAnalysis(vm) static_calls["all_methods"] = self.get_methods(vmx) static_calls["is_native_code"] = analysis.is_native_code(vmx) static_calls["is_dynamic_code"] = analysis.is_dyn_code(vmx) static_calls["is_reflection_code"] = analysis.is_reflection_code(vmx) # static_calls["dynamic_method_calls"]= analysis.get_show_DynCode(vmx) # static_calls["reflection_method_calls"]= analysis.get_show_ReflectionCode(vmx) # static_calls["permissions_method_calls"]= analysis.get_show_Permissions(vmx) # static_calls["crypto_method_calls"]= analysis.get_show_CryptoCode(vmx) # static_calls["native_method_calls"]= analysis.get_show_NativeMethods(vmx) else: log.warning("Dex size bigger than: %s", self.options.decompilation_threshold) apkinfo["static_method_calls"] = static_calls except (IOError, OSError, zipfile.BadZipfile) as e: raise CuckooProcessingError("Error opening file %s" % e) return apkinfo
def extract_attributes(sha256): with NamedTemporaryFile() as f: f.write(default_storage.open(sha256).read()) f.seek(0) sign = ApplicationSignature.compute_from_apk(f.name) package = sign.handle sign = sign.to_dict() a = APK(f.name) sign['uploaded_at'] = datetime.now() sign['sha256'] = sha256 sign['activities'] = a.get_activities() sign['features'] = a.get_features() sign['libraries'] = a.get_libraries() sign['main_activity'] = a.get_activities() sign['min_sdk_version'] = a.get_min_sdk_version() sign['max_sdk_version'] = a.get_max_sdk_version() sign['target_sdk_version'] = a.get_target_sdk_version() sign['permissions'] = a.get_permissions() sign['aosp_permissions'] = a.get_requested_aosp_permissions() sign[ 'third_party_permissions'] = a.get_requested_third_party_permissions( ) sign['providers'] = a.get_providers() sign['receivers'] = a.get_receivers() sign['services'] = a.get_services() sign['is_valid'] = a.is_valid_APK() sign['is_signed'] = a.is_signed() sign['is_signed_v1'] = a.is_signed_v1() sign['is_signed_v2'] = a.is_signed_v2() sign['is_signed_v3'] = a.is_signed_v3() if not es.exists(settings.ELASTICSEARCH_APK_INDEX, id=sha256): es.index(index=settings.ELASTICSEARCH_APK_INDEX, id=sha256, body=sign) else: es.update(index=settings.ELASTICSEARCH_APK_INDEX, id=sha256, body={'doc': sign}, retry_on_conflict=5) del a, sign, f gc.collect() return package
def testFeatures(self): from androguard.core.bytecodes.apk import APK # First Demo App a = APK("examples/tests/com.example.android.tvleanback.apk") self.assertListEqual(list(a.get_features()), ["android.hardware.microphone", "android.hardware.touchscreen", "android.software.leanback"]) self.assertTrue(a.is_androidtv()) self.assertFalse(a.is_wearable()) self.assertTrue(a.is_leanback()) # Second Demo App a = APK("examples/tests/com.example.android.wearable.wear.weardrawers.apk") self.assertListEqual(list(a.get_features()), ["android.hardware.type.watch"]) self.assertTrue(a.is_wearable()) self.assertFalse(a.is_leanback()) self.assertFalse(a.is_androidtv()) self.assertListEqual(list(a.get_libraries()), ["com.google.android.wearable"])
def testFeatures(self): from androguard.core.bytecodes.apk import APK # First Demo App a = APK("examples/tests/com.example.android.tvleanback.apk") self.assertListEqual(list(a.get_features()), ["android.hardware.microphone", "android.hardware.touchscreen", "android.software.leanback"]) self.assertTrue(a.is_androidtv()) self.assertFalse(a.is_wearable()) self.assertTrue(a.is_leanback()) # Second Demo App a = APK("examples/tests/com.example.android.wearable.wear.weardrawers.apk") self.assertListEqual(list(a.get_features()), ["android.hardware.type.watch"]) self.assertTrue(a.is_wearable()) self.assertFalse(a.is_leanback()) self.assertFalse(a.is_androidtv()) self.assertListEqual(list(a.get_libraries()), ["com.google.android.wearable"])
def extract_features(file_path): result = {} try: a = APK(file_path) d = DalvikVMFormat(a.get_dex()) dx = Analysis(d) vm = dvm.DalvikVMFormat(a.get_dex()) vmx = analysis.Analysis(vm) d.set_vmanalysis(dx) d.set_decompiler(DecompilerDAD(d, dx)) except Exception as e: print e return None result['android_version_code'] = a.get_androidversion_code() result['android_version_name'] = a.get_androidversion_name() result['max_sdk'] = a.get_max_sdk_version() result['min_sdk'] = a.get_min_sdk_version() result['libraries'] = a.get_libraries() result['filename'] = a.get_filename() result['target_sdk'] = a.get_target_sdk_version() result['md5'] = hashlib.md5(a.get_raw()).hexdigest() result['sha256'] = hashlib.sha256(a.get_raw()).hexdigest() result['permissions'] = a.get_permissions() result['activities'] = a.get_activities() result['providers'] = a.get_providers() result['services'] = a.get_services() result['strings'] = d.get_strings() result['class_names'] = [c.get_name() for c in d.get_classes()] result['method_names'] = [m.get_name() for m in d.get_methods()] result['field_names'] = [f.get_name() for f in d.get_fields()] # result['is_native_code'] = 1 if analysis.is_native_code(dx) else 0 result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0 # result['is_crypto_code'] = 1 if analysis.is_crypto_code(dx) else 0 # result['is_dyn_code'] = 1 if analysis.is_dyn_code(dx) else 0 # result['is_reflection_code'] = 1 if analysis.is_reflection_code(vmx) else 0 result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0 s_list = [] s_list.extend(result['class_names']) s_list.extend(result['method_names']) s_list.extend(result['field_names']) result['entropy_rate'] = entropy_rate(s_list) result['feature_vectors'] = {} result['feature_vectors']['api_calls'] = [] for call in API_CALLS: status = 1 if dx.get_method_by_name(".", call, ".") else 0 result['feature_vectors']['api_calls'].append(status) result['feature_vectors']['permissions'] = [] for permission in PERMISSIONS: status = 1 if permission in result['permissions'] else 0 result['feature_vectors']['permissions'].append(status) result['feature_vectors']['special_strings'] = [] for word in SPECIAL_STRINGS: status = 1 if d.get_regex_strings(word) else 0 result['feature_vectors']['special_strings'].append(status) result['feature_vectors']['others'] = [ # result['is_reflection_code'], # result['is_crypto_code'], # result['is_native_code'], result['is_obfuscation'], result['is_database'], # result['is_dyn_code'] ] return result
def run(self): """Run androguard to extract static android information @return: list of static features """ self.key = "apkinfo" apkinfo = {} if "file" not in self.task["category"] or not HAVE_ANDROGUARD: return f = File(self.task["target"]) #if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type(): if not os.path.exists(self.file_path): raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path) try: a = APK(self.file_path) if a.is_valid_APK(): manifest = {} apkinfo["files"] = self._apk_files(a) manifest["package"] = a.get_package() apkinfo["hidden_payload"] = [] for file in apkinfo["files"]: if self.file_type_check(file): apkinfo["hidden_payload"].append(file) apkinfo["files_flaged"] = self.files_name_map manifest["permissions"] = get_permissions(a) manifest["main_activity"] = a.get_main_activity() manifest["activities"] = a.get_activities() manifest["services"] = a.get_services() manifest["receivers"] = a.get_receivers() manifest["receivers_actions"] = get_extended_receivers(a) manifest["providers"] = a.get_providers() manifest["libraries"] = list(a.get_libraries()) apkinfo["manifest"] = manifest apkinfo["icon"] = get_apk_icon(self.file_path) certificate = get_certificate(self.file_path) if certificate: apkinfo["certificate"] = certificate #vm = DalvikVMFormat(a.get_dex()) #strings = vm.get_strings() strings = self._get_strings(self.file_path) apkinfo["interesting_strings"] = find_strings(strings) apkinfo["dex_strings"] = strings static_calls = {} if self.options.decompilation: if self.check_size(apkinfo["files"]): vm = DalvikVMFormat(a.get_dex()) vmx = Analysis(vm) vmx.create_xref() static_calls["all_methods"] = get_methods(vmx) static_calls[ "permissions_method_calls"] = get_show_Permissions( vmx) static_calls[ "native_method_calls"] = get_show_NativeMethods( vmx) static_calls["is_native_code"] = bool( static_calls["native_method_calls"] ) # True if not empty, False if empty static_calls[ "dynamic_method_calls"] = get_show_DynCode(vmx) static_calls["is_dynamic_code"] = bool( static_calls["dynamic_method_calls"]) static_calls[ "reflection_method_calls"] = get_show_ReflectionCode( vmx) static_calls["is_reflection_code"] = bool( static_calls["reflection_method_calls"]) static_calls[ "crypto_method_calls"] = get_show_CryptoCode(vmx) static_calls["is_crypto_code"] = bool( static_calls["crypto_method_calls"]) classes = list() for cls in vm.get_classes(): classes.append(cls.name) static_calls["classes"] = classes else: log.warning( "Aborted decompilation, static extraction of calls not perforemd", ) apkinfo["static_method_calls"] = static_calls except (IOError, OSError, BadZipfile) as e: raise CuckooProcessingError("Error opening file %s" % e) return apkinfo
class StaticAnalysis: def __init__(self, apk_path=None): self.apk = None self.apk_path = apk_path self.signatures = None self.compiled_tracker_signature = None self.classes = None self.app_details = None if apk_path is not None: self.load_apk() def _compile_signatures(self): """ Compiles the regex associated to each signature, in order to speed up the trackers detection. :return: A compiled list of signatures. """ self.compiled_tracker_signature = [] try: self.compiled_tracker_signature = [ re.compile(track.code_signature) for track in self.signatures ] except TypeError: print("self.signatures is not iterable") def load_trackers_signatures(self): """ Load trackers signatures from the official Exodus database. :return: a dictionary containing signatures. """ self.signatures = [] exodus_url = "https://reports.exodus-privacy.eu.org/api/trackers" r = requests.get(exodus_url) data = r.json() for e in data['trackers']: self.signatures.append( namedtuple( 'tracker', data['trackers'][e].keys())(*data['trackers'][e].values())) self._compile_signatures() logging.debug('{} trackers signatures loaded'.format( len(self.signatures))) def load_apk(self): """ Load the APK file. """ if self.apk is None: self.apk = APK(self.apk_path) def get_embedded_classes(self): """ Get the list of Java classes embedded into all DEX files. :return: array of Java classes names as string """ if self.classes is not None: return self.classes class_regex = re.compile(r'classes.*\.dex') with TemporaryDirectory() as tmp_dir: with zipfile.ZipFile(self.apk_path, "r") as apk_zip: class_infos = (info for info in apk_zip.infolist() if class_regex.search(info.filename)) for info in class_infos: apk_zip.extract(info, tmp_dir) dexdump = which('dexdump') cmd = '{} {}/classes*.dex | perl -n -e\'/[A-Z]+((?:\w+\/)+\w+)/ && print "$1\n"\'|sort|uniq'.format( dexdump, tmp_dir) try: self.classes = subprocess.check_output( cmd, stderr=subprocess.STDOUT, shell=True, universal_newlines=True).splitlines() logging.debug('{} classes found in {}'.format( len(self.classes), self.apk_path)) return self.classes except subprocess.CalledProcessError: logging.error('Unable to decode {}'.format(self.apk_path)) raise Exception('Unable to decode the APK') def detect_trackers_in_list(self, class_list): """ Detect embedded trackers in the provided classes list. :return: list of embedded trackers """ if self.signatures is None: self.load_trackers_signatures() def _detect_tracker(sig, tracker, class_list): for clazz in class_list: if sig.search(clazz): return tracker return None results = [] args = [(self.compiled_tracker_signature[index], tracker, class_list) for (index, tracker) in enumerate(self.signatures) if len(tracker.code_signature) > 3] for res in itertools.starmap(_detect_tracker, args): if res: results.append(res) trackers = [t for t in results if t is not None] logging.debug('{} trackers detected in {}'.format( len(trackers), self.apk_path)) return trackers def detect_trackers(self, class_list_file=None): """ Detect embedded trackers. :return: list of embedded trackers """ if self.signatures is None: self.load_trackers_signatures() if class_list_file is None: return self.detect_trackers_in_list(self.get_embedded_classes()) else: with open(class_list_file, 'r') as classes_file: classes = classes_file.readlines() return self.detect_trackers_in_list(classes) def get_version(self): """ Get the application version name :return: version name """ self.load_apk() return self.apk.get_androidversion_name() def get_version_code(self): """ Get the application version code :return: version code """ self.load_apk() return self.apk.get_androidversion_code() def get_permissions(self): """ Get application permissions :return: application permissions list """ self.load_apk() return self.apk.get_permissions() def get_app_name(self): """ Get application name :return: application name """ self.load_apk() return self.apk.get_app_name() def get_package(self): """ Get application package :return: application package """ self.load_apk() return self.apk.get_package() def get_libraries(self): """ Get application libraries :return: application libraries list """ self.load_apk() return self.apk.get_libraries() def get_icon_path(self): """ Get the icon path in the ZIP archive :return: icon path in the ZIP archive """ self.load_apk() return self.apk.get_app_icon() def get_application_details(self): """ Get the application details like creator, number of downloads, etc. :param handle: application handle :return: application details dictionary """ self.load_apk() if self.app_details is not None: return self.app_details details = get_details_from_gplaycli(self.get_package()) if details is not None: self.app_details = details return details def _get_icon_from_details(self, path): """ Get icon from applications details dictionary :param path: path where to write the icon file :return: icon path :raises Exception: if unable to find icon """ details = self.get_application_details() if details is not None: for i in details.get('images'): if i.get('imageType') == 4: f = requests.get(i.get('url')) with open(path, mode='wb') as fp: fp.write(f.content) if os.path.isfile(path) and os.path.getsize(path) > 0: return path raise Exception('Unable to download the icon from details') @staticmethod def _render_drawable_to_png(self, bxml, path): ap = axml.AXMLPrinter(bxml) print(ap.get_buff()) def save_icon(self, path): """ Extract the icon from the ZIP archive and save it at the given path :param path: destination path of the icon :return: destination path of the icon, None in case of error """ try: icon = self.get_icon_path() if icon is None: raise Exception('Unable to get icon path') with zipfile.ZipFile(self.apk_path) as z: with open(path, 'wb') as f: f.write(z.read(icon)) with Image.open(path) as _: logging.info('Get icon from APK: success') return path except Exception: logging.warning('Unable to get the icon from the APK') return None # TODO: Set this back once details download is working again # logging.warning('Downloading icon from details') # try: # saved_path = self._get_icon_from_details(path) # logging.debug('Icon downloaded from application details') # return saved_path # except Exception as e: # logging.warning(e) def get_icon_phash(self): """ Get the perceptual hash of the application icon :return: the perceptual hash, empty string in case of error """ with NamedTemporaryFile() as ic: path = self.save_icon(ic.name) if path is None: logging.error('Unable to save the icon') return '' return self.get_phash(ic.name) @staticmethod def get_phash(image_name): """ Get the perceptual hash of the given image :param image_name: name of the image file :return: the perceptual hash, empty string in case of error """ dhash.force_pil() # Force PIL try: image = Image.open(image_name).convert("RGBA") row, col = dhash.dhash_row_col(image, size=PHASH_SIZE) return row << (PHASH_SIZE * PHASH_SIZE) | col except IOError as e: logging.error(e) return '' @staticmethod def get_icon_similarity(phash_origin, phash_candidate): """ Get icons similarity score [0,1.0] :param phash_origin: original icon :param phash_candidate: icon to be compared :return: similarity score [0,1.0] """ diff = dhash.get_num_bits_different(phash_origin, phash_candidate) return 1 - 1. * diff / (PHASH_SIZE * PHASH_SIZE * 2) def get_application_universal_id(self): parts = [self.get_package()] for c in self.get_certificates(): parts.append(c.fingerprint.upper()) return sha1(' '.join(parts).encode('utf-8')).hexdigest().upper() def get_certificates(self): certificates = [] def _my_name_init(self, oid, value, _type=_SENTINEL): if not isinstance(oid, ObjectIdentifier): raise TypeError( "oid argument must be an ObjectIdentifier instance.") if not isinstance(value, six.text_type): raise TypeError("value argument must be a text type.") if len(value) == 0: raise ValueError("Value cannot be an empty string") if _type == _SENTINEL: _type = _NAMEOID_DEFAULT_TYPE.get(oid, _ASN1Type.UTF8String) if not isinstance(_type, _ASN1Type): raise TypeError("_type must be from the _ASN1Type enum") self._oid = oid self._value = value self._type = _type NameAttribute.__init__ = _my_name_init signs = self.apk.get_signature_names() for s in signs: c = self.apk.get_certificate(s) cert = Certificate(c) certificates.append(cert) return certificates def get_apk_size(self): """ Get the APK file size in bytes :return: APK file size """ return os.path.getsize(self.apk_path) def get_sha256(self): """ Get the sha256sum of the APK file :return: hex sha256sum """ BLOCKSIZE = 65536 hasher = sha256() with open(self.apk_path, 'rb') as apk: buf = apk.read(BLOCKSIZE) while len(buf) > 0: hasher.update(buf) buf = apk.read(BLOCKSIZE) return hasher.hexdigest() def save_embedded_classes_in_file(self, file_path): """ Save list of embedded classes in file. :param file_path: file to write """ with open(file_path, 'w+') as f: f.write('\n'.join(self.get_embedded_classes())) def print_apk_infos(self): """ Print APK information """ permissions = self.get_permissions() libraries = self.get_libraries() certificates = self.get_certificates() print("=== Information") print('- APK path: {}'.format(self.apk_path)) print('- APK sum: {}'.format(self.get_sha256())) print('- App version: {}'.format(self.get_version())) print('- App version code: {}'.format(self.get_version_code())) print('- App UID: {}'.format(self.get_application_universal_id())) print('- App name: {}'.format(self.get_app_name())) print('- App package: {}'.format(self.get_package())) print('- App permissions: {}'.format(len(permissions))) for perm in permissions: print(' - {}'.format(perm)) print('- App libraries:') for lib in libraries: print(' - {}'.format(lib)) print('- Certificates: {}'.format(len(certificates))) for cert in certificates: print(' - {}'.format(cert)) def print_embedded_trackers(self): """ Print detected trackers """ trackers = self.detect_trackers() print('=== Found trackers: {}'.format(len(trackers))) for t in trackers: print(' - {}'.format(t.name))
def run(self): """Run androguard to extract static android information @return: list of static features """ self.key = "apkinfo" apkinfo = {} if "file" not in self.task["category"] or not HAVE_ANDROGUARD: return #f = File(self.task["target"]) #if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type(): if not os.path.exists(self.file_path): raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path) apkinfo["APKiD"] = self._scan_APKiD(self.file_path) try: a = APK(self.file_path) if a.is_valid_APK(): manifest = {} apkinfo["files"] = self._apk_files(a) apkinfo["encrypted_assets"] = self.find_encrypted_assets(a) manifest["package"] = a.get_package() apkinfo["hidden_payload"] = [] for file in apkinfo["files"]: if self.file_type_check(file): apkinfo["hidden_payload"].append(file) apkinfo["files_flaged"] = self.files_name_map manifest["permissions"]= get_permissions(a) manifest["main_activity"] = a.get_main_activity() manifest["activities"] = a.get_activities() manifest["services"] = a.get_services() manifest["receivers"] = a.get_receivers() manifest["receivers_actions"] = get_extended_receivers(a) manifest["receivers_info"] = get_receivers_info(a) manifest["providers"] = a.get_providers() manifest["libraries"] = a.get_libraries() apkinfo["manifest"] = manifest apkinfo["icon"] = get_apk_icon(self.file_path) certificate = get_certificate(self.file_path) if certificate: apkinfo["certificate"] = certificate #vm = DalvikVMFormat(a.get_dex()) #strings = vm.get_strings() strings = self._get_strings(self.file_path) for subdir, dirs, files in os.walk(self.dropped_path): for file in files: path = os.path.join(subdir, file) try: extra_strings = self._get_strings(path) strings = list(set(extra_strings + strings)) except: pass apkinfo["dex_strings"] = strings static_calls = {} if self.options.decompilation: if self.check_size(apkinfo["files"]): vm = DalvikVMFormat(a.get_dex()) vmx = uVMAnalysis(vm) # Be less verbose about androguard logging messages. logging.getLogger("andro.runtime").setLevel(logging.CRITICAL) static_calls["all_methods"] = get_methods(vmx) static_calls["is_native_code"] = analysis.is_native_code(vmx) static_calls["is_dynamic_code"] = analysis.is_dyn_code(vmx) static_calls["is_reflection_code"] = analysis.is_reflection_code(vmx) static_calls["is_crypto_code"] = is_crypto_code(vmx) static_calls["dynamic_method_calls"] = get_show_DynCode(vmx) static_calls["reflection_method_calls"] = get_show_ReflectionCode(vmx) static_calls["permissions_method_calls"] = get_show_Permissions(vmx) static_calls["crypto_method_calls"] = get_show_CryptoCode(vmx) static_calls["native_method_calls"] = get_show_NativeMethods(vmx) classes = list() for cls in vm.get_classes(): classes.append(cls.name) static_calls["classes"] = classes else: log.warning("Dex size bigger than: %s", self.options.decompilation_threshold) apkinfo["static_method_calls"] = static_calls except (IOError, OSError, BadZipfile) as e: raise CuckooProcessingError("Error opening file %s" % e) return apkinfo
def run(self): """Run androguard to extract static android information @return: list of static features """ self.key = "apkinfo" apkinfo = {} if "file" not in self.task["category"] or not HAVE_ANDROGUARD: return f = File(self.task["target"]) #if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type(): if not os.path.exists(self.file_path): raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path) try: a = APK(self.file_path) if a.is_valid_APK(): manifest = {} apkinfo["files"] = self._apk_files(a) manifest["package"] = a.get_package() apkinfo["hidden_payload"] = [] for file in apkinfo["files"]: if self.file_type_check(file): apkinfo["hidden_payload"].append(file) apkinfo["files_flaged"] = self.files_name_map manifest["permissions"]= get_permissions(a) manifest["main_activity"] = a.get_main_activity() manifest["activities"] = a.get_activities() manifest["services"] = a.get_services() manifest["receivers"] = a.get_receivers() manifest["receivers_actions"] = get_extended_receivers(a) manifest["providers"] = a.get_providers() manifest["libraries"] = a.get_libraries() apkinfo["manifest"] = manifest apkinfo["icon"] = get_apk_icon(self.file_path) certificate = get_certificate(self.file_path) if certificate: apkinfo["certificate"] = certificate #vm = DalvikVMFormat(a.get_dex()) #strings = vm.get_strings() strings = self._get_strings(self.file_path) apkinfo["interesting_strings"] = find_strings(strings) apkinfo["dex_strings"] = strings static_calls = {} if self.options.decompilation: if self.check_size(apkinfo["files"]): vm = DalvikVMFormat(a.get_dex()) vmx = uVMAnalysis(vm) static_calls["all_methods"] = get_methods(vmx) static_calls["is_native_code"] = analysis.is_native_code(vmx) static_calls["is_dynamic_code"] = analysis.is_dyn_code(vmx) static_calls["is_reflection_code"] = analysis.is_reflection_code(vmx) static_calls["is_crypto_code"] = is_crypto_code(vmx) static_calls["dynamic_method_calls"] = get_show_DynCode(vmx) static_calls["reflection_method_calls"] = get_show_ReflectionCode(vmx) static_calls["permissions_method_calls"] = get_show_Permissions(vmx) static_calls["crypto_method_calls"] = get_show_CryptoCode(vmx) static_calls["native_method_calls"] = get_show_NativeMethods(vmx) classes = list() for cls in vm.get_classes(): classes.append(cls.name) static_calls["classes"] = classes else: log.warning("Dex size bigger than: %s", self.options.decompilation_threshold) apkinfo["static_method_calls"] = static_calls except (IOError, OSError, BadZipfile) as e: raise CuckooProcessingError("Error opening file %s" % e) return apkinfo
def extract_features(file_path): result = {} try: a = APK(file_path) d = DalvikVMFormat(a.get_dex()) dx = VMAnalysis(d) vm = dvm.DalvikVMFormat(a.get_dex()) vmx = analysis.uVMAnalysis(vm) d.set_vmanalysis(dx) d.set_decompiler(DecompilerDAD(d, dx)) except: return None result['android_version_code'] = a.get_androidversion_code() result['android_version_name'] = a.get_androidversion_name() result['max_sdk'] = a.get_max_sdk_version() result['min_sdk'] = a.get_min_sdk_version() result['libraries'] = a.get_libraries() result['filename'] = a.get_filename() result['target_sdk'] = a.get_target_sdk_version() result['md5'] = hashlib.md5(a.get_raw()).hexdigest() result['sha256'] = hashlib.sha256(a.get_raw()).hexdigest() result['permissions'] = a.get_permissions() result['activities'] = a.get_activities() result['providers'] = a.get_providers() result['services'] = a.get_services() #result['strings'] = d.get_strings() #result['class_names'] = [c.get_name() for c in d.get_classes()] #result['method_names'] = [m.get_name() for m in d.get_methods()] #result['field_names'] = [f.get_name() for f in d.get_fields()] class_names = [c.get_name() for c in d.get_classes()] method_names = [m.get_name() for m in d.get_methods()] field_names = [ f.get_name() for f in d.get_fields()] result['is_native_code'] = 1 if analysis.is_native_code(dx) else 0 result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0 result['is_crypto_code'] = 1 if analysis.is_crypto_code(dx) else 0 result['is_dyn_code'] = 1 if analysis.is_dyn_code(dx) else 0 result['is_reflection_code'] = 1 if analysis.is_reflection_code(vmx) else 0 result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0 s_list = [] #s_list.extend(result['class_names']) #s_list.extend(result['method_names']) #s_list.extend(result['field_names']) s_list.extend(class_names) s_list.extend(method_names) s_list.extend(method_names) result['entropy_rate'] = entropy_rate(s_list) result['feature_vectors'] = {} # Search for the presence of api calls in a given apk result['feature_vectors']['api_calls'] = [] for call in API_CALLS: status = 1 if dx.tainted_packages.search_methods(".", call, ".") else 0 result['feature_vectors']['api_calls'].append(status) # Search for the presence of permissions in a given apk result['feature_vectors']['permissions'] = [] for permission in PERMISSIONS: status = 1 if permission in result['permissions'] else 0 result['feature_vectors']['permissions'].append(status) result['feature_vectors']['special_strings'] = [] for word in SPECIAL_STRINGS: status = 1 if d.get_regex_strings(word) else 0 result['feature_vectors']['special_strings'].append(status) opt_seq = [] for m in d.get_methods(): for i in m.get_instructions(): opt_seq.append(i.get_name()) optngramlist = [tuple(opt_seq[i:i+NGRAM]) for i in xrange(len(opt_seq) - NGRAM)] optngram = Counter(optngramlist) optcodes = dict() tmpCodes = dict(optngram) #for k,v in optngram.iteritems(): # if v>=NGRAM_THRE: #optcodes[str(k)] = v # optcodes[str(k)] = 1 tmpCodes = sorted(tmpCodes.items(),key =lambda d:d[1],reverse=True) for value in tmpCodes[:NGRAM_THRE]: optcodes[str(value[0])] = 1 result['feature_vectors']['opt_codes'] = optcodes return result