def __edit_text_analyse__(self, a: apk.APK, d: DalvikVMFormat, dx: Analysis): # load keywords with open( os.path.join(os.path.dirname(__file__), "assets" + os.path.sep + 'pii_keywords.txt'), 'r') as file: keywords: [str] = file.read().splitlines(False) # parse flowdroid results folder = os.path.join(os.path.dirname(__file__), "results" + os.path.sep + "flowdroid") file_name = a.get_filename().split(os.path.sep)[-1][:-4] fd_path = os.path.join(folder, file_name + ".xml") self.leak_id_names: [str] = [] if not os.path.exists(fd_path): print("Flowdroid result doesn't exist") return resource_list = self.__analyse_flowdroid_result__(fd_path) resource_ids = [] for s_id, s_method, s_statement, sink_method, sink_statement in resource_list: resource_ids.append(s_id) # find Resource class package_name = a.get_package() package_name = package_name.replace(".", "/") cls: ClassDefItem = d.get_class("L" + package_name + "/R$id;") # find resource id if cls is None: print("This application doesn't have an R class") return fields: [EncodedField] = cls.get_fields() # match ids with keywords for field in fields: field: EncodedField = field value: EncodedValue = field.get_init_value() # resource id -> resource name the_value = str(value.get_value()) field_name = field.get_name() if the_value in resource_ids and field_name in keywords: self.leak_id_names.append(field_name)
def extract_features(file_path): result = {} try: a = APK(file_path) d = DalvikVMFormat(a.get_dex()) dx = Analysis(d) vm = dvm.DalvikVMFormat(a.get_dex()) #vmx = analysis.uVMAnalysis(vm) d.set_vmanalysis(dx) d.set_decompiler(DecompilerDAD(d, dx)) except: return None result['android_version_code'] = a.get_androidversion_code() result['android_version_name'] = a.get_androidversion_name() result['max_sdk'] = a.get_max_sdk_version() result['min_sdk'] = a.get_min_sdk_version() #result['libraries'] = a.get_libraries() result['filename'] = a.get_filename() result['target_sdk'] = a.get_target_sdk_version() result['md5'] = hashlib.md5(a.get_raw()).hexdigest() result['sha256'] = hashlib.sha256(a.get_raw()).hexdigest() result['permissions'] = a.get_permissions() result['activities'] = a.get_activities() result['providers'] = a.get_providers() result['services'] = a.get_services() result['strings'] = d.get_strings() result['class_names'] = [c.get_name() for c in d.get_classes()] result['method_names'] = [m.get_name() for m in d.get_methods()] result['field_names'] = [f.get_name() for f in d.get_fields()] #result['is_native_code'] = 1 if analysis.is_native_code(dx) else 0 result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0 #result['is_crypto_code'] = 1 if analysis.is_crypto_code(dx) else 0 '''result['is_dyn_code'] = 1 if analysis.is_dyn_code(dx) else 0 result['is_reflection_code'] = 1 if analysis.is_reflection_code(vmx) else 0''' result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0 arr = [] s = a.get_elements("action", "name") for i in s: arr.append(i) result['intents'] = arr s_list = [] s_list.extend(result['class_names']) s_list.extend(result['method_names']) s_list.extend(result['field_names']) result['entropy_rate'] = entropy_rate(s_list) result['feature_vectors'] = {} # Search for the presence of api calls in a given apk result['feature_vectors']['api_calls'] = [] for call in API_CALLS: status = 1 if dx.get_method(call) else 0 result['feature_vectors']['api_calls'].append(status) # Search for the presence of permissions in a given apk result['feature_vectors']['permissions'] = [] for permission in PERMISSIONS: status = 1 if permission in result['permissions'] else 0 result['feature_vectors']['permissions'].append(status) #Search for the presence of intents in a given apk result['feature_vectors']['intents'] = [] n = len(INTENTS) m = len(result['intents']) for i in range(n): stri = INTENTS[i] flg = False for j in range(m): if stri in result['intents'][j]: flg = True break if flg: status = 1 else: status = 0 result['feature_vectors']['intents'].append(status) #Check for special strings in code result['feature_vectors']['special_strings'] = [] for word in SPECIAL_STRINGS: status = 1 if d.get_regex_strings(word) else 0 result['feature_vectors']['special_strings'].append(status) return result
class XAPK: def __init__(self, folder): self.folder = Path(folder) for x in self.folder.glob('*.apk'): self.apk_src = Path(x) break for x in self.folder.glob('*.obb'): self.obb_src = Path(x) break self.apk = APK(self.apk_src) self.manifest = self.make_manifest() self.icon = self.apk.get_file(self.apk.get_app_icon()) def make_manifest(self): apk_size = self.apk_src.stat().st_size if self.obb_src: obb_size = self.obb_src.stat().st_size else: obb_size = 0 total_size = apk_size + obb_size filename = self.apk.get_filename() manifest = {} manifest['xapk_version'] = 1 manifest['package_name'] = self.apk.get_package() manifest['name'] = self.apk.get_app_name() # manifest['locales_name'] = {} # TODO manifest['version_code'] = self.apk.get_androidversion_code() manifest['version_name'] = self.apk.get_androidversion_name() manifest['min_sdk_version'] = self.apk.get_min_sdk_version() manifest['target_sdk_version'] = self.apk.get_target_sdk_version() manifest['permissions'] = self.apk.get_declared_permissions() manifest['total_size'] = total_size manifest['expansions'] = [] if obb_size: main_obb = {} main_obb[ 'file'] = 'Android/obb/{package_name}/main.{version_code}.{package_name}.obb'.format( **manifest) main_obb['install_location'] = 'EXTERNAL_STORAGE' main_obb[ 'install_path'] = 'Android/obb/{package_name}/main.{version_code}.{package_name}.obb'.format( **manifest) manifest['expansions'].push(main_obb) return manifest def save(self): self.name = '{package_name}_v{version_name}.xapk'.format( **self.manifest) zip_path = self.folder.joinpath(self.name) zip_dir = tempfile.mkdtemp() try: print('copying apk to temp directory...') apk_name = '{package_name}.apk'.format(**self.manifest) apk_src = self.apk_src.resolve() apk_dest = PurePath(zip_dir).joinpath(apk_name) shutil.copy2(apk_src, apk_dest) print('apk: OK') if self.manifest.get('expansions'): print('copying obb to temp directory...') obb_name = self.manifest['expansions'][0]['install_path'] obb_src = self.obb_src.resolve() obb_dest = PurePath(zip_dir).joinpath(obb_name) os.makedirs(Path(obb_dest).parent, exist_ok=True) shutil.copy2(obb_src, obb_dest) print('obb: OK') else: print('no obb found') print('creating icon in temp directory...') icon = self.icon icon_dest = PurePath(zip_dir).joinpath('icon.png') with open(icon_dest, 'wb') as iconfile: iconfile.write(icon) print('icon: OK') print('creating manifest in temp directory...') manifest_dest = PurePath(zip_dir).joinpath('manifest.json') with open(manifest_dest, 'w') as manifestfile: s = json.dumps(self.manifest, separators=(':', ',')) manifestfile.write(s) print('manifest: OK') print('creating xapk archive...') with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as zfd: for root, dirs, files in os.walk(zip_dir): for f in files: filename = os.path.join(root, f) zfd.write(filename, os.path.relpath(filename, zip_dir)) print('xapk: OK') finally: print('cleaning up temp directory...') shutil.rmtree(zip_dir) print('cleanup: OK')
def extract_features(file_path): result = {} try: a = APK(file_path) d = DalvikVMFormat(a.get_dex()) dx = Analysis(d) vm = dvm.DalvikVMFormat(a.get_dex()) vmx = analysis.Analysis(vm) d.set_vmanalysis(dx) d.set_decompiler(DecompilerDAD(d, dx)) except Exception as e: print e return None result['android_version_code'] = a.get_androidversion_code() result['android_version_name'] = a.get_androidversion_name() result['max_sdk'] = a.get_max_sdk_version() result['min_sdk'] = a.get_min_sdk_version() result['libraries'] = a.get_libraries() result['filename'] = a.get_filename() result['target_sdk'] = a.get_target_sdk_version() result['md5'] = hashlib.md5(a.get_raw()).hexdigest() result['sha256'] = hashlib.sha256(a.get_raw()).hexdigest() result['permissions'] = a.get_permissions() result['activities'] = a.get_activities() result['providers'] = a.get_providers() result['services'] = a.get_services() result['strings'] = d.get_strings() result['class_names'] = [c.get_name() for c in d.get_classes()] result['method_names'] = [m.get_name() for m in d.get_methods()] result['field_names'] = [f.get_name() for f in d.get_fields()] # result['is_native_code'] = 1 if analysis.is_native_code(dx) else 0 result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0 # result['is_crypto_code'] = 1 if analysis.is_crypto_code(dx) else 0 # result['is_dyn_code'] = 1 if analysis.is_dyn_code(dx) else 0 # result['is_reflection_code'] = 1 if analysis.is_reflection_code(vmx) else 0 result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0 s_list = [] s_list.extend(result['class_names']) s_list.extend(result['method_names']) s_list.extend(result['field_names']) result['entropy_rate'] = entropy_rate(s_list) result['feature_vectors'] = {} result['feature_vectors']['api_calls'] = [] for call in API_CALLS: status = 1 if dx.get_method_by_name(".", call, ".") else 0 result['feature_vectors']['api_calls'].append(status) result['feature_vectors']['permissions'] = [] for permission in PERMISSIONS: status = 1 if permission in result['permissions'] else 0 result['feature_vectors']['permissions'].append(status) result['feature_vectors']['special_strings'] = [] for word in SPECIAL_STRINGS: status = 1 if d.get_regex_strings(word) else 0 result['feature_vectors']['special_strings'].append(status) result['feature_vectors']['others'] = [ # result['is_reflection_code'], # result['is_crypto_code'], # result['is_native_code'], result['is_obfuscation'], result['is_database'], # result['is_dyn_code'] ] return result
def analyze(path): try: start = process_time() hashfunctions = dict(md5=hashlib.md5, sha1=hashlib.sha1, sha256=hashlib.sha256, sha512=hashlib.sha512) a = APK(path) certs = set( a.get_certificates_der_v3() + a.get_certificates_der_v2() + [a.get_certificate_der(x) for x in a.get_signature_names()]) for cert in certs: x509_cert = x509.Certificate.load(cert) issuer = { 'commonName': None, 'organizationName': None, 'organizationalUnitName': None, 'countryName': None, 'stateOrProvinceName': None, 'localityName': None } subject = { 'commonName': None, 'organizationName': None, 'organizationalUnitName': None, 'countryName': None, 'stateOrProvinceName': None, 'localityName': None } strIssuer = get_certificate_name_string(x509_cert.issuer, short=False) strSubject = get_certificate_name_string(x509_cert.subject, short=False) arrIssuer = strIssuer.split(',') for i in arrIssuer: if i.lstrip().split('=')[0] == 'commonName': issuer['commonName'] = i.lstrip().split('=')[1] elif i.lstrip().split('=')[0] == 'organizationName': issuer['organizationName'] = i.lstrip().split('=')[1] elif i.lstrip().split('=')[0] == 'organizationalUnitName': issuer['organizationalUnitName'] = i.lstrip().split('=')[1] elif i.lstrip().split('=')[0] == 'countryName': issuer['countryName'] = i.lstrip().split('=')[1] elif i.lstrip().split('=')[0] == 'stateOrProvinceName': issuer['stateOrProvinceName'] = i.lstrip().split('=')[1] elif i.lstrip().split('=')[0] == 'localityName': issuer['localityName'] = i.lstrip().split('=')[1] arrSubject = strSubject.split(',') for i in arrSubject: if i.lstrip().split('=')[0] == 'commonName': subject['commonName'] = i.lstrip().split('=')[1] elif i.lstrip().split('=')[0] == 'organizationName': subject['organizationName'] = i.lstrip().split('=')[1] elif i.lstrip().split('=')[0] == 'organizationalUnitName': subject['organizationalUnitName'] = i.lstrip().split( '=')[1] elif i.lstrip().split('=')[0] == 'countryName': subject['countryName'] = i.lstrip().split('=')[1] elif i.lstrip().split('=')[0] == 'stateOrProvinceName': subject['stateOrProvinceName'] = i.lstrip().split('=')[1] elif i.lstrip().split('=')[0] == 'localityName': subject['localityName'] = i.lstrip().split('=')[1] for k, v in hashfunctions.items(): if k == 'md5': md5 = v(cert).hexdigest() elif k == 'sha1': sha1 = v(cert).hexdigest() elif k == 'sha256': sha256 = v(cert).hexdigest() elif k == 'sha512': sha512 = v(cert).hexdigest() md5 = md5 appName = a.get_app_name() fileSize = os.stat(a.get_filename()).st_size sha1 = sha1 sha256 = sha256 sha512 = sha512 timestamp = time.time() dateTime = datetime.fromtimestamp(timestamp) timeOfSubmit = dateTime.strftime("%Y-%m-%d %H:%M:%S") package = a.get_package() androidversionCode = a.get_androidversion_code() androidversionName = a.get_androidversion_name() minSDKVersion = a.get_min_sdk_version() maxSDKVersion = a.get_max_sdk_version() targetSDKVersion = a.get_target_sdk_version() mainActivity = a.get_main_activity() attributes = { 'validFrom': x509_cert['tbs_certificate']['validity'] ['not_before'].native.strftime("%Y-%m-%d %H:%M:%S"), 'validTo': x509_cert['tbs_certificate']['validity'] ['not_after'].native.strftime("%Y-%m-%d %H:%M:%S"), 'serialNumber': hex(x509_cert.serial_number), 'hashAlgorithm': x509_cert.hash_algo, 'signatureAlgorithm': x509_cert.signature_algo } certificateAttributes = json.dumps(attributes) certificateIssuer = json.dumps(issuer) certificateSubject = json.dumps(subject) declaredPermissions = json.dumps(a.get_declared_permissions()) requestedPermissions = json.dumps(a.get_permissions()) activities = json.dumps(a.get_activities()) services = json.dumps(a.get_services()) receivers = json.dumps(a.get_receivers()) providers = json.dumps(a.get_providers()) stop = process_time() analysisTime = stop - start connect = mysql.connect() cursor = connect.cursor() sql = "INSERT INTO tbl_apkinfo (md5, appName, fileSize, analysisTime, sha1, sha256, sha512, firstSubmission, lastSubmission, package, androidversionCode, androidversionName, minSDKVersion, maxSDKVersion, targetSDKVersion, mainActivity, certificateAttributes, certificateIssuer, certificateSubject, declaredPermissions, requestedPermissions, activities, services, providers, receivers) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" param = (md5, appName, fileSize, analysisTime, sha1, sha256, sha512, timeOfSubmit, timeOfSubmit, package, androidversionCode, androidversionName, minSDKVersion, maxSDKVersion, targetSDKVersion, mainActivity, certificateAttributes, certificateIssuer, certificateSubject, declaredPermissions, requestedPermissions, activities, services, providers, receivers) cursor.execute(sql, param) connect.commit() connect.close() androaxml_main(path, os.path.join(app.config['OUTPUT_PATH'], md5 + '.xml')) return True except: return False
def extract_features(file_path): result = {} try: a = APK(file_path) d = DalvikVMFormat(a.get_dex()) dx = VMAnalysis(d) vm = dvm.DalvikVMFormat(a.get_dex()) vmx = analysis.uVMAnalysis(vm) d.set_vmanalysis(dx) d.set_decompiler(DecompilerDAD(d, dx)) except: return None result['android_version_code'] = a.get_androidversion_code() result['android_version_name'] = a.get_androidversion_name() result['max_sdk'] = a.get_max_sdk_version() result['min_sdk'] = a.get_min_sdk_version() result['libraries'] = a.get_libraries() result['filename'] = a.get_filename() result['target_sdk'] = a.get_target_sdk_version() result['md5'] = hashlib.md5(a.get_raw()).hexdigest() result['sha256'] = hashlib.sha256(a.get_raw()).hexdigest() result['permissions'] = a.get_permissions() result['activities'] = a.get_activities() result['providers'] = a.get_providers() result['services'] = a.get_services() #result['strings'] = d.get_strings() #result['class_names'] = [c.get_name() for c in d.get_classes()] #result['method_names'] = [m.get_name() for m in d.get_methods()] #result['field_names'] = [f.get_name() for f in d.get_fields()] class_names = [c.get_name() for c in d.get_classes()] method_names = [m.get_name() for m in d.get_methods()] field_names = [ f.get_name() for f in d.get_fields()] result['is_native_code'] = 1 if analysis.is_native_code(dx) else 0 result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0 result['is_crypto_code'] = 1 if analysis.is_crypto_code(dx) else 0 result['is_dyn_code'] = 1 if analysis.is_dyn_code(dx) else 0 result['is_reflection_code'] = 1 if analysis.is_reflection_code(vmx) else 0 result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0 s_list = [] #s_list.extend(result['class_names']) #s_list.extend(result['method_names']) #s_list.extend(result['field_names']) s_list.extend(class_names) s_list.extend(method_names) s_list.extend(method_names) result['entropy_rate'] = entropy_rate(s_list) result['feature_vectors'] = {} # Search for the presence of api calls in a given apk result['feature_vectors']['api_calls'] = [] for call in API_CALLS: status = 1 if dx.tainted_packages.search_methods(".", call, ".") else 0 result['feature_vectors']['api_calls'].append(status) # Search for the presence of permissions in a given apk result['feature_vectors']['permissions'] = [] for permission in PERMISSIONS: status = 1 if permission in result['permissions'] else 0 result['feature_vectors']['permissions'].append(status) result['feature_vectors']['special_strings'] = [] for word in SPECIAL_STRINGS: status = 1 if d.get_regex_strings(word) else 0 result['feature_vectors']['special_strings'].append(status) opt_seq = [] for m in d.get_methods(): for i in m.get_instructions(): opt_seq.append(i.get_name()) optngramlist = [tuple(opt_seq[i:i+NGRAM]) for i in xrange(len(opt_seq) - NGRAM)] optngram = Counter(optngramlist) optcodes = dict() tmpCodes = dict(optngram) #for k,v in optngram.iteritems(): # if v>=NGRAM_THRE: #optcodes[str(k)] = v # optcodes[str(k)] = 1 tmpCodes = sorted(tmpCodes.items(),key =lambda d:d[1],reverse=True) for value in tmpCodes[:NGRAM_THRE]: optcodes[str(value[0])] = 1 result['feature_vectors']['opt_codes'] = optcodes return result