class App(object):
    """
    this class describes an app
    """

    def __init__(self, app_path, output_dir=None):
        """
        create a App instance
        :param app_path: local file path of app
        :param output_dir: optional directory; it is created when it does not exist
        :return:
        """
        assert app_path is not None
        self.logger = logging.getLogger(self.__class__.__name__)

        self.app_path = app_path

        self.output_dir = output_dir
        if output_dir is not None:
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)

        # Imported lazily so the module can be loaded without androguard.
        from androguard.core.bytecodes.apk import APK
        self.apk = APK(self.app_path)
        self.package_name = self.apk.get_package()
        self.main_activity = self.apk.get_main_activity()
        self.permissions = self.apk.get_permissions()
        self.activities = self.apk.get_activities()
        self.possible_broadcasts = self.get_possible_broadcasts()
        # Fallback returned by get_main_activity() when the manifest has none.
        self.dumpsys_main_activity = None
        self.hashes = self.get_hashes()

    def get_package_name(self):
        """
        get package name of current app
        :return: the package name read from the manifest
        """
        return self.package_name

    def get_main_activity(self):
        """
        get main activity of current app
        :return: the manifest main activity, or the dumpsys fallback if the
                 manifest does not declare one
        """
        if self.main_activity is not None:
            return self.main_activity
        self.logger.warning("Cannot get main activity from manifest. Using dumpsys result instead.")
        return self.dumpsys_main_activity

    def get_start_intent(self):
        """
        get an intent to start the app
        :return: Intent
        """
        package_name = self.get_package_name()
        if self.get_main_activity():
            package_name += "/%s" % self.get_main_activity()
        return Intent(suffix=package_name)

    def get_start_with_profiling_intent(self, trace_file, sampling=None):
        """
        get an intent to start the app with profiling
        :param trace_file: value passed after --start-profiler
        :param sampling: optional integer passed after --sampling
        :return: Intent
        """
        package_name = self.get_package_name()
        if self.get_main_activity():
            package_name += "/%s" % self.get_main_activity()
        if sampling is not None:
            return Intent(prefix="start --start-profiler %s --sampling %d" % (trace_file, sampling),
                          suffix=package_name)
        return Intent(prefix="start --start-profiler %s" % trace_file, suffix=package_name)

    def get_stop_intent(self):
        """
        get an intent to stop the app
        :return: Intent
        """
        package_name = self.get_package_name()
        return Intent(prefix="force-stop", suffix=package_name)

    def get_possible_broadcasts(self):
        """
        build one broadcast Intent per (action, category) pair declared by the
        receivers of the app, plus one per action with no category
        :return: set of Intent
        """
        possible_broadcasts = set()
        for receiver in self.apk.get_receivers():
            intent_filters = self.apk.get_intent_filters('receiver', receiver)
            actions = intent_filters['action'] if 'action' in intent_filters else []
            declared = intent_filters['category'] if 'category' in intent_filters else []
            # Work on a copy: appending None to the list handed back by
            # androguard would modify data this method does not own.
            categories = list(declared) + [None]
            for action in actions:
                for category in categories:
                    intent = Intent(prefix='broadcast', action=action, category=category)
                    possible_broadcasts.add(intent)
        return possible_broadcasts

    def get_hashes(self, block_size=2 ** 8):
        """
        Calculate MD5,SHA-1, SHA-256 hashes of APK input file
        @param block_size: number of bytes read from the file per iteration
        :return: [md5, sha1, sha256] as hexadecimal strings
        """
        md5 = hashlib.md5()
        sha1 = hashlib.sha1()
        sha256 = hashlib.sha256()
        # The context manager closes the handle even if a read fails.
        with open(self.app_path, 'rb') as f:
            for data in iter(lambda: f.read(block_size), b''):
                md5.update(data)
                sha1.update(data)
                sha256.update(data)
        return [md5.hexdigest(), sha1.hexdigest(), sha256.hexdigest()]
class App(object):
    """Describes an APK: package, main activity, permissions and activities."""

    def __init__(self, app_path, root_path, app_name):
        """
        create a App instance
        :param app_path: local file path of app
        :param root_path: prefix that is concatenated with the package name to
                          form ``output_path`` (no separator is inserted)
        :param app_name: display name to use; when None it is read from the APK
        """
        print("Root path:"+root_path)
        assert app_path is not None
        self.logger = logging.getLogger(self.__class__.__name__)

        self.app_path = app_path

        from androguard.core.bytecodes.apk import APK
        self.apk = APK(self.app_path)
        self.package_name = self.apk.get_package()
        self.main_activity = self.apk.get_main_activity()
        self.permissions = self.apk.get_permissions()
        self.activities = self.apk.get_activities()
        if app_name is not None:
            self.app_name = app_name
        else:
            self.app_name = self.apk.get_app_name()
        # Format instead of "+": concatenation raised TypeError when the
        # manifest had no main activity (value is None).
        print("Main activity:{}".format(self.main_activity))
        print("Package name:{}".format(self.package_name))
        self.output_path = root_path + self.package_name

    def get_package_name(self):
        """
        get package name of current app
        :return: the package name read from the manifest
        """
        return self.package_name
def perform_analysis(self):
    """Populate ``self.permissions`` and ``self.features`` from ``self.apk_file``.

    Does nothing when ``self.apk_file`` is empty or does not exist. If the
    APK cannot be parsed the error is printed and the method returns early.
    ``self.permission_duplicate`` is set to True when the manifest lists a
    permission more than once; the stored permission list is de-duplicated.
    """
    if not (self.apk_file and os.path.exists(self.apk_file)):
        return

    try:
        apk = APK(self.apk_file)
    except Exception as ex:  # "as" form is valid on Python 2.6+ and 3
        print(ex)
        return

    self.permissions = apk.get_permissions()

    # duplicate permissions check
    unique_permissions = set(self.permissions)
    if len(self.permissions) != len(unique_permissions):
        self.permission_duplicate = True

    # remove duplicate permissions
    self.permissions = list(unique_permissions)

    # uses-features
    features_name = apk.get_elements('uses-feature', 'android:name')
    if len(features_name) > 0:
        package_name = apk.get_package()
        features_used = apk.get_elements('uses-feature', 'android:required')
        not_required = '%s.false' % package_name
        # NOTE(review): the two lists are paired by position, as before;
        # zip() stops at the shorter one instead of raising IndexError.
        for feature, required in zip(features_name, features_used):
            if feature != '' and required != not_required:
                self.features.append(feature)
        self.features = list(set(self.features))
def classify(file, ch):
    """Classify an APK as benign or malware from its requested permissions.

    :param file: path of the APK to analyse
    :param ch: model selector; 0 uses the Keras model ``ANN.h5``, 1 uses the
               pickled model ``svc_ga.pkl``. Any other value leaves the
               result at 0.
    :return: tuple ``(result, name, sdk, size)`` where ``result`` is
             ``'Benign(safe)'``, ``'Malware'`` or 0, and the other three
             values come from ``meta_fetch(file)``
    """
    result = 0
    app = APK(file)
    perm = app.get_permissions()
    name, sdk, size = meta_fetch(file)

    # One 0/1 flag per known permission, keyed by permission name.
    vector = {p: (1 if p in perm else 0) for p in permissions}
    data = np.array(list(vector.values()))

    if ch == 0:
        ANN = load_model('static/models/ANN.h5')
        result = ANN.predict([data[sel.support_].tolist()])
        print(result)
        if result < 0.02:
            result = 'Benign(safe)'
        else:
            result = 'Malware'
    elif ch == 1:
        # NOTE: pickle.load executes arbitrary code; this file must only
        # ever be a trusted, locally produced model.
        with open('static/models/svc_ga.pkl', 'rb') as model_file:
            SVC = pickle.load(model_file)
        result = SVC.predict([data[sel.support_]])
        if result == 'benign':
            result = 'Benign(safe)'
        else:
            result = 'Malware'
    return result, name, sdk, size
def testAPKManifest(self):
    """Check the manifest-derived metadata of the TestActivity sample APK."""
    from androguard.core.bytecodes.apk import APK

    apk = APK("examples/android/TestsAndroguard/bin/TestActivity.apk", testzip=True)

    self.assertEqual(apk.get_app_name(), "TestsAndroguardApplication")

    # Icon lookup: default first, then one lookup per density ceiling.
    self.assertEqual(apk.get_app_icon(), "res/drawable-hdpi/icon.png")
    icon_by_max_dpi = (
        (120, "res/drawable-ldpi/icon.png"),
        (160, "res/drawable-mdpi/icon.png"),
        (240, "res/drawable-hdpi/icon.png"),
    )
    for max_dpi, expected_icon in icon_by_max_dpi:
        self.assertEqual(apk.get_app_icon(max_dpi=max_dpi), expected_icon)
    self.assertIsNone(apk.get_app_icon(max_dpi=1))

    self.assertEqual(apk.get_main_activity(), "tests.androguard.TestActivity")
    self.assertEqual(apk.get_package(), "tests.androguard")

    # Version and SDK attributes are returned as strings.
    self.assertEqual(apk.get_androidversion_code(), '1')
    self.assertEqual(apk.get_androidversion_name(), "1.0")
    self.assertEqual(apk.get_min_sdk_version(), "9")
    self.assertEqual(apk.get_target_sdk_version(), "16")
    self.assertIsNone(apk.get_max_sdk_version())

    self.assertEqual(apk.get_permissions(), [])
    self.assertEqual(apk.get_declared_permissions(), [])
    self.assertTrue(apk.is_valid_APK())
def extract_features(file_path):
    """Return the permissions reported by the APK at ``file_path``.

    Any error raised while opening or parsing the APK propagates to the
    caller.
    """
    return APK(file_path).get_permissions()
def getPermissionFromManifest(Apack):
    """Record the ``android.permission`` entries requested by an APK.

    Every permission whose name contains ``"android.permission"`` is appended
    to the module-level ``PermissionDeclared`` list. Each one is then matched
    against the rows of the ``permissions`` table, and
    ``DBConnector.createDeclaredPermission(title, perm_id)`` is called for
    every matching row. Finally ``PermissionDeclared`` is dumped as JSON.

    NOTE(review): the database code is assumed to have been nested inside
    the per-permission loop, since it reads ``permission``. It previously
    opened a connection and re-ran the same SELECT for every permission;
    it now connects and queries once.

    :param Apack: path of the APK to inspect
    """
    declared = [
        permission
        for permission in APK(Apack).get_permissions()
        if "android.permission" in permission
    ]
    PermissionDeclared.extend(declared)

    if declared:
        connection = pymysql.connect(host='localhost', user='******', password='', db='dbplaystore')
        try:
            with connection.cursor() as cursor:
                rows = ()
                try:
                    cursor.execute("SELECT `perm_id`, `name` FROM permissions")
                    rows = cursor.fetchall()
                except Exception as e:
                    print(e)
                for permission in declared:
                    # Errors are reported per permission so that one failure
                    # does not stop the remaining permissions.
                    try:
                        for row in rows:
                            # Matches when the permission equals any column
                            # of the row (perm_id or name).
                            if permission in row:
                                DBConnector.createDeclaredPermission(title, row[0])
                    except Exception as e:
                        print(e)
            connection.commit()
        finally:
            connection.close()

    with open("/home/fypj/Desktop/FYPJ" + AppName + "Declared permissions", "w+") as outfile:
        json.dump(PermissionDeclared, outfile)
def get_apk_info(self):
    """Read identity, icon and permission data from ``self.apk_file``.

    Sets ``self.package_name`` and ``self.app_name`` and fills
    ``self.report_saver`` with the package name, app name, version code, a
    256x256 PNG icon encoded as a base64 data URI, and a mapping of
    permission label to bool built from ``PermissionParser(mode='groups')``.
    """
    apk = APK(self.apk_file)

    app_icon_file = apk.get_app_icon()
    app_icon_data = apk.get_file(app_icon_file)

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under its current name. Fall back for very old Pillow releases.
    resample = getattr(Image, "LANCZOS", None)
    if resample is None:
        resample = Image.ANTIALIAS

    size = (256, 256)
    buffered = BytesIO()
    im = Image.open(BytesIO(app_icon_data))
    im = im.resize(size, resample)
    im.save(buffered, "PNG")
    app_icon_b64 = "data:image/png;base64," + base64.b64encode(
        buffered.getvalue()).decode('utf-8')

    self.package_name = apk.get_package()
    self.app_name = apk.get_app_name()

    self.report_saver.package_name = self.package_name
    self.report_saver.app_name = self.app_name
    self.report_saver.version = apk.get_androidversion_code()
    self.report_saver.app_icon = app_icon_b64

    permission_parser = PermissionParser(mode='groups')
    permission_values = permission_parser.transform(
        apk.get_permissions()).flatten().tolist()
    permission_labels = permission_parser.labels()
    self.report_saver.permissions_actual = {
        permission_labels[i]: bool(v)
        for i, v in enumerate(permission_values)
    }
def testAPKPermissions(self):
    """Check the package name and requested permissions of a2dp.Vol_137.apk."""
    from androguard.core.bytecodes.apk import APK

    apk = APK("examples/tests/a2dp.Vol_137.apk", testzip=True)
    self.assertEqual(apk.get_package(), "a2dp.Vol")

    expected_permissions = [
        "android.permission.RECEIVE_BOOT_COMPLETED",
        "android.permission.CHANGE_WIFI_STATE",
        "android.permission.ACCESS_WIFI_STATE",
        "android.permission.KILL_BACKGROUND_PROCESSES",
        "android.permission.BLUETOOTH",
        "android.permission.BLUETOOTH_ADMIN",
        "com.android.launcher.permission.READ_SETTINGS",
        "android.permission.RECEIVE_SMS",
        "android.permission.MODIFY_AUDIO_SETTINGS",
        "android.permission.READ_CONTACTS",
        "android.permission.ACCESS_COARSE_LOCATION",
        "android.permission.ACCESS_FINE_LOCATION",
        "android.permission.ACCESS_LOCATION_EXTRA_COMMANDS",
        "android.permission.WRITE_EXTERNAL_STORAGE",
        "android.permission.READ_PHONE_STATE",
        "android.permission.BROADCAST_STICKY",
        "android.permission.GET_ACCOUNTS",
    ]
    # Both sides are sorted so the comparison ignores manifest order.
    actual_permissions = sorted(apk.get_permissions())
    self.assertListEqual(actual_permissions, sorted(expected_permissions))
class VectorGroupPermis:
    """Write the permission names of one APK into a row of a spreadsheet."""

    def __init__(self, apk, pos):
        """Open ``apk`` and immediately export its permissions to row ``pos``."""
        self.data = []
        self.don = []
        self.apk = APK(str(apk))
        self.pos = pos
        self.main()

    # Collect the permissions of the APK
    def getVectorOfXml(self):
        """Return the last dotted component of every permission whose dotted
        name contains a ``permission`` component."""
        return [
            parts[-1]
            for parts in (name.split('.') for name in self.apk.get_permissions())
            if 'permission' in parts
        ]

    # Create the excel file
    def setVector(self, vector):
        """Write ``vector`` into row ``self.pos`` of the module-level sheet
        ``feuil`` and save the module-level workbook ``book``."""
        values = list(vector)
        for column, value in enumerate(values):
            row = feuil.row(self.pos)
            row.write(column, value)
        book.save('listepermission.xls')

    def main(self):
        """Extract the permission names, store them and print them."""
        names = self.getVectorOfXml()
        self.setVector(names)
        print(names)
def get_permissions(path):
    """Return the distinct permissions of the APK at ``path``, sorted."""
    requested = APK(path).get_permissions()
    return sorted(set(requested))
def extract_features(file_path):
    """Return the permissions reported by the APK at ``file_path``."""
    apk = APK(file_path)
    permissions = apk.get_permissions()
    return permissions
def extract_permissions(file):
    """Load and analyse the APK at ``file`` and return its permissions.

    The DEX analysis objects are built and attached to the VM, but only the
    permission list is returned.
    """
    apk = APK(file)

    dex_vm = DalvikVMFormat(apk.get_dex())
    dex_analysis = VMAnalysis(dex_vm)

    # A second VM / analysis pair is built from the same DEX; its result is
    # not used afterwards.
    second_vm = dvm.DalvikVMFormat(apk.get_dex())
    analysis.uVMAnalysis(second_vm)

    dex_vm.set_vmanalysis(dex_analysis)
    dex_vm.set_decompiler(DecompilerDAD(dex_vm, dex_analysis))

    return apk.get_permissions()
def create_perm_vector(apk_file):
    """Build a 0/1 vector telling which of ``PERMISSIONS`` the APK requests.

    The vector is now built fresh on every call. Previously the function
    appended to a ``perm_vector`` name it never defined, which either raised
    NameError or kept growing a shared module-level list across calls.

    :param apk_file: path of the APK to inspect
    :return: list with one 0/1 entry per item of ``PERMISSIONS`` (same
             order), or None when the APK cannot be opened
    """
    try:
        a = APK(apk_file)
    except Exception:  # unreadable APK: skip it, but let Ctrl-C through
        return None

    perms = a.get_permissions()
    return [1 if permission in perms else 0 for permission in PERMISSIONS]
def extract_features(file_path):
    """Return the permissions of the APK at ``file_path``.

    The APK is opened and its DEX is run through the analysis and decompiler
    setup first; if any of those steps fails the function returns None.

    :param file_path: path of the APK to analyse
    :return: the APK's permission list, or None on failure
    """
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        vm = dvm.DalvikVMFormat(a.get_dex())
        # NOTE(review): the result is unused; kept because a failure here
        # makes the function return None.
        analysis.uAnalysis(vm)
        d.set_Analysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit are
        # no longer swallowed.
        return None
    return a.get_permissions()
def create_perm_vector(apk_file):
    """Describe an APK by name, permissions and a 0/1 permission vector.

    :param apk_file: path of the APK to inspect
    :return: ``(app_name, permissions, vector)`` where ``vector`` has one 0/1
             entry per item of ``PERMISSIONS`` (same order), or None when the
             APK cannot be opened
    """
    try:
        a = APK(apk_file)
    except Exception:  # unreadable APK: skip it, but let Ctrl-C through
        return None

    apk_file_name = a.get_app_name()
    perms = a.get_permissions()
    # Built directly: the former "[] * v" is always an empty list and only
    # added a dependency on an unrelated name "v".
    perm_vector = [1 if permission in perms else 0 for permission in PERMISSIONS]
    return apk_file_name, perms, perm_vector
def main():
    """Write the permissions of every APK in ``source_apk`` to a CSV file.

    Each line of ``permission_list.csv`` is the APK file name followed by its
    permissions, every field terminated by a comma. An APK that cannot be
    processed is reported on stdout and skipped.
    """
    # The context manager closes the CSV even if an unexpected error escapes.
    with open('permission_list.csv', 'w') as f:
        file_list = file_name('source_apk')
        print('processing...')
        for apk in file_list:
            try:
                permissions = APK('source_apk/' + apk).get_permissions()
                # Assemble the whole row first so a failure cannot leave a
                # half-written line behind.
                row = apk + ',' + ''.join(p + ',' for p in permissions) + '\n'
                f.write(row)
            except Exception:
                print(apk + 'occured an error!')
    print('finish...')
def extract_features_using_androguard(androguard: APK):
    """Summarise permissions, hardware features, components and intents.

    :param androguard: parsed APK object
    :return: dict holding, for every non-empty category, a mapping of item to
             occurrence count (keys ``defined_perm``, ``hardware``,
             ``activity``, ``service``, ``receiver``, ``provider``,
             ``intent``), plus ``component_count`` with the ``num_*`` totals
    """
    features = {}
    counts = {}

    requested_permissions = androguard.get_permissions()
    hardware_features = list(androguard.get_features())

    permission_total = len(requested_permissions)
    if permission_total > 0:
        features = {'defined_perm': dict(Counter(requested_permissions))}
        counts = {'num_permission': permission_total}

    hardware_total = len(hardware_features)
    if hardware_total > 0:
        features['hardware'] = dict(Counter(hardware_features))
        counts['num_hardware'] = hardware_total

    # Apk components, keyed by the component kind used in the output
    components = {
        'activity': androguard.get_activities(),
        'service': androguard.get_services(),
        'receiver': androguard.get_receivers(),
        'provider': androguard.get_providers()
    }

    # All intents found across every component kind
    all_intents = []
    for kind, names in components.items():
        component_total = len(names)
        if component_total <= 0:
            continue
        counts['num_' + kind] = component_total
        features[kind] = dict(Counter(names))

        # Intent filters declared by the components of this kind
        kind_intents = extract_intent(androguard, kind, names)
        intent_total = len(kind_intents)
        if intent_total > 0:
            all_intents.extend(kind_intents)
            counts['num_intent_' + kind] = intent_total

    if len(all_intents) > 0:
        features['intent'] = dict(Counter(all_intents))
        counts['num_intent'] = len(all_intents)

    features['component_count'] = counts
    return features
def extract_attributes(sha256):
    """Extract the attributes of a stored APK and index them in Elasticsearch.

    The APK stored under ``sha256`` is copied to a temporary file, its
    signature and manifest attributes are collected, and the resulting
    document is indexed (or updated if it already exists) under the same id.

    :param sha256: storage key of the APK, also used as the document id
    :return: the ``handle`` of the computed application signature
    """
    with NamedTemporaryFile() as f:
        f.write(default_storage.open(sha256).read())
        f.seek(0)

        sign = ApplicationSignature.compute_from_apk(f.name)
        package = sign.handle
        sign = sign.to_dict()

        a = APK(f.name)
        sign['uploaded_at'] = datetime.now()
        sign['sha256'] = sha256
        sign['activities'] = a.get_activities()
        sign['features'] = a.get_features()
        sign['libraries'] = a.get_libraries()
        # Fixed: this field used to be filled with the full activity list.
        sign['main_activity'] = a.get_main_activity()
        sign['min_sdk_version'] = a.get_min_sdk_version()
        sign['max_sdk_version'] = a.get_max_sdk_version()
        sign['target_sdk_version'] = a.get_target_sdk_version()
        sign['permissions'] = a.get_permissions()
        sign['aosp_permissions'] = a.get_requested_aosp_permissions()
        sign['third_party_permissions'] = a.get_requested_third_party_permissions()
        sign['providers'] = a.get_providers()
        sign['receivers'] = a.get_receivers()
        sign['services'] = a.get_services()
        sign['is_valid'] = a.is_valid_APK()
        sign['is_signed'] = a.is_signed()
        sign['is_signed_v1'] = a.is_signed_v1()
        sign['is_signed_v2'] = a.is_signed_v2()
        sign['is_signed_v3'] = a.is_signed_v3()

        if not es.exists(settings.ELASTICSEARCH_APK_INDEX, id=sha256):
            es.index(index=settings.ELASTICSEARCH_APK_INDEX, id=sha256, body=sign)
        else:
            es.update(index=settings.ELASTICSEARCH_APK_INDEX, id=sha256,
                      body={'doc': sign}, retry_on_conflict=5)

        # Drop the large parsed objects before forcing a collection.
        del a, sign, f
        gc.collect()

    return package
def get_data(apkPath, apkName):
    """Dump permissions, hardware components and components of an APK to JSON.

    The output is written to ``<apkName>.json`` in the current directory.

    :param apkPath: path of the APK to analyse
    :param apkName: base name of the JSON file to create
    """
    a = APK(apkPath)
    jsonFile = apkName + ".json"

    pro = a.get_providers()
    rec = a.get_receivers()
    ser = a.get_services()
    act = a.get_activities()
    per = a.get_permissions()
    hwc = get_hardware(a)

    data = {'permissions': [], 'hardwareComponent': [], 'components': {}}
    data['permissions'] = per
    data['hardwareComponent'] = hwc
    data['components']['providers'] = pro
    data['components']['receivers'] = rec
    # Fixed: services used to be written with the receivers list.
    data['components']['services'] = ser
    data['components']['activities'] = act

    with open(jsonFile, 'w') as f:
        json.dump(data, f)
def extract_apk_permisson(name, category):
    """Store the known permissions of one APK and mark the APK as processed.

    The APK is read from a fixed directory built from ``category`` and
    ``name``. For a valid APK, every permission whose last dotted component
    is a key of ``PERMISSIONS`` becomes a column set to 1 in a new
    ``apk_permission`` row. Whatever happens, the ``apk`` row whose package
    equals ``name`` gets ``state = 0`` afterwards.

    :param name: APK file name without extension; also the package key used
                 in the final update
    :param category: sub-directory of the APK and value of the ``category``
                     column
    """
    path = '/media/新加卷/begin_android_english/'+category+'/'+name+'.apk'
    try:
        apk = APK(path)
        if apk.is_valid_APK():
            package = apk.get_package()
            permissions = apk.get_permissions()
            # Keep each known permission once, by its short name.
            simple_permissions = set()
            for p in permissions:
                p = p.split('.')[-1]
                if p in PERMISSIONS:  # has_key() does not exist on Python 3
                    simple_permissions.add(p)

            columns = ''.join(',' + permission for permission in simple_permissions)
            flags = ',1' * len(simple_permissions)
            # NOTE(security): package and category are interpolated straight
            # into the statement. Column names are safe (filtered through
            # PERMISSIONS), but the values should be bound as parameters if
            # db.insert supports them.
            values = "values ('%s','%s'" % (package, category)
            insert_sql = ('insert into apk_permission(package,category'
                          + columns + ') ' + values + flags + ')')
            db.insert(insert_sql)
            print('analysis %s' % (path))
        else:
            print('%s is not valid apk' % (path))
    except Exception:
        # Best effort: report the failure and still run the state update.
        etype, evalue = sys.exc_info()[:2]
        print('apk:%s errortype:%s errorvalue:%s' % (path, etype, evalue))
    finally:
        sql = "update apk set state = 0 where package='%s'" % name
        print(sql)
        db1.update(sql)
        print(1)
def testAPKManifest(self):
    """Verify name, icons, identifiers, versions and permissions reported for
    the TestActivity sample APK."""
    from androguard.core.bytecodes.apk import APK

    sample = APK("examples/android/TestsAndroguard/bin/TestActivity.apk", testzip=True)

    check_equal = self.assertEqual
    check_none = self.assertIsNone

    check_equal(sample.get_app_name(), "TestsAndroguardApplication")

    # Default icon, then the best icon allowed by each density ceiling.
    check_equal(sample.get_app_icon(), "res/drawable-hdpi/icon.png")
    check_equal(sample.get_app_icon(max_dpi=120), "res/drawable-ldpi/icon.png")
    check_equal(sample.get_app_icon(max_dpi=160), "res/drawable-mdpi/icon.png")
    check_equal(sample.get_app_icon(max_dpi=240), "res/drawable-hdpi/icon.png")
    check_none(sample.get_app_icon(max_dpi=1))

    check_equal(sample.get_main_activity(), "tests.androguard.TestActivity")
    check_equal(sample.get_package(), "tests.androguard")

    check_equal(sample.get_androidversion_code(), '1')
    check_equal(sample.get_androidversion_name(), "1.0")
    check_equal(sample.get_min_sdk_version(), "9")
    check_equal(sample.get_target_sdk_version(), "16")
    check_none(sample.get_max_sdk_version())

    check_equal(sample.get_permissions(), [])
    check_equal(sample.get_declared_permissions(), [])
    self.assertTrue(sample.is_valid_APK())
def get_permissions(path):
    """
    Get the permissions from an app.

    Parameters:
        path - The path of the app to be decompiled

    Returns:
        A sorted list of permissions; an empty list when the app cannot be
        processed (its path is then written to ``error_files.txt``)
    """
    perms = list()
    # NOTE(review): mode "w" truncates error_files.txt on every call, so only
    # a failure of the most recent call is kept. That is unchanged here; the
    # fix is that the handle is now always closed (and the write flushed).
    with open("error_files.txt", "w") as error_file:
        try:
            app = APK(path)
            perms = app.get_permissions()
            # deduplicate and sort
            perms = list(set(perms))
            perms.sort()
        except Exception as e:
            print(e)
            print("path", path)
            error_file.write(path)
    return perms
def testAPKPermissions(self):
    """The a2dp.Vol sample must report its package name and exactly the
    expected set of requested permissions (order-insensitive)."""
    from androguard.core.bytecodes.apk import APK

    sample = APK("examples/tests/a2dp.Vol_137.apk", testzip=True)
    self.assertEqual(sample.get_package(), "a2dp.Vol")

    wanted = sorted([
        "android.permission.RECEIVE_BOOT_COMPLETED",
        "android.permission.CHANGE_WIFI_STATE",
        "android.permission.ACCESS_WIFI_STATE",
        "android.permission.KILL_BACKGROUND_PROCESSES",
        "android.permission.BLUETOOTH",
        "android.permission.BLUETOOTH_ADMIN",
        "com.android.launcher.permission.READ_SETTINGS",
        "android.permission.RECEIVE_SMS",
        "android.permission.MODIFY_AUDIO_SETTINGS",
        "android.permission.READ_CONTACTS",
        "android.permission.ACCESS_COARSE_LOCATION",
        "android.permission.ACCESS_FINE_LOCATION",
        "android.permission.ACCESS_LOCATION_EXTRA_COMMANDS",
        "android.permission.WRITE_EXTERNAL_STORAGE",
        "android.permission.READ_PHONE_STATE",
        "android.permission.BROADCAST_STICKY",
        "android.permission.GET_ACCOUNTS",
    ])
    found = sorted(sample.get_permissions())
    self.assertListEqual(found, wanted)
def lim_features_categories(apk_filepath):
    """Collect normalised manifest features of an APK, grouped by category.

    Every feature string has its dots replaced by underscores and is
    lower-cased.

    :param apk_filepath: path of the APK to analyse
    :return: dict mapping category name to a list of normalised feature
             strings, or None when the APK cannot be processed
    """
    try:
        apk = APK(apk_filepath)
        info = {
            'declared permissions': sorted(apk.get_permissions()),
            'activities': apk.get_activities(),
            'services': apk.get_services(),
            'intent filters': apk.get_intent_filters('receiver', ''),
            'content providers': apk.get_providers(),
            'broadcast receivers': apk.get_receivers(),
            'hardware components': apk.get_features()
        }
        # Values are replaced in place; the key set itself does not change.
        for category in info:
            info[category] = [
                feature.replace(".", "_").lower()
                for feature in info[category]
            ]
        return info
    except Exception:
        # We just do not process the APK. Narrowed from a bare except so
        # KeyboardInterrupt/SystemExit still propagate.
        return None
class App(object):
    """
    this class describes an app
    """

    def __init__(self, app_path, output_dir=None):
        """
        create a App instance
        :param app_path: local file path of app
        :param output_dir: optional directory; it is created when it does not exist
        :return:
        """
        assert app_path is not None
        self.logger = logging.getLogger(self.__class__.__name__)

        self.app_path = app_path

        self.output_dir = output_dir
        if output_dir is not None:
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)

        # Imported lazily so the module can be loaded without androguard.
        from androguard.core.bytecodes.apk import APK
        self.apk = APK(self.app_path)
        self.package_name = self.apk.get_package()
        self.main_activity = self.apk.get_main_activity()
        self.permissions = self.apk.get_permissions()
        self.activities = self.apk.get_activities()
        self.possible_broadcasts = self.get_possible_broadcasts()
        # Fallback returned by get_main_activity() when the manifest has none.
        self.dumpsys_main_activity = None
        self.hashes = self.get_hashes()

    def get_package_name(self):
        """
        get package name of current app
        :return: the package name read from the manifest
        """
        return self.package_name

    def get_main_activity(self):
        """
        get main activity of current app
        :return: the manifest main activity, or the dumpsys fallback if the
                 manifest does not declare one
        """
        if self.main_activity is not None:
            return self.main_activity
        self.logger.warning(
            "Cannot get main activity from manifest. Using dumpsys result instead."
        )
        return self.dumpsys_main_activity

    def get_start_intent(self):
        """
        get an intent to start the app
        :return: Intent
        """
        package_name = self.get_package_name()
        if self.get_main_activity():
            package_name += "/%s" % self.get_main_activity()
        return Intent(suffix=package_name)

    def get_start_with_profiling_intent(self, trace_file, sampling=None):
        """
        get an intent to start the app with profiling
        :param trace_file: value passed after --start-profiler
        :param sampling: optional integer passed after --sampling
        :return: Intent
        """
        package_name = self.get_package_name()
        if self.get_main_activity():
            package_name += "/%s" % self.get_main_activity()
        if sampling is not None:
            return Intent(
                prefix="start --start-profiler %s --sampling %d" %
                (trace_file, sampling),
                suffix=package_name)
        return Intent(prefix="start --start-profiler %s" % trace_file,
                      suffix=package_name)

    def get_stop_intent(self):
        """
        get an intent to stop the app
        :return: Intent
        """
        package_name = self.get_package_name()
        return Intent(prefix="force-stop", suffix=package_name)

    def get_possible_broadcasts(self):
        """
        build one broadcast Intent per (action, category) pair declared by the
        receivers of the app, plus one per action with no category
        :return: set of Intent
        """
        possible_broadcasts = set()
        for receiver in self.apk.get_receivers():
            intent_filters = self.apk.get_intent_filters('receiver', receiver)
            actions = intent_filters[
                'action'] if 'action' in intent_filters else []
            declared = intent_filters[
                'category'] if 'category' in intent_filters else []
            # Work on a copy: appending None to the list handed back by
            # androguard would modify data this method does not own.
            categories = list(declared) + [None]
            for action in actions:
                for category in categories:
                    intent = Intent(prefix='broadcast',
                                    action=action,
                                    category=category)
                    possible_broadcasts.add(intent)
        return possible_broadcasts

    def get_hashes(self, block_size=2**8):
        """
        Calculate MD5,SHA-1, SHA-256 hashes of APK input file
        @param block_size: number of bytes read from the file per iteration
        :return: [md5, sha1, sha256] as hexadecimal strings
        """
        md5 = hashlib.md5()
        sha1 = hashlib.sha1()
        sha256 = hashlib.sha256()
        # The context manager closes the handle even if a read fails.
        with open(self.app_path, 'rb') as f:
            for data in iter(lambda: f.read(block_size), b''):
                md5.update(data)
                sha1.update(data)
                sha256.update(data)
        return [md5.hexdigest(), sha1.hexdigest(), sha256.hexdigest()]
def extract_features(file_path):
    """Extract metadata, code symbols and binary feature vectors from an APK.

    :param file_path: path of the APK to analyse
    :return: dict with manifest metadata, hashes of the raw APK, component
             lists, DEX strings / class / method / field names, the
             ``is_obfuscation`` and ``is_database`` flags, ``intents``,
             ``entropy_rate`` and ``feature_vectors`` (0/1 lists aligned with
             ``API_CALLS``, ``PERMISSIONS``, ``INTENTS`` and
             ``SPECIAL_STRINGS``); None when the APK or its DEX cannot be
             loaded
    """
    result = {}
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        # NOTE(review): this second VM is never used below; the call is kept
        # because a failure here makes the function return None.
        dvm.DalvikVMFormat(a.get_dex())
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit are
        # no longer swallowed.
        return None

    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()

    # Fetch the raw bytes once and hash them twice.
    raw = a.get_raw()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()

    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0

    result['intents'] = list(a.get_elements("action", "name"))

    symbol_names = []
    symbol_names.extend(result['class_names'])
    symbol_names.extend(result['method_names'])
    symbol_names.extend(result['field_names'])
    result['entropy_rate'] = entropy_rate(symbol_names)

    vectors = {}
    result['feature_vectors'] = vectors

    # Search for the presence of api calls in a given apk
    vectors['api_calls'] = [1 if dx.get_method(call) else 0 for call in API_CALLS]

    # Search for the presence of permissions in a given apk
    vectors['permissions'] = [
        1 if permission in result['permissions'] else 0
        for permission in PERMISSIONS
    ]

    # Search for the presence of intents in a given apk: a known intent
    # counts as present when it is a substring of any declared action.
    vectors['intents'] = [
        1 if any(known in declared for declared in result['intents']) else 0
        for known in INTENTS
    ]

    # Check for special strings in code
    vectors['special_strings'] = [
        1 if d.get_regex_strings(word) else 0 for word in SPECIAL_STRINGS
    ]

    return result
class Run(Lobotomy): def __init__(self, ROOT_DIR): Lobotomy.__init__(self) self.ROOT_DIR = ROOT_DIR self.t = Terminal() self.logger = Logger() self.util = Util() self.apk = None self.package = None self.vm = None self.vmx = None self.gmx = None self.components = None self.dex = None self.strings = None self.permissions = None self.permissions_details = None self.files = None self.attack_surface = None def _cmd_completer(self, name, text, line, begidx, endidx): fn = getattr(self, 'do_'+name) if not hasattr(fn.im_func, "_expected_args"): return [] a = [arg for arg in fn.im_func._expected_args if arg.startswith(text)] return a def find_dex(self): """ Return True is classes.dex is found within the target APK. Args: None Returns: None """ if self.files: for f in self.files: if "classes" in f: return True break def process_vm(self, apk=False, dex=False): """ Process the application's classes.dex Args: param1 = boolean param2 = boolean Results: None """ try: if apk: # Make sure the APK contains a classes.dex file if self.find_dex(): self.dex = self.apk.get_dex() if self.dex: self.logger.log("info", "Loading classes.dex ...") from androguard.core.bytecodes.dvm import DalvikVMFormat from androguard.core.analysis.analysis import VMAnalysis from androguard.core.analysis.ganalysis import GVMAnalysis # Create a DalvikVMFormat instance ... 
# In this case self.dex will be a file type self.vm = DalvikVMFormat(self.dex) if self.vm: print(self.t.yellow("\n\t--> Loaded classes.dex (!)\n")) self.logger.log("info", "Analyzing classes.dex ...") # Analyze the DalvikVMFormat instance and return # analysis instances of VMAnalysis and GVMAnalysis self.vmx = VMAnalysis(self.vm) self.gmx = GVMAnalysis(self.vmx, None) if self.vmx and self.gmx: print(self.t.yellow("\n\t--> Analyzed classes.dex (!)\n")) # Set the analysis properties on the # DalvikVMFormat instance self.vm.set_vmanalysis(self.vmx) self.vm.set_gvmanalysis(self.gmx) # Generate xref(s) and dref(s) self.vm.create_xref() self.vm.create_dref() return else: CommandError("process_vm : Cannot analyze VM instance (!)") return else: CommandError("process_vm : Cannot load VM instance (!)") return else: CommandError("process_vm : classes.dex not found (!)") return if dex: if self.dex: from androguard.core.bytecodes.dvm import DalvikVMFormat from androguard.core.analysis.analysis import VMAnalysis from androguard.core.analysis.ganalysis import GVMAnalysis # Analyze the DalvikVMFormat instance and return # analysis instances of VMAnalysis and GVMAnalysis self.vm = DalvikVMFormat(self.util.read(self.dex)) if self.vm: print(self.t.yellow("\n\t--> Loaded {} (!)\n" .format(self.dex .split("/")[-1]))) self.logger.log("info", "Analyzing {} ..." 
.format(self.dex .split("/")[-1])) # Set the analysis properties on the # DalvikVMFormat instance self.vmx = VMAnalysis(self.vm) self.gmx = GVMAnalysis(self.vmx, None) if self.vmx and self.gmx: print(self.t.yellow("\n\t--> Analyzed {} (!)\n" .format(self.dex .split("/")[-1]))) # Set the analysis properties on the # DalvikVMFormat instance self.vm.set_vmanalysis(self.vmx) self.vm.set_gvmanalysis(self.gmx) # Generate xref(s) and dref(s) self.vm.create_xref() self.vm.create_dref() return else: CommandError("process_vm :" + "Cannot analyze VM instance (!)") return else: CommandError("process_vm :" + "Cannot load VM instance (!)") return else: CommandError("process_vm : classes.dex not found (!)") return except Exception as e: CommandError("process_vm : {}".format(e)) def complete_operate(self, *args): return self._cmd_completer("operate", *args) @cmd_arguments(["apk", "dex"]) def do_operate(self, *args): """ := operate apk path_to_apk := operate dex path_to_classes.dex """ # Locals arg0 = args[0].split(" ")[0] arg1 = args[0].split(" ")[1] try: if arg0 == "apk": if arg1: self.logger.log("info", "Loading : {} ..." .format(arg1.split("/")[-1])) from androguard.core.bytecodes.apk import APK self.apk = APK(arg1) if self.apk: print(self.t.yellow("\n\t--> Loaded : {} (!)\n" .format(arg1.split("/")[-1]))) self.package = self.apk.get_package() from core.brains.apk.components import Components # Load activies, services, broadcast receivers, and # content providers self.components = Components(self.apk) self.components.enumerate_components() self.permissions = self.apk.get_permissions() self.files = self.apk.get_files() self.files_type = self.apk.get_files_types() # Process DVM self.process_vm(apk=True) else: CommandError("APK not loaded (!)") elif arg0 == "dex": if arg1: self.logger.log("info", "Loading : {} ..." 
.format(arg1.split("/")[-1])) self.dex = arg1 self.process_vm(dex=True) except ImportError as e: CommandError("operate : {}".format(e)) def complete_surgical(self, *args): return self._cmd_completer("surgical", *args) def do_surgical(self, *args): """ := surgical """ try: if self.vm and self.vmx: from .surgical import Run run = Run(self.vm, self.vmx) run.prompt = self.t.yellow("(surgical) ") run.ruler = self.t.yellow("-") run.cmdloop() else: CommandError("classes.dex not loaded (!)") except Exception as e: CommandError("surgical : {}".format(e)) def complete_attacksurface(self, *args): return self._cmd_completer("attacksurface", *args) def do_attacksurface(self, *args): """ := attacksurface """ try: if self.apk and self.components: self.logger.log("info", "Loading attacksurface module ...") from core.brains.apk.attacksurface import AttackSurface self.attack_surface = AttackSurface(self.apk, self.components) self.attack_surface.run() # Helps with visual spacing after the results are printed print("\n") except ImportError as e: CommandError("attacksurface : {}".format(e)) def complete_permissions(self, *args): return self._cmd_completer("permissions", *args) @cmd_arguments(["list"]) def do_permissions(self, *args): """ := permissions list """ # Locals arg0 = args[0] try: if self.permissions: if args[0] == "list": self.logger.log("info", "Loading permissions ... 
\n") for p in self.permissions: print(self.t.yellow("\t--> {}".format(p))) print("\n") else: CommandError("Permissions not found (!)") except Exception as e: CommandError("permissions : {}".format(e)) def complete_binja(self, *args): return self._cmd_completer("binja", *args) def do_binja(self, *args): """ := binja """ try: from .binja import Run run = Run(self.files, self.apk) run.prompt = self.t.cyan("(binja) ") run.ruler = self.t.cyan("-") run.cmdloop() except Exception as e: CommandError("binja : {}".format(e)) def complete_files(self, *args): return self._cmd_completer("files", *args) @cmd_arguments(["all", "assets", "libs", "res"]) def do_files(self, *args): """ := files all := files assets := files libs := files res """ # Locals arg0 = args[0] try: if self.files: if arg0 == "assets": self.logger.log("info", "Loading files ... \n") for f in self.files: if f.startswith("assets"): print(self.t.yellow("\t--> {}".format(f))) print("\n") elif arg0 == "libs": self.logger.log("info", "Loading files ... \n") for f in self.files: if f.startswith("lib"): print(self.t.yellow("\t--> {}".format(f))) print("\n") elif arg0 == "res": self.logger.log("info", "Loading files ... \n") for f in self.files: if f.startswith("res"): print(self.t.yellow("\t--> {}".format(f))) print("\n") elif arg0 == "all": self.logger.log("info", "Loading files ... 
\n") for f in self.files: print(self.t.yellow("\t--> {}".format(f))) print("\n") else: CommandError("Files not populated (!)") except Exception as e: CommandError("files : {}".format(e)) def complete_strings(self, *args): return self._cmd_completer("strings", *args) @cmd_arguments(["list", "search"]) def do_strings(self, *args): """ List and search for strings found in classes.dex := strings list := strings search """ # Locals arg0 = args[0] strings = None try: if arg0 == "list": if self.vm: strings = self.vm.get_strings() if strings: for s in strings: print(self.t.cyan("--> {}".format(s.encode("utf-8", errors="ignore")))) else: CommandError("Strings not found (!)") else: CommandError("classes.dex not loaded (!)") elif arg0 == "search": if self.vm: strings = self.vm.get_strings() if strings: target = raw_input(self.t.yellow("\n\t--> Enter string : ")) for s in strings: if target in s: print(self.t.cyan("\t\t --> {}".format(s.encode("utf-8", errors="ignore")))) print("\n") else: CommandError("Strings not found (!)") else: CommandError("classes.dex not loaded (!)") except Exception as e: CommandError("strings : {}".format(e)) def complete_components(self, *args): return self._cmd_completer("components", *args) @cmd_arguments(["list"]) def do_components(self, *args): """ := components list """ # Locals arg0 = args[0] try: if arg0 == "list": if self.apk: self.logger.log("info", "Enumerating components ...\n") if self.components.activities: for a in self.components.activities: print(self.t.yellow("\t--> activity : {}" .format(a))) print("\n") if self.components.services: for s in self.components.services: print(self.t.yellow("\t--> service : {}" .format(s))) print("\n") if self.components.receivers: for r in self.components.receivers: print(self.t.yellow("\t--> receiver : {}" .format(r))) print("\n") if self.components.providers: for r in self.components.providers: print(self.t.yellow("\t--> provider : {}" .format(s))) print("\n") else: CommandError("APK not loaded 
(!)") except Exception as e: CommandError("components : {}".format(e)) def complete_interact(self, *args): return self._cmd_completer("interact", *args) def do_interact(self, *args): """ Drop into an interactive IPython session. := interact """ try: if self.vm and self.vmx: from core.brains.interact.interact import Interact i = Interact(self.vm, self.vmx) i.run() else: CommandError("classes.dex not loaded (!)") except Exception as e: CommandError("interact : {}".format(e.message)) def complete_class_tree(self, *args): return self._cmd_completer("class_tree", *args) def do_class_tree(self, *args): """ := class_tree """ try: if self.vm: for c in self.vm.get_classes(): # We don't care about Android support classes or resource # classes if c.name.startswith("Landroid") or \ c.name.split("/")[-1].startswith("R"): continue print("\n") print(self.t.yellow("\t--> class : {} {}".format(c.get_access_flags_string(), c.name))) for f in c.get_fields(): print(self.t.white("\t\t--> field : {} {} {}".format(f.get_access_flags_string(), f.get_descriptor(), f.name))) for m in c.get_methods(): print(self.t.cyan("\t\t\t--> method : {} {} {}".format(m.get_access_flags_string(), m.name, m.get_descriptor()))) print("\n") else: CommandError("class_tree : classes.dex not loaded (!)") except Exception as e: CommandError("class_tree : {}".format(e)) def complete_native(self, *args): return self._cmd_completer("native", *args) def do_native(self, *args): """ := native """ # Locals native_methods = list() try: if self.vm: for method in self.vm.get_methods(): if method.get_access_flags() & 0x100: native_methods.append((method.get_class_name(), method.get_name())) if native_methods: print("\n") for n in native_methods: print(self.t.cyan("\t--> {} : {}".format(n[0], n[1]))) print("\n") else: self.logger.log("info", "class_tree : classes.dex not loaded (!)") except Exception as e: CommandError("native : {}".format(e)) def complete_ui(self, *args): return self._cmd_completer("ui", *args) def 
do_ui(self, *args): """ := ui """ try: if self.vm and self.vmx: from core.brains.ui.terminal import TerminalApp ui = TerminalApp(self.vm, self.vmx) ui.run() except Exception as e: CommandError("ui : {}".format(e)) def complete_macro(self, *args): return self._cmd_completer("macro", *args) def do_macro(self, args): """ := macro """ # Locals directory_items = None macro = path.join(self.ROOT_DIR, "macro") selection = None apk_path = None json = None try: print("\n") directory_items = listdir(macro) for i, item in enumerate(directory_items): print(self.t.cyan("\t--> [{}] {}" .format(i, item))) print("\n") selection = raw_input(self.t.yellow("[{}] Select config : ".format(datetime.now()))) try: index = int(selection) except ValueError: index = -1 print("\n") if selection: for i, item in enumerate(directory_items): if selection == item or i == index: selection = item break with open("".join([macro, "/", selection]), "rb") as config: # Load the config as JSON json = loads(config.read()) if json: for k, v in json.items(): if k == "apk": if v: apk_path = str(v) # Call operate() with the path to apk self.do_operate("apk {}" .format(apk_path)) return else: CommandError("macro : Path to APK not found in {}" .format(selection)) else: CommandError("macro : Error loading {} as JSON" .format(selection)) except Exception as e: CommandError("macro : {}".format(e))
class Run(Lobotomy):
    """
    Command handlers (do_* methods) for loading an APK and inspecting it.

    do_operate() populates the instance state (APK handle, dex analysis
    objects, components, permissions, file list); the remaining commands read
    that state and print results through the terminal helper ``self.t``.
    Failures are reported by calling CommandError(...) rather than raising.
    """

    # Maps a `files` sub-command to the archive path prefix it selects.  The
    # empty prefix matches every entry, which implements `files all`.
    _FILE_PREFIXES = {"assets": "assets", "libs": "lib", "res": "res", "all": ""}

    def __init__(self):
        Lobotomy.__init__(self)
        self.t = Terminal()
        self.logger = Logger()
        self.util = Util()
        self.apk = None
        self.package = None
        self.vm = None
        self.vmx = None
        self.gmx = None
        self.components = None
        self.dex = None
        self.strings = None
        self.permissions = None
        self.permissions_details = None
        self.files = None
        self.attack_surface = None

    def find_dex(self):
        """
        Tell whether the loaded APK's file list contains a classes*.dex entry.

        Returns:
            True if any entry of self.files contains the substring "classes",
            False otherwise (including when no file list is loaded).
        """
        if not self.files:
            return False
        return any("classes" in f for f in self.files)

    def process_vm(self):
        """
        Load and analyze the application's classes.dex.

        On success self.dex, self.vm, self.vmx and self.gmx are populated and
        cross references are generated.  Each failure is reported through
        CommandError and stops processing.

        Returns:
            None
        """
        # Make sure classes.dex exists
        if not self.find_dex():
            CommandError("classes.dex not found (!)")
            return
        self.dex = self.apk.get_dex()
        if not self.dex:
            # The archive lists a dex entry but no payload could be read.
            CommandError("classes.dex not found (!)")
            return

        # TODO Throw in a progress bar, this can take awhile
        self.logger.log("info", "Loading classes.dex ...")
        # Imported lazily so the shell can start without androguard loaded.
        from androguard.core.bytecodes.dvm import DalvikVMFormat
        from androguard.core.analysis.analysis import VMAnalysis
        from androguard.core.analysis.ganalysis import GVMAnalysis

        # Create a new virtual machine instance
        self.vm = DalvikVMFormat(self.dex)
        if not self.vm:
            CommandError("Cannot load VM instance (!)")
            return
        print(self.t.yellow("\n\t--> Loaded classes.dex (!)\n"))

        # Analyze the virtual machine instance
        self.logger.log("info", "Analyzing classes.dex ...")
        self.vmx = VMAnalysis(self.vm)
        self.gmx = GVMAnalysis(self.vmx, None)
        if not (self.vmx and self.gmx):
            CommandError("Cannot analyze VM instance (!)")
            return
        print(self.t.yellow("\n\t--> Analyzed classes.dex (!)\n"))
        self.vm.set_vmanalysis(self.vmx)
        self.vm.set_gvmanalysis(self.gmx)
        # Generate xref(s)
        self.vm.create_xref()
        self.vm.create_dref()

    def do_operate(self, args):
        """
        := operate apk path_to_apk
        := operate dex path_to_classes.dex
        """
        # NOTE(review): only the "apk" form is handled here; "dex" falls
        # through to the unknown-command report.
        try:
            tokens = args.split()
            if tokens[0] != "apk":
                CommandError("Unkown command (!)")
                return
            apk_path = tokens[1]
            display_name = apk_path.split("/")[-1]
            self.logger.log("info", "Loading : {} ...".format(display_name))
            from androguard.core.bytecodes.apk import APK
            self.apk = APK(apk_path)
            if not self.apk:
                CommandError("APK not loaded (!)")
                return
            print(self.t.yellow("\n\t--> Loaded : {} (!)\n".format(display_name)))
            self.package = self.apk.get_package()
            from core.brains.apk.components import Components
            # Load activies, services, broadcast receivers, and
            # content providers
            self.components = Components(self.apk)
            self.components.enumerate_components()
            self.permissions = self.apk.get_permissions()
            self.files = self.apk.get_files()
            self.files_type = self.apk.get_files_types()
            # Process virtual machine
            self.process_vm()
        except ImportError as e:
            CommandError(str(e))
        except IndexError:
            # Raised by tokens[0] / tokens[1] when arguments are missing.
            CommandError("Not enough arguments (!)")

    def do_surgical(self, args):
        """
        := surgical
        """
        try:
            if not (self.vm and self.vmx):
                CommandError("classes.dex not loaded (!)")
                return
            # Aliased so the nested shell class does not shadow this class.
            from .surgical import Run as SurgicalRun
            run = SurgicalRun(self.vm, self.vmx)
            run.prompt = self.t.yellow("(surgical) ")
            run.ruler = self.t.yellow("-")
            run.cmdloop()
        except Exception as e:
            CommandError(str(e))

    def do_attacksurface(self, args):
        """
        := attacksurface
        """
        try:
            if self.apk and self.components:
                self.logger.log("info", "Loading attacksurface module ...")
                from core.brains.apk.attacksurface import AttackSurface
                self.attack_surface = AttackSurface(self.apk, self.components)
                self.attack_surface.run()
        except ImportError as e:
            CommandError(str(e))

    def do_permissions(self, args):
        """
        := permissions list
        """
        try:
            if not self.permissions:
                CommandError("Permissions not found (!)")
                return
            if args.split()[0] == "list":
                self.logger.log("info", "Loading permissions ... \n")
                for p in self.permissions:
                    print(self.t.yellow("\t--> {}".format(p)))
                print("\n")
        except Exception as e:
            CommandError(str(e))

    def do_files(self, args):
        """
        := files all
        := files assets
        := files libs
        := files res
        """
        try:
            if not self.files:
                CommandError("Files not populated (!)")
                return
            prefix = self._FILE_PREFIXES.get(args.split()[0])
            if prefix is None:
                # Unknown selector: nothing to list.
                return
            self.logger.log("info", "Loading files ... \n")
            for f in self.files:
                if f.startswith(prefix):
                    print(self.t.yellow("\t--> {}".format(f)))
            print("\n")
        except Exception as e:
            CommandError(str(e))

    def do_strings(self, args):
        """
        List and search for strings found in classes.dex

        := strings list
        := strings search
        """
        try:
            command = args.split()[0]
            if command not in ("list", "search"):
                CommandError("Command not found (!)")
                return
            if not self.vm:
                CommandError("classes.dex not loaded (!)")
                return
            strings = self.vm.get_strings()
            if not strings:
                CommandError("Strings not found (!)")
                return
            if command == "list":
                for s in strings:
                    print(self.t.cyan("--> {}".format(s.encode("utf-8"))))
            else:
                target = raw_input(self.t.yellow("\n\t--> Enter string : \n"))
                for s in strings:
                    if target in s:
                        print(self.t.cyan("\t\t --> {}".format(s)))
                print("\n")
        except Exception as e:
            # Encoding problems such as "'utf8' codec can't decode byte 0xc0"
            # can surface here; report them like every other command does
            # instead of letting them escape the handler.
            CommandError(str(e))

    def do_components(self, args):
        """
        := components list
        """
        try:
            if args.split()[0] != "list":
                CommandError("Command not found (!)")
                return
            if not self.apk:
                CommandError("APK not loaded (!)")
                return
            self.logger.log("info", "Enumerating components ...\n")
            groups = (
                ("activity", self.components.activities),
                ("service", self.components.services),
                ("receiver", self.components.receivers),
                ("provider", self.components.providers),
            )
            for kind, entries in groups:
                if entries:
                    # Print the entry being iterated for every component kind.
                    for entry in entries:
                        print(self.t.yellow("\t--> {} : {}".format(kind, entry)))
                    print("\n")
        except Exception as e:
            CommandError(str(e))

    def do_interact(self, args):
        """
        Drop into an interactive IPython session.

        := interact
        """
        try:
            if not (self.vm and self.vmx):
                CommandError("classes.dex not loaded (!)")
                return
            from core.brains.interact.interact import Interact
            i = Interact(self.vm, self.vmx)
            i.run()
        except Exception as e:
            CommandError(str(e))

    def do_macro(self, args):
        """ """
        # Intentionally a no-op in this shell.
        return
class VectorGroupPermis:
    """
    Build a permission vector for one APK and write the result to a spreadsheet.

    Constructing an instance runs the whole pipeline immediately via main().

    NOTE(review): the methods use the names ``perm``, ``feuil`` and ``book``,
    which are not defined in this class; presumably they are module-level
    objects (a shared list, a worksheet and a workbook) - confirm.
    """

    def __init__(self, apk, pos):
        """
        :param apk: path of the APK; it is converted with str() and passed to APK()
        :param pos: row index used when writing to the worksheet
        """
        self.data = []
        self.don = []
        self.apk = APK(str(apk))
        self.pos = pos
        # Runs lcs() and setVector() right away.
        self.main()

    # Collect the permissions of the APK
    def getVectorOfXml(self):
        """
        Append this APK's short permission names to ``perm`` and return ``perm``.

        A permission string is kept only when one of its dot-separated parts
        is exactly 'permission'; the last part is what gets stored.
        """
        data_permission = []
        for elt in self.apk.get_permissions():
            permis = elt.split('.')
            if 'permission' in permis:
                data_permission.append(permis[-1])
        # ``perm`` is not local: every call adds one more list to it.
        perm.append(data_permission)
        return perm

    # Build our LCS over all the malware APKs
    def lcs(self):
        """
        Compute common contiguous runs between the first list returned by
        getVectorOfXml() and each list in turn, using a DP table.

        :return: the list of longest common runs found for the last list
            processed

        NOTE(review): after each pass ``first_lcs`` is replaced by ``lcs_set``
        (a list of slices), so later passes compare slices against permission
        names - confirm this is intended.
        """
        S = self.getVectorOfXml()
        first_lcs = S[0]
        for liste in S:
            second_lcs = liste
            m = len(first_lcs)
            n = len(second_lcs)
            # counter[i + 1][j + 1] holds the length of the common run ending
            # at first_lcs[i] / second_lcs[j].
            counter = [[0] * (n + 1) for x in range(m + 1)]
            longest = 0
            lcs_set = list()
            for i in range(m):
                for j in range(n):
                    if first_lcs[i] == second_lcs[j]:
                        c = counter[i][j] + 1
                        counter[i + 1][j + 1] = c
                        if c > longest:
                            # A strictly longer run discards shorter ones.
                            lcs_set = list()
                            longest = c
                            lcs_set.append(first_lcs[i - c + 1:i + 1])
                        elif c == longest:
                            lcs_set.append(first_lcs[i - c + 1:i + 1])
                    else:
                        continue
            first_lcs = lcs_set
        return lcs_set

    # Create the Excel file
    def setVector(self, vector):
        """
        Write every element of every item of ``vector`` into row ``self.pos``
        of the worksheet ``feuil`` and save the workbook as 'lcsgoodware.xls'.

        The column index restarts at 0 for each item, so later items write to
        the same cells as earlier ones.
        """
        liste = []
        for colVal in vector:
            liste.append(colVal)
        for item in liste:
            for k, elt in enumerate(item):
                ligne = feuil.row(self.pos)
                ligne.write(k, elt)
        book.save('lcsgoodware.xls')

    def main(self):
        """Compute the runs, write them to the spreadsheet and print them."""
        a = self.lcs()
        self.setVector(a)
        print(a)
def reverse(nameApk):
    """
    Extract static features from one APK stored under TEMP and save them as CSV.

    The APK is hashed, parsed, and analysed for API calls, strings and
    intents. The resulting feature row is written to
    ``DataCSVClient + md5 + '.csv'``, the APK is removed with delAPk(), and
    the per-client CSV files are merged when checkMerge() says so.

    :param nameApk: file name of the APK, relative to TEMP
    :return: ``(md5, apk_total_analysis)`` on success, where
        apk_total_analysis is an OrderedDict with the keys
        "Pre_static_analysis" and "Static_analysis";
        ``('Error', 'No features')`` if any exception is raised during the
        analysis
    """
    # Read the configuration file (only read here, so open it read-only).
    with open(config_file, "r") as f:
        dataConfig = json.load(f)

    # Load Android API packages and classes
    global API_PACKAGES_LIST, API_CLASSES_LIST, API_SYSTEM_COMMANDS

    ############################################################
    # READING PACKAGES, CLASSES AND SYSTEM COMMANDS
    ############################################################
    package_file = load_file(str(package_index_file))
    API_PACKAGES_LIST = [x.strip() for x in package_file]
    class_file = load_file(str(classes_index_file))
    API_CLASSES_LIST = [x.strip() for x in class_file]
    commands_file = load_file(str(system_commands_file))
    API_SYSTEM_COMMANDS = [x.strip() for x in commands_file]

    static_analysis_dict = collections.OrderedDict()
    try:
        analyze_apk = os.path.join(TEMP, nameApk)
        # Getting the name of the folder that contains all apks and folders with apks
        base_folder = TEMP.split("/")[-1]
        apk_filename = join_dir(base_folder, analyze_apk.replace(TEMP, ''))
        apk_filename = apk_filename.replace("//", "/")

        pre_static_dict = collections.OrderedDict()
        pre_static_dict['Filename'] = apk_filename

        hasher_md5 = hashlib.md5()
        hasher_sha256 = hashlib.sha256()
        hasher_sha1 = hashlib.sha1()
        with open(analyze_apk, 'rb') as afile:
            # Hash in chunks so a large APK is never held in memory whole.
            for buf in iter(lambda: afile.read(65536), b''):
                hasher_md5.update(buf)
                hasher_sha256.update(buf)
                hasher_sha1.update(buf)
        md5 = hasher_md5.hexdigest()
        sha256 = hasher_sha256.hexdigest()
        sha1 = hasher_sha1.hexdigest()
        pre_static_dict["md5"] = md5
        pre_static_dict["sha256"] = sha256
        pre_static_dict["sha1"] = sha1
        pre_static_dict["VT_positives"] = None

        apk_Oject = APK(analyze_apk)
        # get package name
        static_analysis_dict['Package_name'] = apk_Oject.get_package()
        # get Permission
        static_analysis_dict['Permissions'] = apk_Oject.get_permissions()
        # Activities
        try:
            list_activities = apk_Oject.get_activities()
        except UnicodeEncodeError:
            list_activities = []
        # get Main ACtivity
        static_analysis_dict['Main_activity'] = apk_Oject.get_main_activity()
        # Receivers
        try:
            list_receivers = apk_Oject.get_receivers()
        except UnicodeEncodeError:
            list_receivers = []
        # Services
        try:
            list_services = apk_Oject.get_services()
        except UnicodeEncodeError:
            list_services = []

        # API calls and Strings
        list_smali_api_calls, list_smali_strings = read_strings_and_apicalls(
            analyze_apk, API_PACKAGES_LIST, API_CLASSES_LIST)
        # Fold every call into its parent name (everything before the last
        # dot).  Iterate over a snapshot of the keys: the loop adds and
        # deletes entries, which is not allowed on a live dict view.
        for api_call in list(list_smali_api_calls.keys()):
            new_api_call = '.'.join(api_call.split(".")[:-1])
            if new_api_call in list_smali_api_calls:
                list_smali_api_calls[new_api_call] = (
                    list_smali_api_calls[new_api_call] + list_smali_api_calls[api_call])
            else:
                list_smali_api_calls[new_api_call] = list_smali_api_calls[api_call]
            del list_smali_api_calls[api_call]
        static_analysis_dict['API_calls'] = list_smali_api_calls
        static_analysis_dict['Strings'] = Counter(filter(None, list_smali_strings))

        # API packages: attribute each call to the matching package with the
        # most dot-separated components.
        API_packages_dict = collections.OrderedDict()
        android_list_packages_lenghts = [len(x.split(".")) for x in API_PACKAGES_LIST]
        for api_call in list_smali_api_calls.keys():
            score = 0
            package_chosen = None
            for i, package in enumerate(API_PACKAGES_LIST):
                len_package = android_list_packages_lenghts[i]
                if api_call.startswith(package) and len_package > score:
                    score = len_package
                    package_chosen = package
            if package_chosen is not None:
                if package_chosen not in API_packages_dict:
                    API_packages_dict[package_chosen] = list_smali_api_calls[api_call]
                else:
                    API_packages_dict[package_chosen] += list_smali_api_calls[api_call]
        static_analysis_dict['API_packages'] = API_packages_dict

        # Intents
        try:
            static_analysis_dict['Intents'] = intents_analysis(
                join_dir(analyze_apk.replace('.apk', ''), 'AndroidManifest.xml'))
        except Exception:
            static_analysis_dict['Intents'] = {'Failed to extract intents': 0}

        # Intents of activities
        intents_activities = collections.OrderedDict()
        for activity in list_activities:
            intents_activities[activity] = check_for_intents(
                join_dir(analyze_apk.replace('.apk', ''), 'AndroidManifest.xml'),
                activity, 'activity')
        static_analysis_dict['Activities'] = intents_activities

        # Intents of services
        intents_services = collections.OrderedDict()
        for service in list_services:
            intents_services[service] = check_for_intents(
                join_dir(analyze_apk.replace('.apk', ''), 'AndroidManifest.xml'),
                service, 'service')
        static_analysis_dict['Services'] = intents_services

        # Intents of receivers
        # NOTE(review): this path is built with '/' where the others use '';
        # kept as is - confirm join_dir() treats both the same.
        intents_receivers = collections.OrderedDict()
        for intent in list_receivers:
            intents_receivers[intent] = check_for_intents(
                join_dir(analyze_apk.replace('.apk', '/'), 'AndroidManifest.xml'),
                intent, 'receiver')
        static_analysis_dict['Receivers'] = intents_receivers

        apk_total_analysis = collections.OrderedDict([
            ("Pre_static_analysis", pre_static_dict),
            ("Static_analysis", static_analysis_dict)])

        row = standardData(pre_static_dict, static_analysis_dict)
        # The context manager closes the file even if writing fails.
        with open(DataCSVClient + md5 + '.csv', 'w+', newline='') as csvFileClient:
            writer = csv.writer(csvFileClient, delimiter=',')
            writer.writerow(row)

        delAPk(analyze_apk)
        if checkMerge(DataCSVClient, dataConfig['mergeCSV']):
            mergeCSV()
        return md5, apk_total_analysis
    except Exception as e:
        print('Exception: ', e)
        return 'Error', 'No features'
def getFeatures(source_directory):
    """
    Extract static features from every APK under a directory into one CSV.

    For each ``*.apk`` found, the file is hashed and analysed, and a feature
    row is built with standardData(). A row is written only when the APK's
    md5 is listed in the name/label table and its label has a number in the
    label-number table; the row is prefixed with that number. Afterwards the
    label table is written back and the ``partData`` counter in the config
    file is incremented.

    :param source_directory: directory scanned for APK files (converted with
        str())
    :return: None
    """
    # Aggregated label table (label text -> label number).
    with open(LabelsNum_file, "r+") as file_LabeslNum:
        LABELSNUMANDTEXT = json.load(file_LabeslNum)

    # Read the configuration file.
    with open(config_file, "r+") as f:
        dataConfig = json.load(f)
    maxLabelsNum = dataConfig['maxLabelsNum']
    # Index of the data part produced by this run.
    partData = dataConfig['partData']

    now = datetime.datetime.now()
    partDataFile = str(partData) + '_' + str(now).strip() + '.csv'
    source_directory = str(source_directory)

    # Load Android API packages and classes
    global API_PACKAGES_LIST, API_CLASSES_LIST, API_SYSTEM_COMMANDS

    # The context manager closes the CSV even if an APK makes the loop raise.
    with open(r'DataCSV/' + partDataFile, 'w+', newline='') as csvFile:
        writer = csv.writer(csvFile, delimiter=',')

        # get name and labels
        ARRNAME, ARRLABELS = load_NameandLabels(labels)

        ############################################################
        # READING PACKAGES, CLASSES AND SYSTEM COMMANDS
        ############################################################
        package_file = load_file(str(package_index_file))
        API_PACKAGES_LIST = [x.strip() for x in package_file]
        class_file = load_file(str(classes_index_file))
        API_CLASSES_LIST = [x.strip() for x in class_file]
        commands_file = load_file(str(system_commands_file))
        API_SYSTEM_COMMANDS = [x.strip() for x in commands_file]

        apk_list = list_files(source_directory, '*.apk')
        for analyze_apk in tqdm(apk_list):
            # Getting the name of the folder that contains all apks and folders with apks
            base_folder = source_directory.split("/")[-1]
            apk_filename = join_dir(base_folder, analyze_apk.replace(source_directory, ''))
            apk_filename = apk_filename.replace("//", "/")
            apk_name_no_extensions = "".join(
                apk_filename.split("/")[-1].split(".")[:-1])

            pre_static_dict = collections.OrderedDict()
            pre_static_dict['Filename'] = apk_filename

            hasher_md5 = hashlib.md5()
            hasher_sha256 = hashlib.sha256()
            hasher_sha1 = hashlib.sha1()
            with open(analyze_apk, 'rb') as afile:
                # Hash in chunks so a large APK is never held in memory whole.
                for buf in iter(lambda: afile.read(65536), b''):
                    hasher_md5.update(buf)
                    hasher_sha256.update(buf)
                    hasher_sha1.update(buf)
            md5 = hasher_md5.hexdigest()
            sha256 = hasher_sha256.hexdigest()
            sha1 = hasher_sha1.hexdigest()
            pre_static_dict["md5"] = md5
            pre_static_dict["sha256"] = sha256
            pre_static_dict["sha1"] = sha1
            pre_static_dict["VT_positives"] = None

            try:
                androguard_apk_object = APK(analyze_apk)
            except Exception:
                print("ERROR in APK: " + apk_name_no_extensions)
                continue

            static_analysis_dict = collections.OrderedDict()
            # Package name
            static_analysis_dict['Package name'] = androguard_apk_object.get_package()
            # Permissions
            static_analysis_dict['Permissions'] = androguard_apk_object.get_permissions()
            # Activities
            try:
                list_activities = androguard_apk_object.get_activities()
            except UnicodeEncodeError:
                list_activities = []
            # Main activity
            static_analysis_dict['Main activity'] = androguard_apk_object.get_main_activity()
            # Receivers
            try:
                list_receivers = androguard_apk_object.get_receivers()
            except UnicodeEncodeError:
                list_receivers = []
            # Services
            try:
                list_services = androguard_apk_object.get_services()
            except UnicodeEncodeError:
                list_services = []

            # API calls and Strings
            list_smali_api_calls, list_smali_strings = read_strings_and_apicalls(
                analyze_apk, API_PACKAGES_LIST, API_CLASSES_LIST)
            # Fold every call into its parent name (everything before the
            # last dot).  Iterate over a snapshot of the keys: the loop adds
            # and deletes entries, which is not allowed on a live dict view.
            for api_call in list(list_smali_api_calls.keys()):
                new_api_call = '.'.join(api_call.split(".")[:-1])
                if new_api_call in list_smali_api_calls:
                    list_smali_api_calls[new_api_call] = (
                        list_smali_api_calls[new_api_call] + list_smali_api_calls[api_call])
                else:
                    list_smali_api_calls[new_api_call] = list_smali_api_calls[api_call]
                del list_smali_api_calls[api_call]
            static_analysis_dict['API calls'] = list_smali_api_calls
            static_analysis_dict['Strings'] = Counter(
                filter(None, list_smali_strings))

            # API packages: attribute each call to the matching package with
            # the most dot-separated components.
            API_packages_dict = collections.OrderedDict()
            android_list_packages_lenghts = [
                len(x.split(".")) for x in API_PACKAGES_LIST
            ]
            for api_call in list_smali_api_calls.keys():
                score = 0
                package_chosen = None
                for i, package in enumerate(API_PACKAGES_LIST):
                    len_package = android_list_packages_lenghts[i]
                    if api_call.startswith(package) and len_package > score:
                        score = len_package
                        package_chosen = package
                if package_chosen is not None:
                    if package_chosen not in API_packages_dict:
                        API_packages_dict[package_chosen] = list_smali_api_calls[api_call]
                    else:
                        API_packages_dict[package_chosen] += list_smali_api_calls[api_call]
            static_analysis_dict['API packages'] = API_packages_dict

            # Intents
            try:
                static_analysis_dict['Intents'] = intents_analysis(
                    join_dir(analyze_apk.replace('.apk', ''), 'AndroidManifest.xml'))
            except Exception:
                static_analysis_dict['Intents'] = {'Failed to extract intents': 0}

            # Intents of activities
            intents_activities = collections.OrderedDict()
            for activity in list_activities:
                intents_activities[activity] = check_for_intents(
                    join_dir(analyze_apk.replace('.apk', ''), 'AndroidManifest.xml'),
                    activity, 'activity')
            static_analysis_dict['Activities'] = intents_activities

            # Intents of services
            intents_services = collections.OrderedDict()
            for service in list_services:
                intents_services[service] = check_for_intents(
                    join_dir(analyze_apk.replace('.apk', ''), 'AndroidManifest.xml'),
                    service, 'service')
            static_analysis_dict['Services'] = intents_services

            # Intents of receivers
            # NOTE(review): this path is built with '/' where the others use
            # ''; kept as is - confirm join_dir() treats both the same.
            intents_receivers = collections.OrderedDict()
            for intent in list_receivers:
                intents_receivers[intent] = check_for_intents(
                    join_dir(analyze_apk.replace('.apk', '/'), 'AndroidManifest.xml'),
                    intent, 'receiver')
            static_analysis_dict['Receivers'] = intents_receivers

            row = standardData(pre_static_dict, static_analysis_dict)

            # Only APKs listed by md5 in the name table are written.  When
            # the sha256 is listed as well, its position takes precedence.
            if md5 in ARRNAME:
                index = ARRNAME.index(md5)
                if sha256 in ARRNAME:
                    index = ARRNAME.index(sha256)
                label = ARRLABELS[index]
                try:
                    # Labels without a number are skipped (new labels are not
                    # registered automatically).
                    if label not in LABELSNUMANDTEXT:
                        continue
                except Exception:
                    continue
                labelNum = [LABELSNUMANDTEXT[label]]
                labelNum.extend(row)
                writer.writerow(labelNum)

    # Save the label table back to disk.
    with open(str(LabelsNum_file), 'w+') as fp:
        json.dump(LABELSNUMANDTEXT, fp, indent=4)

    # Save data config
    partData += 1
    dataConfig['partData'] = partData
    dataConfig['maxLabelsNum'] = maxLabelsNum
    with open(str(config_file), 'w+') as fp:
        json.dump(dataConfig, fp, indent=4)
class StaticAnalysis:
    """
    Static inspection of one APK file: manifest details, embedded classes,
    tracker detection, icon hashing and signing certificates.

    The APK is parsed lazily: accessors call load_apk(), which parses the
    file only the first time.
    """

    def __init__(self, apk_path=None):
        """
        :param apk_path: path of the APK file; when given, the APK is loaded
            immediately
        """
        self.apk = None
        self.apk_path = apk_path
        self.signatures = None
        self.compiled_tracker_signature = None
        self.classes = None
        self.app_details = None
        if apk_path is not None:
            self.load_apk()

    def _compile_signatures(self):
        """
        Compiles the regex associated to each signature, in order to speed up
        the trackers detection.

        The result is stored in self.compiled_tracker_signature, in the same
        order as self.signatures.
        """
        self.compiled_tracker_signature = []
        try:
            self.compiled_tracker_signature = [
                re.compile(track.code_signature) for track in self.signatures
            ]
        except TypeError:
            print("self.signatures is not iterable")

    def load_trackers_signatures(self):
        """
        Load trackers signatures from the official Exodus database.

        Fills self.signatures with one namedtuple per tracker and compiles
        their code signatures.
        """
        self.signatures = []
        exodus_url = "https://reports.exodus-privacy.eu.org/api/trackers"
        r = requests.get(exodus_url)
        data = r.json()
        for e in data['trackers']:
            self.signatures.append(
                namedtuple(
                    'tracker',
                    data['trackers'][e].keys())(*data['trackers'][e].values()))
        self._compile_signatures()
        logging.debug('{} trackers signatures loaded'.format(
            len(self.signatures)))

    def load_apk(self):
        """
        Load the APK file (no-op when it is already loaded).
        """
        if self.apk is None:
            self.apk = APK(self.apk_path)

    def get_embedded_classes(self):
        """
        Get the list of Java classes embedded into all DEX files.

        The result is cached in self.classes.

        :return: array of Java classes names as string
        :raises Exception: if dexdump is not available or its output cannot
            be decoded
        """
        if self.classes is not None:
            return self.classes

        dexdump = which('dexdump')
        if dexdump is None:
            # Without this check the shell would run "None ...": the pipeline
            # exit status hides that failure and shell noise would be
            # returned as class names.
            logging.error('Unable to find dexdump')
            raise Exception('Unable to decode the APK')

        class_regex = re.compile(r'classes.*\.dex')
        with TemporaryDirectory() as tmp_dir:
            with zipfile.ZipFile(self.apk_path, "r") as apk_zip:
                class_infos = (info for info in apk_zip.infolist()
                               if class_regex.search(info.filename))
                for info in class_infos:
                    apk_zip.extract(info, tmp_dir)
            # The command is a shell pipeline, hence shell=True; both
            # interpolated values are produced locally (which() and the
            # temporary directory).
            cmd = '{} {}/classes*.dex | perl -n -e\'/[A-Z]+((?:\w+\/)+\w+)/ && print "$1\n"\'|sort|uniq'.format(
                dexdump, tmp_dir)
            try:
                self.classes = subprocess.check_output(
                    cmd,
                    stderr=subprocess.STDOUT,
                    shell=True,
                    universal_newlines=True).splitlines()
                logging.debug('{} classes found in {}'.format(
                    len(self.classes), self.apk_path))
                return self.classes
            except subprocess.CalledProcessError:
                logging.error('Unable to decode {}'.format(self.apk_path))
                raise Exception('Unable to decode the APK')

    def detect_trackers_in_list(self, class_list):
        """
        Detect embedded trackers in the provided classes list.

        Signatures of 3 characters or fewer are ignored.

        :param class_list: iterable of class names
        :return: list of embedded trackers
        """
        if self.signatures is None:
            self.load_trackers_signatures()

        trackers = [
            tracker
            for sig, tracker in zip(self.compiled_tracker_signature,
                                    self.signatures)
            if len(tracker.code_signature) > 3
            and any(sig.search(clazz) for clazz in class_list)
        ]
        logging.debug('{} trackers detected in {}'.format(
            len(trackers), self.apk_path))
        return trackers

    def detect_trackers(self, class_list_file=None):
        """
        Detect embedded trackers.

        :param class_list_file: optional file holding one class name per
            line; when omitted the classes are read from the APK
        :return: list of embedded trackers
        """
        if self.signatures is None:
            self.load_trackers_signatures()
        if class_list_file is None:
            return self.detect_trackers_in_list(self.get_embedded_classes())
        with open(class_list_file, 'r') as classes_file:
            classes = classes_file.readlines()
        return self.detect_trackers_in_list(classes)

    def get_version(self):
        """
        Get the application version name
        :return: version name
        """
        self.load_apk()
        return self.apk.get_androidversion_name()

    def get_version_code(self):
        """
        Get the application version code
        :return: version code
        """
        self.load_apk()
        return self.apk.get_androidversion_code()

    def get_permissions(self):
        """
        Get application permissions
        :return: application permissions list
        """
        self.load_apk()
        return self.apk.get_permissions()

    def get_app_name(self):
        """
        Get application name
        :return: application name
        """
        self.load_apk()
        return self.apk.get_app_name()

    def get_package(self):
        """
        Get application package
        :return: application package
        """
        self.load_apk()
        return self.apk.get_package()

    def get_libraries(self):
        """
        Get application libraries
        :return: application libraries list
        """
        self.load_apk()
        return self.apk.get_libraries()

    def get_icon_path(self):
        """
        Get the icon path in the ZIP archive
        :return: icon path in the ZIP archive
        """
        self.load_apk()
        return self.apk.get_app_icon()

    def get_application_details(self):
        """
        Get the application details like creator, number of downloads, etc.

        The details are looked up by package name and cached in
        self.app_details once a non-None result is obtained.

        :return: application details dictionary
        """
        self.load_apk()
        if self.app_details is not None:
            return self.app_details
        details = get_details_from_gplaycli(self.get_package())
        if details is not None:
            self.app_details = details
        return details

    def _get_icon_from_details(self, path):
        """
        Get icon from applications details dictionary
        :param path: path where to write the icon file
        :return: icon path
        :raises Exception: if unable to find icon
        """
        details = self.get_application_details()
        if details is not None:
            # A details dictionary without 'images' is treated as having no
            # images, so the failure below is the one that gets raised.
            for i in details.get('images') or []:
                if i.get('imageType') == 4:
                    f = requests.get(i.get('url'))
                    with open(path, mode='wb') as fp:
                        fp.write(f.content)
                    if os.path.isfile(path) and os.path.getsize(path) > 0:
                        return path
        raise Exception('Unable to download the icon from details')

    # NOTE(review): declared static yet takes `self` as an explicit first
    # parameter; the signature is kept unchanged for existing callers.
    @staticmethod
    def _render_drawable_to_png(self, bxml, path):
        ap = axml.AXMLPrinter(bxml)
        print(ap.get_buff())

    def save_icon(self, path):
        """
        Extract the icon from the ZIP archive and save it at the given path
        :param path: destination path of the icon
        :return: destination path of the icon, None in case of error
        """
        try:
            icon = self.get_icon_path()
            if icon is None:
                raise Exception('Unable to get icon path')
            with zipfile.ZipFile(self.apk_path) as z:
                with open(path, 'wb') as f:
                    f.write(z.read(icon))
                # Opening the written file checks that it is a readable image.
                with Image.open(path) as _:
                    logging.info('Get icon from APK: success')
                    return path
        except Exception:
            logging.warning('Unable to get the icon from the APK')
            return None
        # TODO: Set this back once details download is working again
        # logging.warning('Downloading icon from details')
        # try:
        #     saved_path = self._get_icon_from_details(path)
        #     logging.debug('Icon downloaded from application details')
        #     return saved_path
        # except Exception as e:
        #     logging.warning(e)

    def get_icon_phash(self):
        """
        Get the perceptual hash of the application icon
        :return: the perceptual hash, empty string in case of error
        """
        with NamedTemporaryFile() as ic:
            path = self.save_icon(ic.name)
            if path is None:
                logging.error('Unable to save the icon')
                return ''
            return self.get_phash(ic.name)

    @staticmethod
    def get_phash(image_name):
        """
        Get the perceptual hash of the given image
        :param image_name: name of the image file
        :return: the perceptual hash, empty string in case of error
        """
        dhash.force_pil()  # Force PIL
        try:
            image = Image.open(image_name).convert("RGBA")
            row, col = dhash.dhash_row_col(image, size=PHASH_SIZE)
            # Row bits go in the high half, column bits in the low half.
            return row << (PHASH_SIZE * PHASH_SIZE) | col
        except IOError as e:
            logging.error(e)
            return ''

    @staticmethod
    def get_icon_similarity(phash_origin, phash_candidate):
        """
        Get icons similarity score [0,1.0]
        :param phash_origin: original icon
        :param phash_candidate: icon to be compared
        :return: similarity score [0,1.0]
        """
        diff = dhash.get_num_bits_different(phash_origin, phash_candidate)
        return 1 - 1. * diff / (PHASH_SIZE * PHASH_SIZE * 2)

    def get_application_universal_id(self):
        """
        Build an identifier from the package name and certificate fingerprints.

        :return: upper-case hex SHA-1 of the package name and the upper-cased
            certificate fingerprints joined by single spaces
        """
        parts = [self.get_package()]
        for c in self.get_certificates():
            parts.append(c.fingerprint.upper())
        return sha1(' '.join(parts).encode('utf-8')).hexdigest().upper()

    def get_certificates(self):
        """
        Get the certificates the APK is signed with.

        :return: list of Certificate objects, one per signature name
        """
        # Same lazy-loading contract as the other accessors.
        self.load_apk()
        certificates = []

        def _my_name_init(self, oid, value, _type=_SENTINEL):
            if not isinstance(oid, ObjectIdentifier):
                raise TypeError(
                    "oid argument must be an ObjectIdentifier instance.")
            if not isinstance(value, six.text_type):
                raise TypeError("value argument must be a text type.")
            if len(value) == 0:
                raise ValueError("Value cannot be an empty string")
            if _type == _SENTINEL:
                _type = _NAMEOID_DEFAULT_TYPE.get(oid, _ASN1Type.UTF8String)
            if not isinstance(_type, _ASN1Type):
                raise TypeError("_type must be from the _ASN1Type enum")
            self._oid = oid
            self._value = value
            self._type = _type

        # Replaces NameAttribute's constructor for the whole process, not
        # only for this call.
        NameAttribute.__init__ = _my_name_init

        signs = self.apk.get_signature_names()
        for s in signs:
            c = self.apk.get_certificate(s)
            cert = Certificate(c)
            certificates.append(cert)
        return certificates

    def get_apk_size(self):
        """
        Get the APK file size in bytes
        :return: APK file size
        """
        return os.path.getsize(self.apk_path)

    def get_sha256(self):
        """
        Get the sha256sum of the APK file
        :return: hex sha256sum
        """
        BLOCKSIZE = 65536
        hasher = sha256()
        with open(self.apk_path, 'rb') as apk:
            buf = apk.read(BLOCKSIZE)
            while len(buf) > 0:
                hasher.update(buf)
                buf = apk.read(BLOCKSIZE)
        return hasher.hexdigest()

    def save_embedded_classes_in_file(self, file_path):
        """
        Save list of embedded classes in file.
        :param file_path: file to write
        """
        with open(file_path, 'w+') as f:
            f.write('\n'.join(self.get_embedded_classes()))

    def print_apk_infos(self):
        """
        Print APK information
        """
        permissions = self.get_permissions()
        libraries = self.get_libraries()
        certificates = self.get_certificates()
        print("=== Information")
        print('- APK path: {}'.format(self.apk_path))
        print('- APK sum: {}'.format(self.get_sha256()))
        print('- App version: {}'.format(self.get_version()))
        print('- App version code: {}'.format(self.get_version_code()))
        print('- App UID: {}'.format(self.get_application_universal_id()))
        print('- App name: {}'.format(self.get_app_name()))
        print('- App package: {}'.format(self.get_package()))
        print('- App permissions: {}'.format(len(permissions)))
        for perm in permissions:
            print(' - {}'.format(perm))
        print('- App libraries:')
        for lib in libraries:
            print(' - {}'.format(lib))
        print('- Certificates: {}'.format(len(certificates)))
        for cert in certificates:
            print(' - {}'.format(cert))

    def print_embedded_trackers(self):
        """
        Print detected trackers
        """
        trackers = self.detect_trackers()
        print('=== Found trackers: {}'.format(len(trackers)))
        for t in trackers:
            print(' - {}'.format(t.name))
def extract_features(file_path):
    """
    Extract static features from an APK file.

    The APK is parsed and its DEX code analysed; manifest data, hashes of
    the raw file, identifier names, a few boolean flags and binary feature
    vectors (built from API_CALLS, PERMISSIONS and SPECIAL_STRINGS) are
    collected into a dictionary.

    :param file_path: path of the APK file to analyse
    :return: dictionary of extracted features, None if the APK could not
             be loaded or analysed (the error is printed)
    """
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception as e:
        # Best effort: any loading/analysis failure is reported and
        # signalled to the caller with None.
        # print() call form works on both Python 2 and Python 3.
        print(e)
        return None

    result = {}
    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()

    # Fetch the raw APK content once and feed it to both digests.
    raw = a.get_raw()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()

    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]

    # The native-code, crypto, dynamic-code and reflection flags are
    # currently disabled; only obfuscation and database usage are computed.
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0

    # Entropy rate over all class, method and field names.
    s_list = []
    s_list.extend(result['class_names'])
    s_list.extend(result['method_names'])
    s_list.extend(result['field_names'])
    result['entropy_rate'] = entropy_rate(s_list)

    # Binary feature vectors: one 0/1 entry per reference item, in the
    # order of the reference lists.
    feature_vectors = {}
    result['feature_vectors'] = feature_vectors

    feature_vectors['api_calls'] = [
        1 if dx.get_method_by_name(".", call, ".") else 0
        for call in API_CALLS
    ]

    # Set lookup instead of scanning the permission list for every entry.
    declared_permissions = set(result['permissions'])
    feature_vectors['permissions'] = [
        1 if permission in declared_permissions else 0
        for permission in PERMISSIONS
    ]

    feature_vectors['special_strings'] = [
        1 if d.get_regex_strings(word) else 0
        for word in SPECIAL_STRINGS
    ]

    feature_vectors['others'] = [
        result['is_obfuscation'],
        result['is_database'],
    ]

    return result