def run_decompiler(d, dx, decompiler):
    """
    Run the decompiler on a specific analysis

    When ``decompiler`` is None nothing is attached to ``d``. Any name other
    than "dex2jad" or "ded" (compared case-insensitively) results in the DAD
    decompiler being attached; names other than "dad" are additionally
    reported through the ``logging`` module.

    :param d: the DalvikVMFormat object
    :type d: :class:`DalvikVMFormat` object
    :param dx: the analysis of the format
    :type dx: :class:`VMAnalysis` object
    :param decompiler: the type of decompiler to use ("dad", "dex2jad", "ded")
    :type decompiler: string
    """
    if decompiler is None:
        return

    # Imported locally so the block does not depend on a module-level logger.
    import logging

    name = decompiler.lower()
    if name == "dex2jad":
        d.set_decompiler(
            DecompilerDex2Jad(d,
                              androconf.CONF["PATH_DEX2JAR"],
                              androconf.CONF["BIN_DEX2JAR"],
                              androconf.CONF["PATH_JAD"],
                              androconf.CONF["BIN_JAD"],
                              androconf.CONF["TMP_DIRECTORY"]))
    elif name == "ded":
        d.set_decompiler(
            DecompilerDed(d,
                          androconf.CONF["PATH_DED"],
                          androconf.CONF["BIN_DED"],
                          androconf.CONF["TMP_DIRECTORY"]))
    else:
        if name != "dad":
            # This is a plain function: there is no ``self`` to log through,
            # so report the fallback via the standard logging module.
            logging.getLogger(__name__).info(
                "Unknown decompiler, use DAD decompiler by default")
        d.set_decompiler(DecompilerDAD(d, dx))
def addDEY(self, filename, data, dx=None):
    """
    Add an ODEX file to the session and run the analysis

    :param filename: key under which the digest is appended in
        ``self.analyzed_files``
    :param data: raw bytes handed to ``DalvikOdexVMFormat`` and hashed
        with SHA256
    :param dx: an existing analysis object to extend (optional); a new
        ``Analysis`` is created when None
    :return: a tuple of the SHA256 hex digest, the ``DalvikOdexVMFormat``
        built from ``data`` and the analysis object
    """
    digest = hashlib.sha256(data).hexdigest()
    log.debug("add DEY:%s" % digest)
    d = DalvikOdexVMFormat(data)
    log.debug("added DEY:%s" % digest)

    self.analyzed_files[filename].append(digest)
    self.analyzed_digest[digest] = filename
    self.analyzed_dex[digest] = d

    if self.export_ipython:
        d.create_python_export()

    if dx is None:
        dx = Analysis()

    dx.add(d)
    dx.create_xref()

    # Use a distinct loop name: rebinding ``d`` here would make the returned
    # object depend on the iteration order of ``dx.vms``.
    for vm in dx.vms:
        # TODO: allow different decompiler here!
        vm.set_decompiler(DecompilerDAD(vm, dx))
        vm.set_vmanalysis(dx)

    self.analyzed_vms[digest] = dx

    return digest, d, dx
def extract_permissions(file):
    """
    Load the APK at ``file`` and return the result of its
    ``get_permissions()`` call.

    Before returning, the DEX payload is parsed and analysed twice and a DAD
    decompiler is attached to the first parsed object; none of those objects
    are read again in this function, but any exception they raise
    propagates to the caller.

    :param file: value passed straight to ``APK``
    :return: whatever ``APK.get_permissions()`` returns
    """
    apk = APK(file)

    dex = DalvikVMFormat(apk.get_dex())
    dex_analysis = VMAnalysis(dex)

    # Second, independent parse of the same DEX; its analysis is discarded.
    second_dex = dvm.DalvikVMFormat(apk.get_dex())
    analysis.uVMAnalysis(second_dex)

    dex.set_vmanalysis(dex_analysis)
    dex.set_decompiler(DecompilerDAD(dex, dex_analysis))

    return apk.get_permissions()
def extract_features(file_path):
    """
    Load the APK at ``file_path`` and return its permissions.

    :param file_path: value passed straight to ``APK``
    :return: the result of ``APK.get_permissions()``, or None when loading,
        parsing or analysing the APK raises an exception
    """
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        vm = dvm.DalvikVMFormat(a.get_dex())
        # NOTE(review): ``analysis.uAnalysis`` and ``d.set_Analysis`` are not
        # names used by any other snippet here (those call ``uVMAnalysis`` /
        # ``set_vmanalysis``). If they do not exist in the installed
        # androguard, the AttributeError is swallowed below and this function
        # always returns None -- confirm against the library version in use.
        vmx = analysis.uAnalysis(vm)
        d.set_Analysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception:
        # Narrower than a bare ``except:`` so KeyboardInterrupt and
        # SystemExit are no longer silently converted into a None result.
        return None
    return a.get_permissions()
def addDEX(self, filename, data, dx=None, postpone_xref=False):
    """
    Add a DEX file to the Session and run analysis.

    :param filename: the (file)name of the DEX file
    :param data: binary data of the dex file
    :param dx: an existing Analysis Object (optional)
    :param postpone_xref: True if no xref shall be created, and will be called manually
    :return: A tuple of SHA256 Hash, DalvikVMFormat Object and Analysis object
    """
    digest = hashlib.sha256(data).hexdigest()
    log.debug("add DEX:%s" % digest)

    log.debug("Parsing format ...")
    d = DalvikVMFormat(data)
    log.debug("added DEX:%s" % digest)

    self.analyzed_files[filename].append(digest)
    self.analyzed_digest[digest] = filename
    self.analyzed_dex[digest] = d

    if dx is None:
        dx = Analysis()

    dx.add(d)
    if not postpone_xref:
        dx.create_xref()

    # TODO: If multidex: this will called many times per dex, even if already set
    # Use a distinct loop name: rebinding ``d`` here would make both the
    # ipython export and the returned object depend on the iteration order
    # of ``dx.vms`` instead of being the DEX that was just parsed.
    for vm in dx.vms:
        # TODO: allow different decompiler here!
        vm.set_decompiler(DecompilerDAD(vm, dx))
        vm.set_vmanalysis(dx)

    self.analyzed_vms[digest] = dx

    if self.export_ipython:
        log.debug("Exporting in ipython")
        d.create_python_export()

    return digest, d, dx
def extract_features(file_path):
    """
    Build a feature dictionary for the APK at ``file_path``.

    The result holds manifest metadata, MD5/SHA256 of the raw APK, the
    strings and class/method/field names of the DEX, an obfuscation flag, a
    database flag, the manifest intent actions, an entropy rate over all
    names, and a ``feature_vectors`` dict of 0/1 lists aligned with the
    module-level ``API_CALLS``, ``PERMISSIONS``, ``INTENTS`` and
    ``SPECIAL_STRINGS`` sequences.

    :param file_path: value passed straight to ``APK``
    :return: the feature dict, or None when loading or analysing the APK
        raises an exception
    """
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception:
        # Narrower than a bare ``except:`` so KeyboardInterrupt and
        # SystemExit still propagate.
        return None

    result = {}
    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    raw = a.get_raw()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]
    # Disabled features: libraries, is_native_code, is_crypto_code,
    # is_dyn_code, is_reflection_code.
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0
    result['intents'] = list(a.get_elements("action", "name"))

    names = (result['class_names']
             + result['method_names']
             + result['field_names'])
    result['entropy_rate'] = entropy_rate(names)

    vectors = {}
    result['feature_vectors'] = vectors

    # Search for the presence of api calls in a given apk
    vectors['api_calls'] = [
        1 if dx.get_method(call) else 0 for call in API_CALLS
    ]

    # Search for the presence of permissions in a given apk
    granted = set(result['permissions'])
    vectors['permissions'] = [
        1 if permission in granted else 0 for permission in PERMISSIONS
    ]

    # Search for the presence of intents in a given apk: an intent counts as
    # present when it is contained in any of the manifest action names.
    vectors['intents'] = [
        1 if any(intent in declared for declared in result['intents']) else 0
        for intent in INTENTS
    ]

    # Check for special strings in code
    vectors['special_strings'] = [
        1 if d.get_regex_strings(word) else 0 for word in SPECIAL_STRINGS
    ]

    return result
def extract_features(file_path):
    """
    Build a feature dictionary for the APK at ``file_path``.

    The result holds manifest metadata, MD5/SHA256 of the raw APK, the
    strings and class/method/field names of the DEX, an obfuscation flag, a
    database flag, an entropy rate over all names, and a ``feature_vectors``
    dict of 0/1 lists aligned with the module-level ``API_CALLS``,
    ``PERMISSIONS`` and ``SPECIAL_STRINGS`` sequences plus an ``others``
    list of the two flags.

    :param file_path: value passed straight to ``APK``
    :return: the feature dict, or None (after printing the exception) when
        loading or analysing the APK raises
    """
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception as e:
        # Call form of print: valid on both Python 2 and Python 3.
        print(e)
        return None

    result = {}
    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    raw = a.get_raw()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]
    # Disabled features: is_native_code, is_crypto_code, is_dyn_code,
    # is_reflection_code.
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0

    names = (result['class_names']
             + result['method_names']
             + result['field_names'])
    result['entropy_rate'] = entropy_rate(names)

    vectors = {}
    result['feature_vectors'] = vectors

    vectors['api_calls'] = [
        1 if dx.get_method_by_name(".", call, ".") else 0
        for call in API_CALLS
    ]

    granted = set(result['permissions'])
    vectors['permissions'] = [
        1 if permission in granted else 0 for permission in PERMISSIONS
    ]

    vectors['special_strings'] = [
        1 if d.get_regex_strings(word) else 0 for word in SPECIAL_STRINGS
    ]

    vectors['others'] = [
        # result['is_reflection_code'],
        # result['is_crypto_code'],
        # result['is_native_code'],
        result['is_obfuscation'],
        result['is_database'],
        # result['is_dyn_code']
    ]

    return result
def extract_features(file_path):
    """
    Build a feature dictionary for the APK at ``file_path``.

    The result holds manifest metadata, MD5/SHA256 of the raw APK, the
    native/obfuscation/crypto/dynamic/reflection/database flags, an entropy
    rate over the class, method and field names, and a ``feature_vectors``
    dict with 0/1 lists aligned with the module-level ``API_CALLS``,
    ``PERMISSIONS`` and ``SPECIAL_STRINGS`` sequences plus ``opt_codes``:
    a dict mapping the string form of each of the ``NGRAM_THRE`` most
    frequent instruction-name ``NGRAM``-grams to 1.

    :param file_path: value passed straight to ``APK``
    :return: the feature dict, or None when loading or analysing the APK
        raises an exception
    """
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = VMAnalysis(d)
        vm = dvm.DalvikVMFormat(a.get_dex())
        vmx = analysis.uVMAnalysis(vm)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception:
        # Narrower than a bare ``except:`` so KeyboardInterrupt and
        # SystemExit still propagate.
        return None

    result = {}
    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    raw = a.get_raw()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()

    # Names are only needed for the entropy rate; they are deliberately not
    # stored in the result.
    class_names = [c.get_name() for c in d.get_classes()]
    method_names = [m.get_name() for m in d.get_methods()]
    field_names = [f.get_name() for f in d.get_fields()]

    result['is_native_code'] = 1 if analysis.is_native_code(dx) else 0
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_crypto_code'] = 1 if analysis.is_crypto_code(dx) else 0
    result['is_dyn_code'] = 1 if analysis.is_dyn_code(dx) else 0
    result['is_reflection_code'] = 1 if analysis.is_reflection_code(vmx) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0

    # Class, method AND field names: the method names used to be added twice
    # while the field names were computed and then never used.
    result['entropy_rate'] = entropy_rate(
        class_names + method_names + field_names)

    vectors = {}
    result['feature_vectors'] = vectors

    # Search for the presence of api calls in a given apk
    vectors['api_calls'] = [
        1 if dx.tainted_packages.search_methods(".", call, ".") else 0
        for call in API_CALLS
    ]

    # Search for the presence of permissions in a given apk
    granted = set(result['permissions'])
    vectors['permissions'] = [
        1 if permission in granted else 0 for permission in PERMISSIONS
    ]

    vectors['special_strings'] = [
        1 if d.get_regex_strings(word) else 0 for word in SPECIAL_STRINGS
    ]

    # Flat sequence of instruction names over every method, in iteration order.
    opt_seq = [
        ins.get_name()
        for method in d.get_methods()
        for ins in method.get_instructions()
    ]
    # "+ 1" so the final full-length window is counted as well; the range is
    # empty when there are fewer than NGRAM instructions.
    optngram = Counter(
        tuple(opt_seq[start:start + NGRAM])
        for start in range(len(opt_seq) - NGRAM + 1)
    )
    # Keep only the NGRAM_THRE most frequent n-grams, as presence flags.
    optcodes = dict()
    for gram, _count in optngram.most_common(NGRAM_THRE):
        optcodes[str(gram)] = 1
    vectors['opt_codes'] = optcodes

    return result
'''
Created on Jun 8, 2014

@author: lyx
'''
from androguard.core.bytecodes.apk import APK
from androguard.core.bytecodes import dvm
from androguard.core.analysis import analysis
from androguard.decompiler.decompiler import DecompilerDAD

if __name__ == '__main__':
    # Parse the sample APK, attach the DAD decompiler and dump every class.
    apk = APK('../sampleapk/MyTrojan.apk')
    d = dvm.DalvikVMFormat(apk.get_dex())
    dx = analysis.uVMAnalysis(d)
    d.set_decompiler(DecompilerDAD(d, dx))

    for current_class in d.get_classes():
        # Single-argument call form of print: same output on Python 2 and 3.
        print(current_class)
        print(current_class.source())

    # Disabled instruction dump, kept for reference:
    # for current_method in d.get_methods():  # @IndentOk
    #     x = current_method.get_code()
    #     ins = x.get_bc().get_instructions()
    #     i = 0
    #     for s in ins:
    #         print s.show(i)
    #         i += 1
    # #apk = analyzeAPK('./sampleapk/k9-4.409-release.apk')
def analysis_app(self, log, apkobj, dexobj, analysisobj):
    """
    Generate the call graph, opcode CSV, permission and tpl outputs for one
    APK.

    Output locations are derived from ``log.filename`` by replacing
    ``self.APKPath`` with ``self.CGPath`` / ``self.FeaturePath`` sub-folders.
    The call graph is written to ``call.gml`` only when that file does not
    exist yet, and is always re-read from disk afterwards. If the tpl file
    already exists the method returns right after loading the graph.

    NOTE(review): the source of this block had lost its indentation; the
    nesting below was reconstructed from the syntax (in particular
    ``self.call_graphs.append`` is taken to run whether or not the graph
    file already existed) -- confirm against the original file.

    :param log: object whose ``filename`` attribute is the APK path
    :param apkobj: not used by this method
    :param dexobj: object receiving the DAD decompiler; also forwarded to
        ``Permission``
    :param analysisobj: analysis object providing ``get_call_graph()`` and
        ``get_classes()``
    :return: None
    """
    dexobj.set_decompiler(DecompilerDAD(dexobj, analysisobj))

    apk_filename = log.filename
    CGpath = apk_filename.replace(self.APKPath, self.CGPath)[:-4]
    CGfilename = os.path.join(CGpath, "call.gml")
    opcodeFilename = apk_filename.replace(
        self.APKPath, self.FeaturePath + "/opcode").replace(".apk", ".csv")
    permissionFilename = apk_filename.replace(
        self.APKPath,
        self.FeaturePath + "/permission").replace(".apk", ".csv")
    tplFilename = apk_filename.replace(
        self.APKPath, self.FeaturePath + "/tpl").replace(".apk", ".csv")

    # Make sure every output directory exists. Creation is best-effort: an
    # OSError (e.g. the directory appearing between the check and the call)
    # is ignored.
    output_dirs = [CGpath]
    for csv_name in (opcodeFilename, permissionFilename, tplFilename):
        output_dirs.append(csv_name[:csv_name.rfind('/')])
    for directory in output_dirs:
        if not os.path.exists(directory):
            try:
                os.makedirs(directory)
            except OSError:
                pass

    if not os.path.exists(CGfilename):
        # Build the call graph and save it.
        nx.write_gml(analysisobj.get_call_graph(), CGfilename,
                     stringizer=str)
    self.call_graphs.append(CGfilename)
    G = nx.read_gml(CGfilename, label='id')

    if os.path.exists(tplFilename):
        return

    class_functions = defaultdict(list)  # mappings of class and its functions
    super_dic = {}  # mappings of class and its superclass (for class replacement)
    implement_dic = {}

    opcodeFile = utils.create_csv(self.smali_opcode, opcodeFilename)
    try:
        method2nodeMap = self.getMethod2NodeMap(G)
        if method2nodeMap == {}:
            _settings.logger.error("%s has call graph error" % log.filename)
            print("%s has call graph error" % log.filename)
            return

        for classes in analysisobj.get_classes():  # all class
            class_name = str(classes.get_class().get_name())
            if classes.extends != "Ljava/lang/Object;":
                super_dic[class_name] = str(classes.extends)
                if str(classes.extends) in self.replacemap:
                    implement_dic[class_name] = str(classes.extends)
            if classes.implements:
                for imp in classes.implements:
                    if str(imp) in self.replacemap:
                        implement_dic[class_name] = str(imp)

            for method in classes.get_methods():
                if method.is_external():
                    continue
                m = method.get_method()
                class_functions[class_name].append(str(m.full_name))

                # Per-method histogram restricted to the known opcodes.
                opcode = dict.fromkeys(self.smali_opcode, 0)
                has_instructions = False
                for ins in m.get_instructions():
                    has_instructions = True
                    ins_name = ins.get_name()
                    if ins_name in opcode:
                        opcode[ins_name] += 1

                # Methods without any instruction get no CSV row.
                if has_instructions:
                    try:
                        utils.write_csv(
                            opcode, opcodeFile,
                            method2nodeMap[str(m.full_name)][0])
                    except Exception:
                        print("apk: %s, method: %s not exists" %
                              (log.filename, str(m.full_name)))
    finally:
        # Also runs on the early "call graph error" return and on
        # exceptions, so the CSV handle is never left open.
        opcodeFile.close()

    cpermission = Permission(G=G,
                             path=permissionFilename,
                             class_functions=class_functions,
                             super_dic=super_dic,
                             implement_dic=implement_dic,
                             dexobj=dexobj,
                             permission=self.permission,
                             cppermission=self.cppermission,
                             method2nodeMap=method2nodeMap)
    cpermission.generate()
    class2init = cpermission.getClass2init()
    sensitiveapimap = cpermission.getsensitive_api()

    ctpl = Tpl(log.filename, G, tplFilename, sensitiveapimap,
               self.permission, class2init)
    ctpl.generate()