def write_one_apk_source(apkfile, output_path=None):
    """Write the decompiled source of every class of an APK and trace calls.

    For each class of the DEX returned by ``APK.get_dex()`` the DAD
    decompiler output is written to ``output_path``.  Every method found for
    that class, and every method it references, is added as a node to a
    local call graph and its name is printed.

    :param apkfile: path of the APK to analyse
    :param output_path: file that receives the decompiled sources; when
        omitted, the path that used to be hard-coded here is used
    """
    if output_path is None:
        output_path = 'F:\\2018年第一学年科研\\APK科研\\数据集\\Word2vec_ao_yuliaoku\\test\\successed_1.txt'

    a = apk.APK(apkfile, False, 'r', None, 2)
    d = dvm.DalvikVMFormat(a.get_dex())
    vmx = analysis.Analysis(d)
    dp = decompiler.DecompilerDAD(d, vmx)  # DAD is androguard's built-in decompiler
    _, _, dx = AnalyzeAPK(apkfile)
    CFG = nx.DiGraph()

    with open(output_path, 'w') as txtData:
        for k in d.get_classes():
            print('class_name:' + k.get_name())
            txtData.writelines(dp.get_source_class(k))
            for m in dx.find_methods(classname=k.get_name()):
                orig_method = m.get_method()
                is_this_external = isinstance(orig_method, ExternalMethod)
                CFG.add_node(orig_method, external=is_this_external)
                # List every method that belongs to the class.
                if not is_this_external:
                    print('orig::' + orig_method.get_name())
                else:
                    print('orig+external::' + orig_method.get_name())

                for other_class, callee, offset in m.get_xref_to():
                    is_external = isinstance(callee, ExternalMethod)
                    # `Graph.node` was removed in networkx 2.4; membership on
                    # the graph itself works on every networkx version.
                    if callee not in CFG:
                        CFG.add_node(callee, external=is_external)
                    # NOTE(review): assumes every callee is printed, not only
                    # those newly added to the graph; also the
                    # 'external+callee::' label is emitted for NON-external
                    # callees -- confirm the labels are not swapped.
                    if not is_external:
                        print('external+callee::' + callee.get_name())
                    else:
                        print('callee:' + callee.get_name())
def get_apis(path, filename):
    """Collect the calls an APK makes into classes it does not define.

    Every instruction's textual output is matched against a
    ``Lclass;->name(args)...`` pattern; matches whose class is not one of the
    DEX's own classes are gathered, sorted, printed and handed to
    ``writeToTxt`` together with ``filename``.

    :param path: path of the APK to analyse
    :param filename: forwarded unchanged to ``writeToTxt``
    :return: sorted list of the matched call strings
    """
    app = apk.APK(path)
    app_dex = dvm.DalvikVMFormat(app.get_dex())
    app_x = analysis.Analysis(app_dex)

    # A set makes the per-instruction "is this our own class?" test O(1).
    own_classes = {cc.get_name() for cc in app_dex.get_classes()}
    call_pattern = re.compile(r'(L[^;]*;)->[^\(]*\([^\)]*\).*')

    found = set()
    for method in app_dex.get_methods():
        g = app_x.get_method(method)
        if method.get_code() is None:
            continue
        for block in g.get_basic_blocks().get():
            for ins in block.get_instructions():
                match = call_pattern.search(ins.get_output())
                if match and match.group(1) not in own_classes:
                    found.add(match.group())

    methods = sorted(found)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("methods:" + "\n")
    print(methods)
    writeToTxt("Methods:", methods, filename)
    return methods
def __init__(self, *args, **kwargs):
    """Load the sample DEX once and attach a fresh analysis to it."""
    super(RenameTest, self).__init__(*args, **kwargs)
    with open("examples/android/TestsAndroguard/bin/classes.dex", "rb") as dex_file:
        raw_dex = dex_file.read()
    self.d = dvm.DalvikVMFormat(raw_dex)
    self.dx = analysis.Analysis(self.d)
    self.d.set_vmanalysis(self.dx)
def Deal_one_apk_new(apkfile):
    """Print every class, method and referenced method of an APK.

    Each method found for a class, and each method it references, is added
    as a node (tagged ``external``) to a local call graph; their names are
    printed along the way.  Nothing is returned.

    :param apkfile: path of the APK to analyse
    """
    a = apk.APK(apkfile, False, 'r', None, 2)
    d = dvm.DalvikVMFormat(a.get_dex())
    vmx = analysis.Analysis(d)
    dp = decompiler.DecompilerDAD(d, vmx)  # DAD is androguard's built-in decompiler
    _, _, dx = AnalyzeAPK(apkfile)
    CFG = nx.DiGraph()

    for k in d.get_classes():
        print('class_name:' + k.get_name())
        for m in dx.find_methods(classname=k.get_name()):
            orig_method = m.get_method()
            is_this_external = isinstance(orig_method, ExternalMethod)
            CFG.add_node(orig_method, external=is_this_external)
            # List every method that belongs to the class.
            if not is_this_external:
                print('orig::' + orig_method.get_name())
            else:
                print('orig+externalmethod::' + orig_method.get_name())

            for other_class, callee, offset in m.get_xref_to():
                is_external = isinstance(callee, ExternalMethod)
                # `Graph.node` was removed in networkx 2.4; test membership
                # on the graph itself instead.
                if callee not in CFG:
                    CFG.add_node(callee, external=is_external)
                # NOTE(review): assumes every callee is printed, not only
                # newly added ones; the 'external+callee::' label is emitted
                # for NON-external callees -- confirm it is not swapped.
                if not is_external:
                    print('external+callee::' + callee.get_name())
                else:
                    print('callee:' + callee.get_name())
def Get_APK_Words(apkfile):
    """Print the stored key words of callback / key-register methods.

    The corpus dictionaries come from ``Build_APK_Corpus``.  For every class
    its methods are first registered in a local graph, then each
    non-external method whose name appears in ``all_callback`` or
    ``APK_Method_Key_Words.key_registers`` has its dictionary entry printed.

    :param apkfile: path of the APK to analyse
    """
    a = apk.APK(apkfile, False, 'r', None, 2)
    d = dvm.DalvikVMFormat(a.get_dex())
    vmx = analysis.Analysis(d)
    dp = decompiler.DecompilerDAD(d, vmx)  # DAD is androguard's built-in decompiler
    a1, d1, dx = AnalyzeAPK(apkfile)
    CFG = nx.DiGraph()
    class_code_dic, is_amd_class_code_dic = Build_APK_Corpus(apkfile)

    for k in d.get_classes():  # visit every class
        all_orig_methods = []  # non-external methods of this class
        print('class_name+super_name::' + k.get_name() + ':' +
              k.get_superclassname())

        # First pass: register every method and remember the internal ones.
        for entry in dx.find_methods(classname=k.get_name()):
            method = entry.get_method()
            external = isinstance(method, ExternalMethod)
            CFG.add_node(method, external=external)
            if not isinstance(method, ExternalMethod):
                all_orig_methods.append(method)

        # Second pass: report the methods of interest.
        for entry in dx.find_methods(classname=k.get_name()):
            method = entry.get_method()
            if isinstance(method, ExternalMethod):
                continue
            if (method.get_name() not in all_callback
                    and method.get_name() not in APK_Method_Key_Words.key_registers):
                continue
            print('method_name+method_descriptor::' + method.get_name() +
                  method.get_descriptor())
            signature = method.get_name() + method.get_descriptor()
            print(class_code_dic[k.get_name()][signature])
def __init__(self, name):
    """Load ``name`` via ``auto_vm`` and index its classes by name.

    :param name: file to load
    :raises ValueError: if ``auto_vm`` returns ``None`` for the file
    """
    loaded = auto_vm(name)
    if loaded is None:
        raise ValueError('Format not recognised: %s' % name)
    self.vma = analysis.Analysis(loaded)
    self.classes = {cls.get_name(): cls for cls in loaded.get_classes()}
def test(apkfile):
    """Print the DAD-decompiled source of every class in an APK's DEX.

    :param apkfile: path of the APK to decompile
    """
    package = apk.APK(apkfile, False, 'r', None, 2)
    dex = dvm.DalvikVMFormat(package.get_dex())
    dex_analysis = analysis.Analysis(dex)
    # DAD is androguard's built-in decompiler.
    dad = decompiler.DecompilerDAD(dex, dex_analysis)
    for cls in dex.get_classes():
        source = dad.get_source_class(cls)
        print(source)
def AnalysisStart(self, filepath, savefolder):
    """Extract per-basic-block features of an APK into a text file.

    The output folder is ``savefolder`` plus the file's base name (text
    before the first dot) and must not exist yet.  For every method that has
    code, one row per basic block is built:
    ``[index, find_nodes(block), find_loops(graph, block), successor count,
    predecessor count, instruction count]`` and the rows are passed to
    ``save_base_feature``.

    :param filepath: '/'-separated path of the APK
    :param savefolder: prefix under which the per-APK folder is created
    :return: ``(apk_name, message)`` where message is ``'not valid'``,
        ``'<path> already exist'`` or ``'analysis success'``
    """
    apk_dex, apk_name, apk_flag = self.getdex(filepath)

    # Check validity BEFORE parsing: previously the DEX was parsed first, so
    # an invalid APK could fail inside the parser instead of yielding the
    # 'not valid' result.
    if not apk_flag:
        return apk_name, 'not valid'

    d = dvm.DalvikVMFormat(apk_dex)
    x = analysis.Analysis(d)

    # Create the output folder; an existing one means "already processed".
    path = savefolder + filepath.split('/')[-1].split('.')[0] + '/'
    if os.path.exists(path):
        return apk_name, path + ' already exist'
    os.makedirs(path)
    print(path, 'create success')

    base_source = path + apk_name + '_base_source.txt'
    # `with` guarantees the file is closed even if feature extraction raises.
    with open(base_source, 'w') as bs_file:
        for method in d.get_methods():
            g = x.get_method(method)
            if method.get_code() is None:
                continue
            graphh = self.get_graph(g)
            self.has_cycle(graphh)
            properity = []
            for index, block in enumerate(g.get_basic_blocks().get(), 1):
                w = self.find_nodes(block)
                loop = self.find_loops(graphh, block)
                child_num = sum(1 for _ in block.get_next())
                father_num = sum(1 for _ in block.get_prev())
                ins_len = sum(1 for _ in block.get_instructions())
                properity.append(
                    [index, w, loop, child_num, father_num, ins_len])
            self.save_base_feature(bs_file, method.get_class_name(),
                                   method.get_name(), properity)
    return apk_name, 'analysis success'
def test(file_path):
    """Parse and analyse an APK, then call ``show()`` on it.

    Any exception raised along the way is printed instead of propagated.

    :param file_path: path of the APK to load
    """
    try:
        package = apk.APK(file_path)
        dex = dvm.DalvikVMFormat(package.get_dex())
        analysis.Analysis(dex)
        package.show()
    except Exception as error:
        print(error)
def extract_features(file_path):
    """Return the permissions declared by an APK.

    The DEX is still parsed, analysed and wired to the DAD decompiler so a
    broken file fails here exactly as before; the second, identical parse
    whose result was never used has been removed.

    :param file_path: path of the APK
    :return: whatever ``APK.get_permissions()`` returns for the file
    """
    a = APK(file_path)
    d = DalvikVMFormat(a.get_dex())
    dx = Analysis(d)
    d.set_vmanalysis(dx)
    d.set_decompiler(DecompilerDAD(d, dx))
    return a.get_permissions()
def get_compressed_feature_vector(path, api_call_dictionary):
    '''
    Get a compressed feature vector.

    Each basic block that has children starts one sequence (a column): the
    external API calls found in the block fill the column top-down, then
    ``traverse_graph`` continues from one randomly chosen child.

    Parameters
    ----------
    path : the path of the file to get a compressed feature vector
        (an ``.apk``; anything else is read as a raw DEX)
    api_call_dictionary : a dictionary of API calls (call string -> number)

    Returns
    -------
    feature_vector : a compressed feature vector, shape
        ``(max_calls, max_calls)``, integer dtype
    recursion_error_count : the number of recursion errors, as reported by
        ``traverse_graph``
    '''
    max_sequences = max_calls
    feature_vector = np.zeros((max_calls, max_sequences), dtype=int)
    call_count = 0
    sequence_count = 0

    if path.endswith('.apk'):
        app = apk.APK(path)
        app_dex = dvm.DalvikVMFormat(app.get_dex())
    else:
        # Close the handle deterministically instead of leaking it.
        with open(path, 'rb') as dex_file:
            app_dex = dvm.DalvikVMFormat(dex_file.read())
    app_x = analysis.Analysis(app_dex)

    # Kept as a list because it is handed to traverse_graph unchanged.
    class_names = [classes.get_name() for classes in app_dex.get_classes()]
    call_pattern = re.compile(r'(L[^;]*;)->[^\(]*\([^\)]*\).*')
    recursion_error_count = 0

    for method in app_dex.get_methods():
        g = app_x.get_method(method)
        if method.get_code() is None:
            continue
        for i in g.get_basic_blocks().get():
            if i.childs != [] and sequence_count < max_sequences:
                call_count = 0
                for ins in i.get_instructions():
                    # Textual form of the instruction: may contain methods,
                    # variables, or anything else.
                    output = ins.get_output()
                    match = call_pattern.search(output)
                    if (match and match.group(1) not in class_names
                            and call_count < max_calls):
                        feature_vector[call_count, sequence_count] = \
                            api_call_dictionary[match.group()]
                        call_count += 1
                rand_child_selected = np.random.randint(len(i.childs))
                recursion_error_count = traverse_graph(
                    i.childs[rand_child_selected][2], feature_vector,
                    class_names, call_count, sequence_count,
                    recursion_error_count, api_call_dictionary)
                sequence_count += 1
    return feature_vector, recursion_error_count
def generate_dictionary(apk_folder_directory) -> int:
    '''
    Generate a dictionary mapping all distinct API calls to numbers and
    pickle the dictionary.

    Only ``.apk`` files directly inside the folder are scanned.  A call is
    recorded when its class is not one of the scanning APK's own classes;
    numbers are assigned in order of first appearance, starting at 0.

    Parameters
    ----------
    apk_folder_directory : the directory of the folder containing APKs for scanning

    Returns
    -------
    dictionary_length : the length of the API call dictionary
        (-1 if any exception occurs while scanning; nothing is pickled then)
    '''
    api_call_dictionary = {}
    call_pattern = re.compile(r'(L[^;]*;)->[^\(]*\([^\)]*\).*')
    try:
        for entry in os.listdir(apk_folder_directory):
            file_path = os.path.join(apk_folder_directory, entry)
            if not file_path.endswith('.apk'):
                continue
            app = apk.APK(file_path)
            app_dex = dvm.DalvikVMFormat(app.get_dex())
            app_x = analysis.Analysis(app_dex)
            class_names = {classes.get_name()
                           for classes in app_dex.get_classes()}
            for method in app_dex.get_methods():
                g = app_x.get_method(method)
                if method.get_code() is None:
                    continue
                for i in g.get_basic_blocks().get():
                    for ins in i.get_instructions():
                        # Textual form of the instruction: may contain
                        # methods, variables, or anything else.
                        match = call_pattern.search(ins.get_output())
                        if match and match.group(1) not in class_names:
                            call = match.group()
                            if call not in api_call_dictionary:
                                api_call_dictionary[call] = \
                                    len(api_call_dictionary)
    except Exception:
        # `except Exception` (not a bare except) keeps the documented -1
        # contract without also swallowing KeyboardInterrupt / SystemExit.
        return -1

    with open(get_dictionary_path(apk_folder_directory), 'wb') as dictionary_stream:
        pickle.dump(api_call_dictionary, dictionary_stream,
                    protocol=pickle.DEFAULT_PROTOCOL)
    return len(api_call_dictionary)
def Deal_one_apk(apkfile):
    """Merge callee key words into callback methods of one hard-coded class.

    Works on the dictionaries returned by ``Update_one_apk_dictory``.  Only
    the class ``Lmy/app/client/Client;`` is processed: for each of its
    non-external methods named in ``all_callback`` or
    ``APK_Method_Key_Words.key_registers``, the key words of every callee
    that is itself a method of the class are appended to the method's entry,
    and both dictionary entries are printed.

    :param apkfile: path of the APK to analyse
    :return: an empty list (nothing is ever added to it)
    """
    a = apk.APK(apkfile, False, 'r', None, 2)
    d = dvm.DalvikVMFormat(a.get_dex())
    vmx = analysis.Analysis(d)
    dp = decompiler.DecompilerDAD(d, vmx)  # DAD is androguard's built-in decompiler
    _, _, dx = AnalyzeAPK(apkfile)
    CFG = nx.DiGraph()
    apk_word = []
    class_code_dic, is_amd_class_code_dic = Update_one_apk_dictory(apkfile)

    amd_p1 = 'Lmy/app/client/Client;'
    for k in d.get_classes():
        if k.get_name() != amd_p1:
            continue

        # Collect every method the class itself defines.
        all_orig_methods = []
        for m in dx.find_methods(classname=amd_p1):
            orig_method = m.get_method()
            is_this_external = isinstance(orig_method, ExternalMethod)
            CFG.add_node(orig_method, external=is_this_external)
            if not is_this_external:
                all_orig_methods.append(orig_method)

        # Walk the class's methods again and enrich the interesting ones.
        for m in dx.find_methods(classname=amd_p1):
            orig_method = m.get_method()
            if isinstance(orig_method, ExternalMethod):
                continue
            if not ((orig_method.get_name() in all_callback) or
                    (orig_method.get_name() in APK_Method_Key_Words.key_registers)):
                continue

            callees = []  # methods referenced by this method
            for other_class, callee, offset in m.get_xref_to():
                is_external = isinstance(callee, ExternalMethod)
                # `Graph.node` was removed in networkx 2.4; test membership
                # on the graph itself instead.
                if callee not in CFG:
                    CFG.add_node(callee, external=is_external)
                # NOTE(review): assumes every callee is collected, not only
                # those newly added to the graph -- confirm the intended
                # nesting.
                callees.append(callee)

            orig_key = orig_method.get_name() + orig_method.get_descriptor()
            for callee in callees:
                if callee in all_orig_methods:
                    callee_key = callee.get_name() + callee.get_descriptor()
                    class_code_dic[k.get_name()][orig_key] += \
                        class_code_dic[k.get_name()][callee_key]
            print(class_code_dic[k.get_name()][orig_key])
            print(is_amd_class_code_dic[k.get_name()][orig_key])
    return apk_word
def testMethodRename(self):
    """Renaming an encoded method must be visible through get_name()."""
    with open("examples/android/TestsAndroguard/bin/classes.dex", "rb") as dex_file:
        vm = dvm.DalvikVMFormat(dex_file.read())
    vm_analysis = analysis.Analysis(vm)
    vm.set_vmanalysis(vm_analysis)

    # Exactly one method is expected; the unpacking fails otherwise.
    (target,) = vm.get_method("testDouble")
    self.assertEqual(target.get_name(), "testDouble")

    target.set_name("blablaMyMethod")
    self.assertEqual(target.get_name(), "blablaMyMethod")
def GetAnalyzedDex(sample_name):
    """Return an analysis of an APK's DEX with cross-references built.

    :param sample_name: path of the APK
    :return: the ``Analysis`` object after ``create_xref()`` has run
    """
    package = APK(sample_name)
    dex = dvm.DalvikVMFormat(package.get_dex())
    result = analysis.Analysis(dex)
    # If a list comes back, only its first element is used.
    if type(result) is list:
        result = result[0]
    result.create_xref()
    return result
def testMultiDexExternal(self):
    """
    Check that a class is internal only when the DEX defining it was added,
    and shows up as external once cross-references are built without it.
    """
    from zipfile import ZipFile

    foobar = "Lcom/foobar/foo/Foobar;"
    blafoo = "Lcom/blafoo/bar/Blafoo;"

    with ZipFile("examples/tests/multidex/multidex.apk") as archive:
        raw_first = archive.read("classes.dex")
        raw_second = archive.read("classes2.dex")
    d1 = dvm.DalvikVMFormat(raw_first)
    d2 = dvm.DalvikVMFormat(raw_second)

    # Only the first DEX: Foobar is internal, Blafoo is unknown.
    dx = analysis.Analysis()
    dx.add(d1)
    self.assertIn(foobar, dx.classes)
    self.assertFalse(dx.classes[foobar].is_external())
    self.assertNotIn(blafoo, dx.classes)

    # Only the second DEX: Blafoo is internal, Foobar is unknown ...
    dx = analysis.Analysis()
    dx.add(d2)
    self.assertIn(blafoo, dx.classes)
    self.assertFalse(dx.classes[blafoo].is_external())
    self.assertNotIn(foobar, dx.classes)

    # ... until the xrefs reveal the reference to Foobar as external.
    dx.create_xref()
    self.assertIn(foobar, dx.classes)
    self.assertTrue(dx.classes[foobar].is_external())

    # Both DEX files: both classes are internal.
    dx = analysis.Analysis()
    dx.add(d1)
    dx.add(d2)
    self.assertIn(blafoo, dx.classes)
    self.assertFalse(dx.classes[blafoo].is_external())
    self.assertIn(foobar, dx.classes)
    self.assertFalse(dx.classes[foobar].is_external())
def Get_Apk_Source_Code(apkfile, output_path=None):
    """Write the DAD-decompiled source of every class of an APK to a file.

    :param apkfile: path of the APK to decompile
    :param output_path: file that receives the sources; when omitted, the
        path that used to be hard-coded here is used
    """
    if output_path is None:
        output_path = 'F:\\2018年第一学年科研\\APK科研\\数据集\\Word2vec_ao_yuliaoku\\source.txt'

    a = apk.APK(apkfile, False, 'r', None, 2)
    d = dvm.DalvikVMFormat(a.get_dex())
    vmx = analysis.Analysis(d)
    dp = decompiler.DecompilerDAD(d, vmx)  # DAD is androguard's built-in decompiler

    # Dump the source class by class.
    with open(output_path, 'w') as txtfile:
        for k in d.get_classes():
            txtfile.writelines(dp.get_source_class(k))
def create_adex(self, log, dexobj):
    """
    Build the Analysis object for a DEX and populate its cross-references.

    :param log: an object which corresponds to a unique app (not used here)
    :param androguard.core.bytecodes.dvm.DalvikVMFormat dexobj: a :class:`DalvikVMFormat` object
    :rtype: a :class:`~androguard.core.analysis.analysis.Analysis` object
    """
    xref_analysis = analysis.Analysis(dexobj)
    xref_analysis.create_xref()
    return xref_analysis
def create_adex(self, log, dexobj):
    """
    Build the Analysis object for a DEX and populate its cross-references.

    :param log: an object which corresponds to a unique app (not used here)
    :param dexobj: a :class:`DalvikVMFormat` object
    :rtype: a :class:`Analysis` object
    """
    xref_analysis = analysis.Analysis(dexobj)
    xref_analysis.create_xref()
    return xref_analysis
def static_dex2(app, result):
    """Store the DEX-level features of ``app`` under ``result['dex']``.

    If parsing or analysing the DEX raises, the traceback is printed and
    empty ``apis`` / ``strings`` are stored next to the libs.

    :param app: APK object providing ``get_filename()`` and ``get_dex()``
    :param result: dict that receives the ``'dex'`` entry (mutated in place)
    """
    libs = get_libs_from_dex(app.get_filename())

    app_dex = None
    app_x = None
    try:
        app_dex = dvm.DalvikVMFormat(app.get_dex())
        app_x = analysis.Analysis(app_dex)
    except Exception:
        traceback.print_exc()

    apis = {}
    method_strings = {}
    if app_dex is not None and app_x is not None:
        apis = gen_apis_one_cnt(app, app_dex, app_x, libs)
        method_strings = gen_strings_from_dex(app, app_dex, app_x, libs)

    result['dex'] = {"apis": apis, "strings": method_strings, "libs": libs}
def __init__(self, name):
    """
    Load every DEX of ``name`` into one analysis and index the classes.

    :param name: filename to load
    :raises ValueError: if the file is neither APK, DEX nor DEY
    """
    self.vma = analysis.Analysis()

    # Detect the container type first so multidex APKs are fully loaded.
    file_kind = androconf.is_android(name)
    if file_kind == 'APK':
        package = apk.APK(name)
        for raw_dex in package.get_all_dex():
            self.vma.add(dvm.DalvikVMFormat(raw_dex))
    elif file_kind == 'DEX':
        self.vma.add(dvm.DalvikVMFormat(read(name)))
    elif file_kind == 'DEY':
        self.vma.add(dvm.DalvikOdexVMFormat(read(name)))
    else:
        raise ValueError("Format not recognised for filename '%s'" % name)

    by_name = {}
    for class_analysis in self.vma.get_classes():
        by_name[class_analysis.orig_class.get_name()] = class_analysis.orig_class
    self.classes = by_name
def compile_dex(apkfile, filtercfg):
    """Compile the methods selected by the filter with Dex2C.

    Methods whose JNI long name exceeds 220 characters are skipped.  A
    compilation failure is logged, recorded and does not stop the run.

    :param apkfile: file handed to ``auto_vm``
    :param filtercfg: configuration handed to ``MethodFilter``
    :return: ``(code_by_method_triple, error_strings)``
    """
    show_logging(level=logging.INFO)

    vm = auto_vm(apkfile)
    vm_analysis = analysis.Analysis(vm)
    method_filter = MethodFilter(filtercfg, vm)
    compiler = Dex2C(vm, vm_analysis)

    compiled_method_code = {}
    errors = []
    for method in vm.get_methods():
        triple = get_method_triple(method)
        jni_longname = JniLongName(*triple)
        full_name = ''.join(triple)

        if len(jni_longname) > 220:
            logger.debug("name to long %s(> 220) %s" %
                         (jni_longname, full_name))
            continue
        if not method_filter.should_compile(method):
            continue

        logger.debug("compiling %s" % (full_name))
        try:
            code = compiler.get_source_method(method)
        except Exception as e:
            logger.warning("compile method failed:%s (%s)" %
                           (full_name, str(e)), exc_info=True)
            errors.append('%s:%s' % (full_name, str(e)))
        else:
            if code:
                compiled_method_code[triple] = code
    return compiled_method_code, errors
def Deal_one_apk(apkfile):
    """Print source, methods and callees of one hard-coded class of an APK.

    Only the class ``Lcom/mix_four/dd/LockReceiver;`` is processed: its
    decompiled source is printed, and each of its methods and the methods
    they reference are added to a local call graph and printed.

    :param apkfile: path of the APK to analyse
    :return: an empty list (nothing is ever added to it)
    """
    a = apk.APK(apkfile, False, 'r', None, 2)
    d = dvm.DalvikVMFormat(a.get_dex())
    vmx = analysis.Analysis(d)
    dp = decompiler.DecompilerDAD(d, vmx)  # DAD is androguard's built-in decompiler
    _, _, dx = AnalyzeAPK(apkfile)
    CFG = nx.DiGraph()
    apk_word = []

    amd_p1 = 'Lcom/mix_four/dd/LockReceiver;'
    for k in d.get_classes():
        if k.get_name() != amd_p1:
            continue
        print('class_name:' + k.get_name())
        print(dp.get_source_class(k))
        for m in dx.find_methods(classname=amd_p1):
            orig_method = m.get_method()
            is_this_external = isinstance(orig_method, ExternalMethod)
            CFG.add_node(orig_method, external=is_this_external)
            # List every method that belongs to the class.
            if not is_this_external:
                print('orig::' + orig_method.get_name())

            for other_class, callee, offset in m.get_xref_to():
                is_external = isinstance(callee, ExternalMethod)
                # `Graph.node` was removed in networkx 2.4; test membership
                # on the graph itself instead.
                if callee not in CFG:
                    CFG.add_node(callee, external=is_external)
                # NOTE(review): assumes every callee is printed, not only
                # newly added ones; the 'external+callee::' label is emitted
                # for NON-external callees -- confirm it is not swapped.
                if not is_external:
                    print('external+callee::' + callee.get_name())
                else:
                    print('callee:' + callee.get_name())
    return apk_word
def analyze_dex(filename, raw=False, decompiler=None):
    """
    Analyze an android dex file and set everything up for quicker analysis.

    :param filename: the filename of the android dex file or a buffer which represents the dex file
    :type filename: string
    :param raw: True if ``filename`` is a buffer rather than a path (optional)
    :type raw: boolean
    :param decompiler: the type of decompiler to use ("dad", "dex2jad", "ded")
    :type decompiler: string

    :rtype: return the :class:`DalvikVMFormat`, and :class:`Analysis` objects
    """
    if raw:
        d = DalvikVMFormat(filename)
    else:
        # Close the file deterministically instead of leaking the handle.
        with open(filename, "rb") as dex_file:
            d = DalvikVMFormat(dex_file.read())

    dx = analysis.Analysis(d)
    d.set_vmanalysis(dx)
    run_decompiler(d, dx, decompiler)
    dx.create_xref()
    return d, dx
def Update_one_apk_dictory(apkfile):
    """Extend each method's key words with those of the methods it calls.

    Starts from the two dictionaries returned by ``Get_one_apk_dictory``.
    For every non-external method, the key words of each callee found in the
    dictionary are appended to its own; callees whose class is not in the
    dictionary contribute ``Get_Word(name + descriptor)`` instead.  If any
    callee found in the dictionary is flagged ``'true'``, the method is
    flagged ``'true'`` as well.

    :param apkfile: path of the APK to analyse
    :return: ``(class_code_dic, is_amd_class_code_dic)``, both updated
    """
    a = apk.APK(apkfile, False, 'r', None, 2)
    d = dvm.DalvikVMFormat(a.get_dex())
    vmx = analysis.Analysis(d)
    dp = decompiler.DecompilerDAD(d, vmx)  # DAD is androguard's built-in decompiler
    _, _, dx = AnalyzeAPK(apkfile)
    CFG = nx.DiGraph()
    class_code_dic, is_amd_class_code_dic = Get_one_apk_dictory(apkfile)

    for k in d.get_classes():
        # Visit every method of the class.
        for m in dx.find_methods(classname=k.get_name()):
            orig_method = m.get_method()
            is_this_external = isinstance(orig_method, ExternalMethod)
            CFG.add_node(orig_method, external=is_this_external)

            callees = []  # methods referenced by this method
            for other_class, callee, offset in m.get_xref_to():
                is_external = isinstance(callee, ExternalMethod)
                # `Graph.node` was removed in networkx 2.4; test membership
                # on the graph itself instead.
                if callee not in CFG:
                    CFG.add_node(callee, external=is_external)
                # NOTE(review): assumes every callee is collected, not only
                # those newly added to the graph -- confirm the intended
                # nesting.
                callees.append(callee)

            # Open question left by the author: how to treat a method of one
            # class calling a method of another class.
            if isinstance(orig_method, ExternalMethod):
                continue

            orig_key = orig_method.get_name() + orig_method.get_descriptor()
            contains_amd = False
            orig_method_key_words = []
            orig_method_key_words += class_code_dic[k.get_name()][orig_key]

            for callee in callees:
                callee_class = callee.get_class_name()
                callee_key = callee.get_name() + callee.get_descriptor()
                if callee_class in class_code_dic:
                    # Callee known to the dictionary: reuse its key words.
                    if callee_key in class_code_dic[callee_class]:
                        orig_method_key_words += \
                            class_code_dic[callee_class][callee_key]
                        if is_amd_class_code_dic[callee_class][callee_key] == 'true':
                            contains_amd = True
                else:
                    # NOTE(review): assumes this fallback pairs with the
                    # class lookup (callee class absent from the dictionary)
                    # rather than with the method lookup -- confirm.
                    orig_method_key_words += Get_Word(callee_key)

            # Store the enriched key words back on the calling method.
            class_code_dic[k.get_name()][orig_key] = orig_method_key_words
            if contains_amd:
                is_amd_class_code_dic[k.get_name()][orig_key] = 'true'
    return class_code_dic, is_amd_class_code_dic
def extract_features(file_path):
    """Extract manifest data, names and binary feature vectors from an APK.

    The six ``is_*`` flags are constants (always 1).  The feature vectors
    hold one 0/1 entry per item of ``API_CALLS``, ``PERMISSIONS`` and
    ``SPECIAL_STRINGS``.

    :param file_path: path of the APK
    :return: dict with the keys assigned below
    """
    result = {}
    # Progress markers are kept verbatim so console output is unchanged.
    print('1.1')
    a, d, dx = AnalyzeAPK(file_path)
    print('1.2')
    print('1.3')
    print('1.4')
    print('1.5')
    print('1.6')

    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    raw = a.get_raw()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = dx.get_strings()
    result['class_names'] = [c.name for c in dx.get_classes()]
    result['method_names'] = [m.name for m in dx.get_methods()]
    result['field_names'] = [f.name for f in dx.get_fields()]

    result['is_native_code'] = 1
    result['is_obfuscation'] = 1
    result['is_crypto_code'] = 1
    result['is_dyn_code'] = 1
    result['is_reflection_code'] = 1
    result['is_database'] = 1

    s_list = []
    s_list.extend(result['class_names'])
    s_list.extend(result['method_names'])
    s_list.extend(result['field_names'])
    result['entropy_rate'] = entropy_rate(s_list)

    result['feature_vectors'] = {}

    # Search for the presence of api calls in a given apk.
    # find_methods() yields its matches lazily (androguard Analysis API), so
    # the returned object is truthy even when nothing matches; any() checks
    # whether at least one match actually exists.
    result['feature_vectors']['api_calls'] = [
        1 if any(True for _ in dx.find_methods(".", call, ".")) else 0
        for call in API_CALLS
    ]

    # Search for the presence of permissions in a given apk.
    result['feature_vectors']['permissions'] = [
        1 if permission in result['permissions'] else 0
        for permission in PERMISSIONS
    ]

    # find_strings() is lazy as well; same treatment as find_methods().
    result['feature_vectors']['special_strings'] = [
        1 if any(True for _ in dx.find_strings(word)) else 0
        for word in SPECIAL_STRINGS
    ]
    return result
def testXrefs(self):
    """Test if XREFs produce the correct results"""
    with open("examples/android/TestsAndroguard/bin/classes.dex", "rb") as fd:
        d = dvm.DalvikVMFormat(fd.read())
        dx = analysis.Analysis(d)
    dx.create_xref()

    testcls = dx.classes['Ltests/androguard/TestActivity;']
    self.assertIsInstance(testcls, analysis.ClassAnalysis)

    def internal_method(name):
        # Fetch the method `name` of the test class and check it is a
        # unique, internal, encoded method.
        meth = [x for x in testcls.get_methods() if x.name == name][0]
        pattern = '^' + name + '$'
        self.assertEqual(
            len(list(dx.find_methods(testcls.name, pattern))), 1)
        self.assertEqual(
            list(dx.find_methods(testcls.name, pattern))[0], meth)
        self.assertIsInstance(meth, analysis.MethodClassAnalysis)
        self.assertFalse(meth.is_external())
        self.assertIsInstance(meth.method, dvm.EncodedMethod)
        self.assertEqual(meth.name, name)
        return meth

    def called_names(meth):
        # Full names of the callees of `meth`, ordered by call offset.
        ordered = sorted(meth.get_xref_to(), key=itemgetter(2))
        return [entry[1].full_name for entry in ordered]

    def check_reverse(caller, class_re, name_re, external, android_api):
        # The callee must be unique, carry the expected flags and list
        # `caller` among its incoming references.
        other = list(dx.find_methods(class_re, name_re))
        self.assertEqual(len(other), 1)
        self.assertIsInstance(other[0], analysis.MethodClassAnalysis)
        (self.assertTrue if external else self.assertFalse)(
            other[0].is_external())
        (self.assertTrue if android_api else self.assertFalse)(
            other[0].is_android_api())
        self.assertIn(caller.method,
                      map(itemgetter(1), other[0].get_xref_from()))

    # --- onCreate: calls into the Android API -------------------------
    testmeth = internal_method('onCreate')
    xrefs = called_names(testmeth)
    self.assertEqual(len(xrefs), 5)
    self.assertEqual(xrefs, [
        # First, super is called:
        'Landroid/app/Activity; onCreate (Landroid/os/Bundle;)V',
        # then setContentView (in the current class, but external):
        'Ltests/androguard/TestActivity; setContentView (I)V',
        # then getApplicationContext (inside the Toast):
        'Ltests/androguard/TestActivity; getApplicationContext ()Landroid/content/Context;',
        # then Toast.makeText:
        'Landroid/widget/Toast; makeText (Landroid/content/Context; Ljava/lang/CharSequence; I)Landroid/widget/Toast;',
        # then show():
        'Landroid/widget/Toast; show ()V',
    ])

    # Now, test if the reverse is true.
    check_reverse(testmeth, '^Landroid/app/Activity;$', '^onCreate$',
                  external=True, android_api=True)
    # External because not overwritten in class:
    check_reverse(testmeth, '^Ltests/androguard/TestActivity;$',
                  '^setContentView$', external=True, android_api=False)
    # External because not overwritten in class:
    check_reverse(testmeth, '^Ltests/androguard/TestActivity;$',
                  '^getApplicationContext$', external=True,
                  android_api=False)
    check_reverse(testmeth, '^Landroid/widget/Toast;$', '^makeText$',
                  external=True, android_api=True)
    check_reverse(testmeth, '^Landroid/widget/Toast;$', '^show$',
                  external=True, android_api=True)

    # --- testCalls: internal calls --------------------------------------
    testmeth = internal_method('testCalls')
    xrefs = called_names(testmeth)
    self.assertEqual(len(xrefs), 4)
    self.assertEqual(xrefs, [
        'Ltests/androguard/TestActivity; testCall2 (J)V',
        'Ltests/androguard/TestIfs; testIF (I)I',
        'Ljava/lang/Object; getClass ()Ljava/lang/Class;',
        'Ljava/io/PrintStream; println (Ljava/lang/Object;)V',
    ])

    check_reverse(testmeth, '^Ltests/androguard/TestActivity;$',
                  '^testCall2$', external=False, android_api=False)
    check_reverse(testmeth, '^Ltests/androguard/TestIfs;$', '^testIF$',
                  external=False, android_api=False)
    check_reverse(testmeth, '^Ljava/lang/Object;$', '^getClass$',
                  external=True, android_api=True)
def testDex(self):
    """An Analysis can be constructed from the sample DEX."""
    with open("examples/android/TestsAndroguard/bin/classes.dex", "rb") as dex_file:
        vm = dvm.DalvikVMFormat(dex_file.read())
    built = analysis.Analysis(vm)
    self.assertIsInstance(built, analysis.Analysis)
def Get_one_apk_dictory(apkfile):
    """Build per-class, per-method key-word and AMD-flag dictionaries.

    For every non-external method the decompiled source of the method and of
    its non-external callees (other than same-named ones) is scanned line by
    line for the entries of ``APK_Method_Key_Words.amd_key_words`` and
    ``APK_Method_Key_Words.key_Intent``.  Methods are keyed by
    ``name + descriptor``.

    :param apkfile: path of the APK to analyse
    :return: ``(class_code_dic, is_amd_class_code_dic)`` --
        ``{class: {method: [key words]}}`` and
        ``{class: {method: 'true' | 'false'}}``
    """
    a = apk.APK(apkfile, False, 'r', None, 2)
    d = dvm.DalvikVMFormat(a.get_dex())
    vmx = analysis.Analysis(d)
    dp = decompiler.DecompilerDAD(d, vmx)  # DAD is androguard's built-in decompiler
    _, _, dx = AnalyzeAPK(apkfile)
    CFG = nx.DiGraph()

    class_code_dic = {}         # class -> {method -> key words}
    is_amd_class_code_dic = {}  # class -> {method -> 'true' / 'false'}

    for k in d.get_classes():
        method_dic = {}         # method -> key words, for this class
        is_amd_method_dic = {}  # method -> AMD flag, for this class

        for m in dx.find_methods(classname=k.get_name()):
            orig_method = m.get_method()
            is_this_external = isinstance(orig_method, ExternalMethod)
            CFG.add_node(orig_method, external=is_this_external)

            callees = []  # methods referenced by this method
            for other_class, callee, offset in m.get_xref_to():
                is_external = isinstance(callee, ExternalMethod)
                # `Graph.node` was removed in networkx 2.4; test membership
                # on the graph itself instead.
                if callee not in CFG:
                    CFG.add_node(callee, external=is_external)
                # NOTE(review): assumes every callee is collected, not only
                # those newly added to the graph -- confirm the intended
                # nesting.
                callees.append(callee)

            orig_method_code = []
            orig_method_key_words = []
            if not is_this_external:
                # Source lines of the method itself, stripped.
                orig_method_code = [
                    line.strip()
                    for line in orig_method.get_source().split('\n')
                ]
                orig_method_key_words += Get_Word(
                    orig_method.get_name() + '==' +
                    orig_method.get_descriptor())

            for callee in callees:
                if isinstance(callee, ExternalMethod):
                    continue
                if callee.get_name() == orig_method.get_name():
                    continue
                # Append the callee's source to the method's own source.
                orig_method_code += [
                    line.strip()
                    for line in callee.get_source().split('\n')
                ]
                orig_method_key_words += Get_Word(
                    callee.get_name() + '==' + callee.get_descriptor())

            if is_this_external:
                continue

            method_key = orig_method.get_name() + orig_method.get_descriptor()

            # AMD key words: one hit per (key, source line) pair.
            amd_num = 0
            for key in APK_Method_Key_Words.amd_key_words:
                for code in orig_method_code:
                    if key in code:
                        amd_num += 1
                        orig_method_key_words += Get_Word(key)

            # Intent delegation key words.
            for key in APK_Method_Key_Words.key_Intent:
                for code in orig_method_code:
                    if key in code:
                        key_word = Get_Word(key)
                        key_word.append('Rbracket')
                        orig_method_key_words += key_word

            is_amd_method_dic[method_key] = 'true' if amd_num > 0 else 'false'
            method_dic[method_key] = orig_method_key_words

        class_code_dic[k.get_name()] = method_dic
        is_amd_class_code_dic[k.get_name()] = is_amd_method_dic

    # Both dictionaries are returned for the later processing steps.
    return class_code_dic, is_amd_class_code_dic
def extract_features(file_path):
    """Extract manifest data, names and binary feature vectors from an APK.

    :param file_path: path of the APK
    :return: dict with the keys assigned below, or ``None`` (after printing
        the error) if the APK cannot be loaded and analysed
    """
    result = {}
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception as e:
        # Parenthesised single-argument print works on Python 2 and 3.
        print(e)
        return None

    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    raw = a.get_raw()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]

    # The native / crypto / dynamic-code / reflection flags are currently
    # disabled; only obfuscation and database usage are computed.
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0

    s_list = []
    s_list.extend(result['class_names'])
    s_list.extend(result['method_names'])
    s_list.extend(result['field_names'])
    result['entropy_rate'] = entropy_rate(s_list)

    result['feature_vectors'] = {}

    result['feature_vectors']['api_calls'] = [
        1 if dx.get_method_by_name(".", call, ".") else 0
        for call in API_CALLS
    ]
    result['feature_vectors']['permissions'] = [
        1 if permission in result['permissions'] else 0
        for permission in PERMISSIONS
    ]
    result['feature_vectors']['special_strings'] = [
        1 if d.get_regex_strings(word) else 0
        for word in SPECIAL_STRINGS
    ]
    result['feature_vectors']['others'] = [
        result['is_obfuscation'],
        result['is_database'],
    ]
    return result