def get_apis(path):
    """Collect the calls an application makes to classes it does not define.

    The APK at *path* is loaded, and the textual output of every instruction
    in every method that has code is scanned for a method-call reference of
    the form ``Lsome/Class;->name(params)return``.  A reference is kept only
    when its class descriptor is not among the classes declared in the
    application's own DEX.

    Parameters:
      path - The path of the application to be analysed
    Returns:
      A sorted list of the distinct matched call strings
    """
    application = APK(path)
    application_dex = DalvikVMFormat(application.get_dex())
    application_x = Analysis(application_dex)

    # A set keeps the per-instruction "is this one of our own classes?" test
    # O(1); the original list made it linear in the number of classes.
    own_classes = {cls.get_name() for cls in application_dex.get_classes()}

    # Group 1 captures the class descriptor (``L...;``) of the call target.
    call_pattern = re.compile(r'(L[^;]*;)->[^\(]*\([^\)]*\).*')

    methods = set()
    for method in application_dex.get_methods():
        # Methods without a code item have no instructions to scan.
        if method.get_code() is None:
            continue
        method_analysis = application_x.get_method(method)
        for block in method_analysis.get_basic_blocks().get():
            for instruction in block.get_instructions():
                match = call_pattern.search(instruction.get_output())
                if match and match.group(1) not in own_classes:
                    methods.add(match.group())

    # Sorting makes the result deterministic; set iteration order is not.
    return sorted(methods)
def extract_features(file_path):
    """Extract manifest, DEX and binary feature data from an APK.

    Parameters:
      file_path - path of the APK to analyse
    Returns:
      ``None`` when the APK cannot be loaded or analysed; otherwise a dict
      with the keys ``android_version_code``, ``android_version_name``,
      ``max_sdk``, ``min_sdk``, ``filename``, ``target_sdk``, ``md5``,
      ``sha256``, ``permissions``, ``activities``, ``providers``,
      ``services``, ``strings``, ``class_names``, ``method_names``,
      ``field_names``, ``is_obfuscation``, ``is_database``, ``intents``,
      ``entropy_rate`` and ``feature_vectors``.  ``feature_vectors`` maps
      ``api_calls``, ``permissions``, ``intents`` and ``special_strings`` to
      lists of 0/1 flags, one per entry of the module-level ``API_CALLS``,
      ``PERMISSIONS``, ``INTENTS`` and ``SPECIAL_STRINGS`` sequences.
    """
    result = {}
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception:
        # Any load/analysis failure means "no features"; unlike a bare
        # ``except`` this still lets KeyboardInterrupt/SystemExit propagate.
        return None

    raw = a.get_raw()  # read once, hashed twice below

    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]
    # NOTE: the libraries list and the native / crypto / dynamic-code /
    # reflection flags are intentionally not produced (they were disabled
    # in the original implementation).
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0
    result['intents'] = list(a.get_elements("action", "name"))

    # The entropy rate is computed over all identifier names together.
    identifiers = []
    identifiers.extend(result['class_names'])
    identifiers.extend(result['method_names'])
    identifiers.extend(result['field_names'])
    result['entropy_rate'] = entropy_rate(identifiers)

    declared_permissions = set(result['permissions'])
    declared_intents = result['intents']

    result['feature_vectors'] = {
        # Presence of each known API call in the APK.
        'api_calls': [
            1 if dx.get_method(call) else 0 for call in API_CALLS
        ],
        # Presence of each known permission among the declared permissions.
        'permissions': [
            1 if permission in declared_permissions else 0
            for permission in PERMISSIONS
        ],
        # A known intent counts as present when it occurs inside (``in``)
        # any declared intent action, not only on an exact match.
        'intents': [
            1 if any(known in declared for declared in declared_intents)
            else 0
            for known in INTENTS
        ],
        # Presence of each special string (used as a regex) in the DEX
        # strings.
        'special_strings': [
            1 if d.get_regex_strings(word) else 0
            for word in SPECIAL_STRINGS
        ],
    }
    return result
class PDG():
    """Directed graph of the basic blocks of an APK or DEX file.

    Nodes are basic-block labels (class name, method name, descriptor,
    block name).  Edges link a block to its successors inside the same
    method and to the first block of methods it references.
    """

    def __init__(self, filename):
        """Load *filename*, run the analysis and build the graph."""
        self.filename = filename
        try:
            self.a = APK(filename)
            self.d = DalvikVMFormat(self.a.get_dex())
            self.d.create_python_export()
            self.dx = Analysis(self.d)
        except zipfile.BadZipfile:
            # Not a zip archive, so not an APK: try it as a raw DEX file.
            _, self.d, self.dx = AnalyzeDex(self.filename)

        self.d.set_vmanalysis(self.dx)
        self.dx.create_xref()
        self.fcg = self.dx.get_call_graph()
        self.icfg = self.build_icfg()

    def get_graph(self):
        """Return the graph built by ``build_icfg``."""
        return self.icfg

    def build_icfg(self):
        """Build and return a ``nx.DiGraph`` with one node per basic block."""
        graph = nx.DiGraph()
        for method in self.d.get_methods():
            blocks = self.dx.get_method(method).basic_blocks.get()
            for bb in blocks:
                node = self.get_bb_label(bb)
                successors = self.get_children(bb, self.dx)
                graph.add_node(node)
                graph.add_edges_from(
                    [(node, successor) for successor in successors])
        return graph

    def get_bb_label(self, bb):
        """Return the label of a basic block: its method label + block name."""
        method_label = self.get_method_label(bb.method)
        return method_label + (bb.name, )

    def get_method_label(self, method):
        """Return ``(class name, method name, descriptor)`` for a method."""
        class_name = method.get_class_name()
        name = method.get_name()
        descriptor = method.get_descriptor()
        return (class_name, name, descriptor)

    def get_children(self, bb, dx):
        """Return the labels of all children of *bb*: the successors inside
        its own method followed by the entry blocks of referenced methods.
        """
        inside = self.get_bb_intra_method_children(bb)
        outside = self.get_bb_extra_method_children(bb, dx)
        return inside + outside

    def get_bb_intra_method_children(self, bb):
        """Return the labels of the successors of *bb* within its method."""
        # The successor block is the third item of each ``get_next`` entry.
        return [self.get_bb_label(entry[2]) for entry in bb.get_next()]

    def get_bb_extra_method_children(self, bb, dx):
        """Return the labels of the first basic block of each method
        referenced from inside *bb*.
        """
        labels = []
        try:
            references = dx.get_method_analysis(bb.method).get_xref_to()
        except AttributeError:
            # No cross-reference information is reachable for this method.
            return labels

        for reference in references:
            # Third item of the xref entry; presumably the offset of the
            # call instruction -- TODO confirm against the androguard docs.
            target_offset = reference[2]
            if not self.call_in_bb(bb, target_offset):
                continue
            try:
                target = dx.get_method(
                    self.d.get_method_by_idx(target_offset))
                if not target:
                    continue
                entry_block = next(target.basic_blocks.get())
                labels.append(self.get_bb_label(entry_block))
            except StopIteration:
                # The referenced method has no basic blocks.
                pass
        return labels

    def call_in_bb(self, bb, idx):
        """Return True when *idx* lies within ``[bb start, bb end]``."""
        return bb.get_start() <= idx <= bb.get_end()
def _report_failure(path, exc, message):
    """Report *exc* on stdout, stderr and the log.

    Must be called from inside an ``except`` block so that the traceback of
    the exception being handled is available.
    """
    print(exc)
    traceback.print_exc()
    print(path, exc, file=sys.stderr)
    logging.exception(message)


def _run_named_tests(target, names, path):
    """Call each zero-argument method of *target* listed in *names*."""
    for name in names:
        print(name)
        try:
            # The lookup is inside the try so that a missing accessor is
            # reported like any other failure instead of aborting the sample.
            getattr(target, name)()
        except Exception as exc:
            _report_failure(path, exc, "{} .. {}".format(path, name))


def main():
    """Smoke-test the APK, DEX, decompiler and analysis APIs on every sample.

    For each path yielded by ``samples()`` the APK accessors, the DEX
    accessors, the decompilation of every method (with and without AST),
    the cross-reference creation and the instruction listing of every basic
    block are exercised.  Failures of individual steps are reported and the
    run continues; a missing sample file is skipped silently and
    ``KeyboardInterrupt`` aborts the run.
    """
    tests_apk = [
        "is_valid_APK", "get_filename", "get_app_name", "get_app_icon",
        "get_package", "get_androidversion_code",
        "get_androidversion_name", "get_files", "get_files_types",
        "get_files_crc32", "get_files_information", "get_raw", "get_dex",
        "get_all_dex", "get_main_activity", "get_activities",
        "get_services", "get_receivers", "get_providers",
        "get_permissions", "get_details_permissions",
        "get_requested_aosp_permissions",
        "get_requested_aosp_permissions_details",
        "get_requested_third_party_permissions",
        "get_declared_permissions", "get_declared_permissions_details",
        "get_max_sdk_version", "get_min_sdk_version",
        "get_target_sdk_version", "get_libraries",
        "get_android_manifest_axml", "get_android_manifest_xml",
        "get_android_resources", "get_signature_name",
        "get_signature_names", "get_signature", "get_signatures",
    ]
    tests_dex = [
        "get_api_version", "get_classes_def_item", "get_methods_id_item",
        "get_fields_id_item", "get_codes_item", "get_string_data_item",
        "get_debug_info_item", "get_header_item", "get_class_manager",
        "show",
        # "save",  # FIXME broken
        "get_classes_names", "get_classes", "get_all_fields",
        "get_fields", "get_methods", "get_len_methods", "get_strings",
        "get_format_type", "create_python_export",
        "get_BRANCH_DVM_OPCODES", "get_determineNext",
        "get_determineException", "print_classes_hierarchy",
        "list_classes_hierarchy", "get_format",
    ]

    for path in samples():
        print(path)
        logging.error("Processing" + path)

        try:
            # Testing APK
            a = APK(path)
            _run_named_tests(a, tests_apk, path)

            # Testing DEX: one shared Analysis collects every DEX of the APK
            dx = Analysis()
            for dex in a.get_all_dex():
                d = DalvikVMFormat(dex)
                dx.add(d)

                # Test decompilation, first with AST output, then without
                for c in d.get_classes():
                    for m in c.get_methods():
                        mx = dx.get_method(m)
                        for do_ast in (True, False):
                            decompiled = DvMethod(mx)
                            try:
                                decompiled.process(doAST=do_ast)
                            except Exception as exc:
                                _report_failure(
                                    path, exc,
                                    "{} .. {} .. {}".format(
                                        path, c.get_name(), m.get_name()))

                # DEX tests
                _run_named_tests(d, tests_dex, path)

            # Analysis Tests
            try:
                dx.create_xref()
            except Exception as exc:
                # Name the step that failed rather than a stale loop variable.
                _report_failure(
                    path, exc,
                    "{} .. {} at Analysis".format(path, "create_xref"))

            # MethodAnalysis tests
            for m in dx.methods.values():
                for bb in m.get_basic_blocks():
                    try:
                        list(bb.get_instructions())
                    except Exception as exc:
                        _report_failure(
                            path, exc,
                            "{} .. {} at BasicBlock {}".format(
                                path, "get_instructions", m))

        except KeyboardInterrupt:
            raise
        except FileNotFoundError:
            pass
        except Exception as e:
            _report_failure(path, e, path)
def get_apis(path):
    """
    Get the APIs from an application.

    Parameters:
      path - The path of the application to be decompiled
    Returns:
      A sorted list of APIs with parameters.  If the analysis fails, the
      error and the path are printed, the path is appended as a line to
      ``error_files.txt`` and the APIs collected so far are returned.
    """
    methods = set()

    # A function call comprises four parts: a class name, a function name,
    # zero or more parameters, and a return type:
    #
    #   CLASS NAME:    starts with a character L and ends in a right arrow.
    #   FUNCTION NAME: starts with the right arrow and ends in a left
    #                  parenthesis.
    #   PARAMETERS:    are between the parentheses.
    #   RETURN TYPE:   is the rest of the string.
    #
    # Example:
    #   match.group():  Landroid/app/IntentService;-><init>(Ljava/lang/String;)V
    #   match.group(1): Landroid/app/IntentService;
    call_pattern = re.compile(r'(L[^;]*;)->[^\(]*\([^\)]*\).*')

    try:
        # See the androguard documentation for details of these objects.
        #   application:     the APK object (package name, permissions,
        #                    AndroidManifest.xml, resources, ...)
        #   application_dex: the DalvikVMFormat object for the APK's dex
        #                    file, from which classes, methods and strings
        #                    can be obtained
        #   application_x:   the Analysis object built on top of the dex
        application = APK(path)
        application_dex = DalvikVMFormat(application.get_dex())
        application_x = Analysis(application_dex)

        # Names of the classes defined by the application itself; a set
        # keeps the per-instruction membership test O(1).
        own_classes = {
            cls.get_name() for cls in application_dex.get_classes()
        }

        for method in application_dex.get_methods():
            method_analysis = application_x.get_method(method)
            if method.get_code() is None:
                continue

            for basic_block in method_analysis.get_basic_blocks().get():
                # Walk the instructions of the block.
                for instruction in basic_block.get_instructions():
                    # A string holding the instruction operands: a method,
                    # a variable or anything else.
                    output = instruction.get_output()
                    match = call_pattern.search(output)
                    # Keep the call only when its class (group 1) is not
                    # one of the application's own classes, i.e. it is a
                    # call into an API defined outside the application.
                    if match and match.group(1) not in own_classes:
                        methods.add(match.group())
    except Exception as e:
        print(e)
        print("path", path)
        # Append, one path per line: opening with "w" on every call
        # truncated the log each time and never closed the handle.
        with open("error_files.txt", "a") as error_file:
            error_file.write(path + "\n")

    # Always a sorted list, also on the error path (which used to leak the
    # intermediate set to the caller).
    return sorted(methods)
def _log_failure(path, exc, message):
    """Report *exc* on stdout, stderr and the log.

    Must be called from inside an ``except`` block so that the traceback of
    the exception being handled is available.
    """
    print(exc)
    traceback.print_exc()
    print(path, exc, file=sys.stderr)
    logging.exception(message)


def _call_each(target, names, path):
    """Call each zero-argument method of *target* listed in *names*."""
    for name in names:
        print(name)
        try:
            # The lookup is inside the try so that a missing accessor is
            # reported like any other failure instead of aborting the sample.
            getattr(target, name)()
        except Exception as exc:
            _log_failure(path, exc, "{} .. {}".format(path, name))


def main():
    """Smoke-test the APK, DEX and decompiler APIs on every sample.

    For each path yielded by ``samples()`` the APK accessors are called,
    then for every DEX in the APK each method is decompiled (with and
    without AST) and the DEX accessors are called.  Failures of individual
    steps are reported and the run continues; a missing sample file is
    skipped silently and ``KeyboardInterrupt`` aborts the run.
    """
    tests_apk = [
        "is_valid_APK", "get_filename", "get_app_name", "get_app_icon",
        "get_package", "get_androidversion_code",
        "get_androidversion_name", "get_files", "get_files_types",
        "get_files_crc32", "get_files_information", "get_raw", "get_dex",
        "get_all_dex", "get_main_activity", "get_activities",
        "get_services", "get_receivers", "get_providers",
        "get_permissions", "get_details_permissions",
        "get_requested_aosp_permissions",
        "get_requested_aosp_permissions_details",
        "get_requested_third_party_permissions",
        "get_declared_permissions", "get_declared_permissions_details",
        "get_max_sdk_version", "get_min_sdk_version",
        "get_target_sdk_version", "get_libraries",
        "get_android_manifest_axml", "get_android_manifest_xml",
        "get_android_resources", "get_signature_name",
        "get_signature_names", "get_signature", "get_signatures",
    ]
    tests_dex = [
        "get_api_version", "get_classes_def_item", "get_methods_id_item",
        "get_fields_id_item", "get_codes_item", "get_string_data_item",
        "get_debug_info_item", "get_header_item", "get_class_manager",
        "show", "save", "get_classes_names", "get_classes",
        "get_all_fields", "get_fields", "get_methods", "get_len_methods",
        "get_strings", "get_format_type", "create_python_export",
        "get_BRANCH_DVM_OPCODES", "get_determineNext",
        "get_determineException", "print_classes_hierarchy",
        "list_classes_hierarchy", "get_format",
    ]

    for path in samples():
        print(path)
        logging.error("Processing" + path)

        try:
            # Testing APK
            a = APK(path)
            _call_each(a, tests_apk, path)

            # Testing DEX: each DEX gets its own Analysis
            for dex in a.get_all_dex():
                d = DalvikVMFormat(dex)
                dx = Analysis(d)
                d.set_vmanalysis(dx)

                # Test decompilation, first with AST output, then without
                for c in d.get_classes():
                    for m in c.get_methods():
                        mx = dx.get_method(m)
                        for do_ast in (True, False):
                            decompiled = DvMethod(mx)
                            try:
                                decompiled.process(doAST=do_ast)
                            except Exception as exc:
                                _log_failure(
                                    path, exc,
                                    "{} .. {} .. {}".format(
                                        path, c.get_name(), m.get_name()))

                # Other tests
                _call_each(d, tests_dex, path)

        except KeyboardInterrupt:
            raise
        except FileNotFoundError:
            pass
        except Exception as e:
            _log_failure(path, e, path)