def addDEY(self, filename, data, dx=None):
    """
    Add an ODEX file to the session and run the analysis.

    :param filename: the (file)name under which the ODEX file is registered
    :param data: binary data of the ODEX file
    :param dx: an existing Analysis object to add the file to (optional);
        a new one is created when omitted
    :return: a tuple of SHA256 hex digest, the DalvikOdexVMFormat object
        parsed from ``data`` and the Analysis object
    """
    digest = hashlib.sha256(data).hexdigest()
    log.debug("add DEY:%s", digest)

    d = DalvikOdexVMFormat(data)
    log.debug("added DEY:%s", digest)

    # Register the parsed file under both lookup directions.
    self.analyzed_files[filename].append(digest)
    self.analyzed_digest[digest] = filename
    self.analyzed_dex[digest] = d

    if self.export_ipython:
        d.create_python_export()

    if dx is None:
        dx = Analysis()

    dx.add(d)
    dx.create_xref()

    # A dedicated loop variable is used on purpose: reusing ``d`` here would
    # overwrite the object parsed above, which is what gets returned.
    for vm in dx.vms:
        # TODO: allow different decompiler here!
        vm.set_decompiler(DecompilerDAD(vm, dx))
        vm.set_vmanalysis(dx)

    self.analyzed_vms[digest] = dx
    return digest, d, dx
def addAPK(self, filename, data):
    """
    Register an APK with the Session and analyse every DEX file it contains.

    All DEX files are added to one shared Analysis object via
    :meth:`addDEX` with ``postpone_xref=True``; the cross references are
    then created once, after the last DEX file has been added.

    :param filename: (file)name of APK file
    :param data: binary data of the APK file
    :return: a tuple of SHA256 Checksum and APK Object
    """
    sha256 = hashlib.sha256(data).hexdigest()
    log.debug("add APK:%s" % sha256)

    parsed_apk = APK(data, True)
    self.analyzed_apk[sha256] = [parsed_apk]
    self.analyzed_files[filename].append(sha256)
    self.analyzed_digest[sha256] = filename

    shared_analysis = Analysis()
    self.analyzed_vms[sha256] = shared_analysis

    for raw_dex in parsed_apk.get_all_dex():
        # we throw away the output... FIXME?
        self.addDEX(filename, raw_dex, shared_analysis, postpone_xref=True)

    # Postponed: build the cross references only once for all DEX files.
    shared_analysis.create_xref()

    log.debug("added APK:%s" % sha256)
    return sha256, parsed_apk
def run(self):
    """
    Collect the unique API calls of every APK in ``self.apks_list``.

    Each APK is opened as a zip archive, every ``.dex`` member is parsed and
    added to one Analysis object, and the result of
    ``self.get_api_calls`` is merged into the running list of unique values.
    Failures on a single APK are printed and the APK is skipped. When all
    APKs have been processed, the accumulated list is put on ``self.queue``.
    """
    unique_apis = []

    # enumerate() gives the progress position directly; looking it up with
    # list.index() is O(n) per APK and wrong when a path occurs twice.
    for position, apk in enumerate(self.apks_list, start=1):
        try:
            # Named ``archive`` so the stdlib ``zipfile`` module is not shadowed.
            with ZipFile(apk) as archive:
                # find .dex files inside apk
                dexes = [name for name in archive.namelist()
                         if name.endswith('.dex')]

                dx = Analysis()

                # analyze every .dex
                for dex in dexes:
                    with archive.open(dex) as dexfile:
                        d = DalvikVMFormat(dexfile.read())
                        dx.add(d)

                # creates cross references between classes, methods, etc.
                # for all the .dex
                dx.create_xref()

                # extracting android apis
                apis = self.get_api_calls(dx)
                unique_apis = list(np.unique(unique_apis + apis))

                print('Process %d: %.1f%%' %
                      (self.process_id, (position / self.total_apks) * 100))
        except BadZipfile:
            print('Bad zip file =========> %s' % apk)
        except Exception as e:
            # Best effort: report the failing APK and continue with the next.
            print('\n%s\n%s\n' % (apk, e))

    self.queue.put(unique_apis)
    print('----------------> Process %d is done!' % self.process_id)
class CFG():
    """
    Call graph of an APK or DEX file with a per-node instruction label.

    The graph is obtained from androguard's ``Analysis.get_call_graph()``
    and every node receives a ``"label"`` attribute (a numpy array) built by
    :meth:`color_instructions`.
    """

    def __init__(self, filename):
        """
        Parse ``filename`` and build the labelled graph.

        :param filename: path of an APK file; if it is not a valid zip
            archive it is handed to ``AnalyzeDex`` instead
        """
        self.filename = filename
        try:
            self.a = APK(filename)
            self.d = DalvikVMFormat(self.a.get_dex())
            self.d.create_python_export()
            self.dx = Analysis(self.d)
        except zipfile.BadZipfile:
            # if file is not an APK, may be a dex object
            _, self.d, self.dx = AnalyzeDex(self.filename)

        self.d.set_vmanalysis(self.dx)
        self.dx.create_xref()
        self.cfg = self.build_cfg()

    def get_cg(self):
        """Return the labelled graph built by :meth:`build_cfg`."""
        return self.cfg

    def get_cfg(self):
        """Return the call graph produced by the Analysis object."""
        return self.dx.get_call_graph()

    def build_cfg(self):
        """
        Using NX and Androguard, build a directed graph NX object so that:
            - nodes are the objects used by the androguard call graph
            - each node has a "label" attribute that encodes the method
              behavior
        Nodes that do not offer ``get_instructions`` (or whose instructions
        raise AttributeError while being read) get an all-zero label.
        """
        cfg = self.get_cfg()

        for n in cfg.nodes:
            try:
                instructions = [ins.get_name() for ins in n.get_instructions()]
                encoded_label = self.color_instructions(instructions)
            except AttributeError:
                # NOTE(review): 15 is presumably len(INSTRUCTION_CLASS_COLOR);
                # confirm and consider deriving it from that constant.
                encoded_label = np.array([0] * 15)

            # ``Graph.node`` was removed in NetworkX 2.4; ``Graph.nodes`` is
            # the supported accessor and is already used for the iteration.
            cfg.nodes[n]["label"] = encoded_label

        return cfg

    def color_instructions(self, instructions):
        """
        Node label based on coloring technique by Kruegel.

        :param instructions: iterable of instruction names, each of which
            must be a key of ``INSTRUCTION_SET_COLOR``
        :return: numpy array of 0/1 flags, one slot per entry of
            ``INSTRUCTION_CLASS_COLOR``
        """
        h = [0] * len(INSTRUCTION_CLASS_COLOR)
        for i in instructions:
            h[INSTRUCTION_SET_COLOR[i]] = 1
        return np.array(h)

    def get_classes_from_label(self, label):
        """Return the ``INSTRUCTION_CLASSES`` entries whose flag in ``label`` is 1."""
        return [
            INSTRUCTION_CLASSES[i] for i, flag in enumerate(label)
            if flag == 1
        ]
def AnalyzeAPK(_file, session=None, raw=False):
    """
    Analyze an android application and setup all stuff for a more quickly
    analysis!
    If session is None, no session is used at all. This is the default
    behaviour.
    If you like to continue your work later, it might be a good idea to use a
    session.
    A default session can be created by using :meth:`~get_default_session`.

    :param _file: the filename of the android application or a buffer which
        represents the application
    :type _file: string (for filename) or bytes (for raw)
    :param session: A session (default: None)
    :param raw: boolean if raw bytes are supplied instead of a filename
    :rtype: return the :class:`~androguard.core.bytecodes.apk.APK`, list of
        :class:`~androguard.core.bytecodes.dvm.DalvikVMFormat`, and
        :class:`~androguard.core.analysis.analysis.Analysis` objects
    """
    log.debug("AnalyzeAPK")

    if session:
        log.debug("Using existing session {}".format(session))
        if raw:
            data = _file
            # Raw buffers have no name; the MD5 hex digest stands in for it.
            filename = hashlib.md5(_file).hexdigest()
        else:
            with open(_file, "rb") as fd:
                data = fd.read()
            filename = _file
        digest = session.add(filename, data)
        return session.get_objects_apk(filename, digest)

    log.debug("Analysing without session")
    a = APK(_file, raw=raw)
    # FIXME: probably it is not necessary to keep all DalvikVMFormats, as
    # they are already part of Analysis. But when using sessions, it works
    # this way...
    d = []
    dx = Analysis()
    for dex in a.get_all_dex():
        df = DalvikVMFormat(dex, using_api=a.get_target_sdk_version())
        dx.add(df)
        d.append(df)
        # The decompiler is bound to this single VM (``df``), not to the
        # accumulating list ``d``.
        df.set_decompiler(decompiler.DecompilerDAD(df, dx))

    dx.create_xref()

    return a, d, dx
def addDEX(self, filename, data, dx=None, postpone_xref=False):
    """
    Add a DEX file to the Session and run analysis.

    :param filename: the (file)name of the DEX file
    :param data: binary data of the dex file
    :param dx: an existing Analysis Object (optional)
    :param postpone_xref: True if no xref shall be created, and will be called manually
    :return: A tuple of SHA256 Hash, DalvikVMFormat Object and Analysis object
    """
    digest = hashlib.sha256(data).hexdigest()
    log.debug("add DEX:%s", digest)

    log.debug("Parsing format ...")
    d = DalvikVMFormat(data)
    log.debug("added DEX:%s", digest)

    # Register the parsed file under both lookup directions.
    self.analyzed_files[filename].append(digest)
    self.analyzed_digest[digest] = filename
    self.analyzed_dex[digest] = d

    if dx is None:
        dx = Analysis()

    dx.add(d)
    if not postpone_xref:
        dx.create_xref()

    # TODO: If multidex: this will called many times per dex, even if already set
    # A dedicated loop variable is used on purpose: reusing ``d`` here would
    # overwrite the object parsed above, which is what gets returned.
    for vm in dx.vms:
        # TODO: allow different decompiler here!
        vm.set_decompiler(DecompilerDAD(vm, dx))
        vm.set_vmanalysis(dx)
    self.analyzed_vms[digest] = dx

    if self.export_ipython:
        log.debug("Exporting in ipython")
        d.create_python_export()

    return digest, d, dx
def addDEX(self, filename, data, dx=None):
    """
    Add a DEX file to the Session and run analysis.

    :param filename: the (file)name of the DEX file
    :param data: binary data of the dex file
    :param dx: an existing Analysis Object (optional)
    :return: A tuple of SHA256 Hash, DalvikVMFormat Object and Analysis object
    """
    digest = hashlib.sha256(data).hexdigest()
    log.debug("add DEX:%s", digest)

    log.debug("Parsing format ...")
    d = DalvikVMFormat(data)
    log.debug("added DEX:%s", digest)

    # Register the parsed file under both lookup directions.
    self.analyzed_files[filename].append(digest)
    self.analyzed_digest[digest] = filename
    self.analyzed_dex[digest] = d

    if dx is None:
        dx = Analysis()

    dx.add(d)
    dx.create_xref()

    # TODO: If multidex: this will called many times per dex, even if already set
    # The loop variable must not be ``d``: that would rebind the name that is
    # returned below to whichever VM happens to be last in ``dx.vms``.
    for known_vm in dx.vms:
        # TODO: allow different decompiler here!
        known_vm.set_decompiler(DecompilerDAD(known_vm, dx))
        known_vm.set_vmanalysis(dx)
    self.analyzed_vms[digest] = dx

    if self.export_ipython:
        log.debug("Exporting in ipython")
        d.create_python_export()

    return digest, d, dx
def analyze(path_to_apk):
    """
    Run the request and usage analysis on one APK file.

    :param path_to_apk: path of the APK file to analyze
    :return: the populated ``AnalyzedApk`` object. When the file is not a
        valid zip archive, a placeholder is returned whose ``app_name`` is
        ``"error"``, whose ``package_name`` is the MD5 hex digest of the
        file content and whose ``error`` flag is set.
    :raises SystemExit: with code 42 when the basic-info extraction flagged
        an error
    """
    logging.info("Starting analysis...")

    app_to_analyze = AnalyzedApk()

    try:
        # The third element of the result is not used in this function.
        a, d, _ = invoke_androguard(path_to_apk)
    except zipfile.BadZipFile:
        # File is invalid -> we need a unique dummy name for the app to
        # generate a valid json file
        hasher = hashlib.md5()
        with open(path_to_apk, "rb") as apk:
            hasher.update(apk.read())
        app_to_analyze.app_name = "error"
        app_to_analyze.package_name = hasher.hexdigest()
        app_to_analyze.error = True
        return app_to_analyze

    app_to_analyze = init_basic_infos(a, app_to_analyze)
    app_to_analyze = filter_target_sdk(a, app_to_analyze)
    app_to_analyze = filter_manifest_permission_requests(a, app_to_analyze)

    if app_to_analyze.error:
        logging.critical(
            "Error during extraction of basic infos, maybe the app is obfuscated!"
        )
        # sys.exit instead of the interactive-only ``exit`` helper, which is
        # injected by the ``site`` module and not guaranteed to exist.
        sys.exit(42)

    if not app_to_analyze.is_analyzable():
        logging.error(
            "App is not analyzable, skipping request and usage analysis")
        return app_to_analyze

    # Prepare analysis instance
    analysis = Analysis()
    for vm in d:
        analysis.add(vm)
    logging.info("Creating XREFs...")
    analysis.create_xref()

    main_activities = [
        act.replace(".", "/") for act in a.get_main_activities()
    ]

    app_to_analyze = run_request_analysis(app_to_analyze, a, analysis,
                                          main_activities)

    try:
        app_to_analyze = run_usage_analysis(app_to_analyze, analysis)
    except Exception:
        # Deliberate best effort, but KeyboardInterrupt / SystemExit must
        # still propagate, hence ``Exception`` rather than a bare except.
        traceback.print_exc(file=sys.stdout)
        logging.info(
            "An error occurred during usage analysis. " +
            "Still continuing, hoping that the request analysis is valuable..."
        )
        app_to_analyze.error = True

    logging.info("Finished analysis...")
    return app_to_analyze
def run(self):
    """Run androguard to extract static android information.

    Sets ``self.key`` to ``"apkinfo"``.

    @return: ``None`` when the task category does not contain "file" or
        androguard is unavailable; otherwise the ``apkinfo`` dict, which is
        only populated when the sample is a valid APK.
    @raise CuckooProcessingError: when the sample file does not exist or
        opening it fails with IOError, OSError or BadZipfile.
    """
    self.key = "apkinfo"
    apkinfo = {}

    if "file" not in self.task["category"] or not HAVE_ANDROGUARD:
        return

    # NOTE(review): ``f`` is only referenced by the commented-out check below.
    f = File(self.task["target"])
    #if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type():
    if not os.path.exists(self.file_path):
        raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path)

    try:
        a = APK(self.file_path)
        if a.is_valid_APK():
            manifest = {}

            # File listing, plus every entry accepted by file_type_check.
            apkinfo["files"] = self._apk_files(a)
            manifest["package"] = a.get_package()
            apkinfo["hidden_payload"] = []

            for file in apkinfo["files"]:
                if self.file_type_check(file):
                    apkinfo["hidden_payload"].append(file)

            apkinfo["files_flaged"] = self.files_name_map

            # Manifest-derived information.
            manifest["permissions"] = get_permissions(a)
            manifest["main_activity"] = a.get_main_activity()
            manifest["activities"] = a.get_activities()
            manifest["services"] = a.get_services()
            manifest["receivers"] = a.get_receivers()
            manifest["receivers_actions"] = get_extended_receivers(a)
            manifest["providers"] = a.get_providers()
            manifest["libraries"] = list(a.get_libraries())
            apkinfo["manifest"] = manifest

            apkinfo["icon"] = get_apk_icon(self.file_path)
            certificate = get_certificate(self.file_path)
            if certificate:
                apkinfo["certificate"] = certificate

            #vm = DalvikVMFormat(a.get_dex())
            #strings = vm.get_strings()
            strings = self._get_strings(self.file_path)
            apkinfo["interesting_strings"] = find_strings(strings)
            apkinfo["dex_strings"] = strings

            # Method-call extraction only runs when decompilation is enabled
            # and check_size accepts the file listing.
            static_calls = {}
            if self.options.decompilation:
                if self.check_size(apkinfo["files"]):
                    vm = DalvikVMFormat(a.get_dex())
                    vmx = Analysis(vm)
                    vmx.create_xref()

                    static_calls["all_methods"] = get_methods(vmx)
                    static_calls[
                        "permissions_method_calls"] = get_show_Permissions(
                            vmx)

                    static_calls[
                        "native_method_calls"] = get_show_NativeMethods(
                            vmx)
                    static_calls["is_native_code"] = bool(
                        static_calls["native_method_calls"]
                    )  # True if not empty, False if empty

                    static_calls[
                        "dynamic_method_calls"] = get_show_DynCode(vmx)
                    static_calls["is_dynamic_code"] = bool(
                        static_calls["dynamic_method_calls"])

                    static_calls[
                        "reflection_method_calls"] = get_show_ReflectionCode(
                            vmx)
                    static_calls["is_reflection_code"] = bool(
                        static_calls["reflection_method_calls"])

                    static_calls[
                        "crypto_method_calls"] = get_show_CryptoCode(vmx)
                    static_calls["is_crypto_code"] = bool(
                        static_calls["crypto_method_calls"])

                    classes = list()
                    for cls in vm.get_classes():
                        classes.append(cls.name)

                    static_calls["classes"] = classes

            # NOTE(review): this layout was reconstructed from
            # whitespace-collapsed source; the ``else`` is assumed to pair
            # with the ``decompilation`` option check rather than with
            # ``check_size`` -- confirm against the original file.
            else:
                log.warning(
                    "Aborted decompilation, static extraction of calls not perforemd",
                )

            apkinfo["static_method_calls"] = static_calls

    except (IOError, OSError, BadZipfile) as e:
        raise CuckooProcessingError("Error opening file %s" % e)

    return apkinfo
class PDG():
    """
    Graph of basic blocks of an APK or DEX file.

    Every node is a basic-block label (see :meth:`get_bb_label`); edges link
    a basic block to its successors inside the method and to the first basic
    block of methods it calls.
    """

    def __init__(self, filename):
        """
        Parse ``filename`` and build the graph.

        :param filename: path of an APK file; if it is not a valid zip
            archive it is handed to ``AnalyzeDex`` instead
        """
        self.filename = filename
        try:
            self.a = APK(filename)
            self.d = DalvikVMFormat(self.a.get_dex())
            self.d.create_python_export()
            self.dx = Analysis(self.d)
        except zipfile.BadZipfile:
            # if file is not an APK, may be a dex object
            _, self.d, self.dx = AnalyzeDex(self.filename)

        self.d.set_vmanalysis(self.dx)
        self.dx.create_xref()
        self.fcg = self.dx.get_call_graph()
        self.icfg = self.build_icfg()

    def get_graph(self):
        """Return the graph built by :meth:`build_icfg`."""
        return self.icfg

    def build_icfg(self):
        """
        Build a ``networkx.DiGraph`` with one node per basic block of every
        method in ``self.d`` and one edge per child returned by
        :meth:`get_children`.
        """
        icfg = nx.DiGraph()
        for method in self.d.get_methods():
            for bb in self.dx.get_method(method).basic_blocks.get():
                label = self.get_bb_label(bb)
                children = self.get_children(bb, self.dx)
                icfg.add_node(label)
                icfg.add_edges_from((label, child) for child in children)
        return icfg

    def get_bb_label(self, bb):
        """
        Return the descriptive name of a basic block: the label of its
        method extended by the basic block's own name.
        """
        return self.get_method_label(bb.method) + (bb.name, )

    def get_method_label(self, method):
        """
        Return the descriptive name of a method as a
        ``(class name, method name, descriptor)`` tuple.
        """
        return (method.get_class_name(), method.get_name(),
                method.get_descriptor())

    def get_children(self, bb, dx):
        """
        Return the labels of the basic blocks that are children of the
        input basic block in and out of its method
        """
        return (self.get_bb_intra_method_children(bb) +
                self.get_bb_extra_method_children(bb, dx))

    def get_bb_intra_method_children(self, bb):
        """
        Return the labels of the basic blocks that are children of the
        input basic block within a method
        """
        # Each entry of get_next() carries the successor block at index 2.
        return [self.get_bb_label(entry[2]) for entry in bb.get_next()]

    def get_bb_extra_method_children(self, bb, dx):
        """
        Given a basic block, find the calls to external methods and
        return the label of the first basic block in these methods
        """
        call_labels = []

        # iterate over calls from bb method to external methods
        try:
            xrefs = dx.get_method_analysis(bb.method).get_xref_to()
        except AttributeError:
            # No analysis object (or no xref support) for this method.
            return call_labels

        for xref in xrefs:
            remote_method_offset = xref[2]
            if not self.call_in_bb(bb, remote_method_offset):
                continue
            remote_method = dx.get_method(
                self.d.get_method_by_idx(remote_method_offset))
            if not remote_method:
                continue
            # Methods without any basic block contribute no edge.
            remote_bb = next(remote_method.basic_blocks.get(), None)
            if remote_bb is not None:
                call_labels.append(self.get_bb_label(remote_bb))
        return call_labels

    def call_in_bb(self, bb, idx):
        """Return True when ``idx`` lies within ``bb``'s start/end range (inclusive)."""
        return bb.get_start() <= idx <= bb.get_end()
def main():
    """
    Smoke-test androguard against every sample yielded by ``samples()``.

    For each sample the listed APK methods are called, every DEX file is
    parsed, every method is decompiled with and without AST output, the
    listed DEX methods are called, cross references are created and the
    instructions of every basic block are read. Failures of individual
    steps are reported (stdout, stderr and the log) and do not stop the
    run; a missing sample file is skipped silently.
    """
    # The method names do not depend on the sample, so build the lists once.
    tests_apk = [
        "is_valid_APK", "get_filename", "get_app_name", "get_app_icon",
        "get_package", "get_androidversion_code",
        "get_androidversion_name", "get_files", "get_files_types",
        "get_files_crc32", "get_files_information", "get_raw", "get_dex",
        "get_all_dex", "get_main_activity", "get_activities",
        "get_services", "get_receivers", "get_providers",
        "get_permissions", "get_details_permissions",
        "get_requested_aosp_permissions",
        "get_requested_aosp_permissions_details",
        "get_requested_third_party_permissions",
        "get_declared_permissions", "get_declared_permissions_details",
        "get_max_sdk_version", "get_min_sdk_version",
        "get_target_sdk_version", "get_libraries",
        "get_android_manifest_axml", "get_android_manifest_xml",
        "get_android_resources", "get_signature_name",
        "get_signature_names", "get_signature", "get_signatures"
    ]
    tests_dex = [
        "get_api_version",
        "get_classes_def_item",
        "get_methods_id_item",
        "get_fields_id_item",
        "get_codes_item",
        "get_string_data_item",
        "get_debug_info_item",
        "get_header_item",
        "get_class_manager",
        "show",
        # "save",  # FIXME broken
        "get_classes_names",
        "get_classes",
        "get_all_fields",
        "get_fields",
        "get_methods",
        "get_len_methods",
        "get_strings",
        "get_format_type",
        "create_python_export",
        "get_BRANCH_DVM_OPCODES",
        "get_determineNext",
        "get_determineException",
        "print_classes_hierarchy",
        "list_classes_hierarchy",
        "get_format"
    ]

    for path in samples():
        print(path)
        logging.error("Processing" + path)

        try:
            # Testing APK
            a = APK(path)
            for t in tests_apk:
                print(t)
                x = getattr(a, t)
                try:
                    x()
                except Exception as aaa:
                    _report_failure(path, aaa, "{} .. {}".format(path, t))

            # Testing DEX
            dx = Analysis()
            for dex in a.get_all_dex():
                d = DalvikVMFormat(dex)
                dx.add(d)

                # Test decompilation
                for c in d.get_classes():
                    for m in c.get_methods():
                        mx = dx.get_method(m)
                        # A fresh DvMethod is built for each mode.
                        for do_ast in (True, False):
                            ms = DvMethod(mx)
                            try:
                                ms.process(doAST=do_ast)
                            except Exception as aaa:
                                _report_failure(
                                    path, aaa, "{} .. {} .. {}".format(
                                        path, c.get_name(), m.get_name()))

                # DEX tests
                for t in tests_dex:
                    print(t)
                    x = getattr(d, t)
                    try:
                        x()
                    except Exception as aaa:
                        _report_failure(path, aaa,
                                        "{} .. {}".format(path, t))

            # Analysis Tests
            try:
                dx.create_xref()
            except Exception as aaa:
                # Name the step that failed instead of a stale loop variable.
                _report_failure(
                    path, aaa,
                    "{} .. {} at Analysis".format(path, "create_xref"))

            # MethodAnalysis tests
            for m in dx.methods.values():
                for bb in m.get_basic_blocks():
                    try:
                        list(bb.get_instructions())
                    except Exception as aaa:
                        _report_failure(
                            path, aaa, "{} .. {} at BasicBlock {}".format(
                                path, "get_instructions", m))

        except KeyboardInterrupt:
            raise
        except FileNotFoundError:
            pass
        except Exception as e:
            _report_failure(path, e, path)


def _report_failure(path, exc, message):
    """Report the exception currently being handled on stdout, stderr and the log.

    Must be called from inside an ``except`` block, because both
    ``traceback.print_exc`` and ``logging.exception`` read the active
    exception.
    """
    print(exc)
    traceback.print_exc()
    print(path, exc, file=sys.stderr)
    logging.exception(message)
class FCG():
    """
    Labelled call graph restricted to callers of WakeLock methods.

    The graph is the union of, for every method matched by
    ``find_methods(classname='Landroid.os.PowerManager.WakeLock')``, the
    call-graph subgraph made of that method and all of its ancestors. Every
    node receives a ``"label"`` attribute built by
    :meth:`color_instructions`.
    """

    def __init__(self, filename):
        """
        Parse ``filename`` and build the labelled graph.

        :param filename: path of an APK file; if it is not a valid zip
            archive it is handed to ``AnalyzeDex`` instead
        """
        self.filename = filename
        try:
            self.a = APK(filename)
            self.d = DalvikVMFormat(self.a.get_dex())
            self.d.create_python_export()
            self.dx = Analysis(self.d)
        except zipfile.BadZipfile:
            # if file is not an APK, may be a dex object
            _, self.d, self.dx = AnalyzeDex(self.filename)

        self.d.set_vmanalysis(self.dx)
        self.dx.create_xref()
        self.fcg = self.build_fcg()

    def get_fcg(self):
        """Return the labelled graph built by :meth:`build_fcg`."""
        return self.fcg

    def get_lock_graph(self):
        """
        Return the part of the call graph that can reach a WakeLock method.

        :return: the composition of one subgraph per matched method, or an
            empty graph of the call graph's class when nothing matched
        """
        call_graph = self.dx.get_call_graph()

        graph_list = []
        for m in self.dx.find_methods(
                classname='Landroid.os.PowerManager.WakeLock'):
            target = m.get_method()
            members = nx.ancestors(call_graph, target)
            members.add(target)
            graph_list.append(call_graph.subgraph(members))

        if not graph_list:
            # nx.compose_all raises ValueError on an empty list, which would
            # abort construction for every app that never touches WakeLock.
            return call_graph.__class__()

        return nx.compose_all(graph_list)

    def build_fcg(self):
        """
        Using NX and Androguard, build a directed graph NX object so that:
            - nodes are the objects used by the androguard call graph
            - each node has a "label" attribute that encodes the method
              behavior
        Nodes that do not offer ``get_instructions`` (or whose instructions
        raise AttributeError while being read) get an all-zero label.
        """
        fcg = self.get_lock_graph()

        for n in fcg.nodes:
            try:
                instructions = [ins.get_name() for ins in n.get_instructions()]
                encoded_label = self.color_instructions(instructions)
            except AttributeError:
                # NOTE(review): 15 is presumably len(INSTRUCTION_CLASS_COLOR);
                # confirm and consider deriving it from that constant.
                encoded_label = np.array([0] * 15)

            # ``Graph.node`` was removed in NetworkX 2.4; ``Graph.nodes`` is
            # the supported accessor and is already used for the iteration.
            fcg.nodes[n]["label"] = encoded_label

        return fcg

    def color_instructions(self, instructions):
        """
        Node label based on coloring technique by Kruegel.

        :param instructions: iterable of instruction names, each of which
            must be a key of ``INSTRUCTION_SET_COLOR``
        :return: numpy array of 0/1 flags, one slot per entry of
            ``INSTRUCTION_CLASS_COLOR``
        """
        h = [0] * len(INSTRUCTION_CLASS_COLOR)
        for i in instructions:
            h[INSTRUCTION_SET_COLOR[i]] = 1
        return np.array(h)

    def get_classes_from_label(self, label):
        """Return the ``INSTRUCTION_CLASSES`` entries whose flag in ``label`` is 1."""
        return [
            INSTRUCTION_CLASSES[i] for i, flag in enumerate(label)
            if flag == 1
        ]