def each(self, target):
    """Statically analyse the APK at ``target`` and fill ``self.results``.

    Basic manifest information is collected first, then the decompiled
    source of the main activity and the internal classes, and finally every
    ``APKPlugin`` subclass is applied to the analysed APK.

    Args:
        target: Path of the APK file handed to ``AnalyzeAPK``.

    Returns:
        Always ``True``; any failure is logged through ``self.log`` instead
        of being raised, so partial results are kept.
    """
    self.results = dict(
        name=None,
        files=[],
        package=None,
        permissions=[],
        declared_permissions=[],
        main_activity=None,
        activities=[],
        receivers=[],
        services=[],
        manifest=None,
        libraries=[],
        main_activity_content=None,
        internal_classes=[],
    )

    try:
        apk, vm, vm_analysis = AnalyzeAPK(target)

        # First, get basic information about the APK
        self.results['name'] = apk.get_app_name()
        self.results['files'] = apk.get_files_types()
        self.results['package'] = apk.get_package()
        self.results['permissions'] = apk.get_details_permissions()
        self.results['declared_permissions'] = apk.get_declared_permissions_details()
        self.results['main_activity'] = apk.get_main_activity()
        self.results['activities'] = apk.get_activities()
        self.results['receivers'] = apk.get_receivers()
        self.results['services'] = apk.get_services()
        self.results['manifest'] = apk.get_android_manifest_axml().get_xml()
        self.results['libraries'] = list(apk.get_libraries())

        # 'main_activity_content' and 'internal_classes' keep the defaults
        # set above when the corresponding step fails.
        try:
            main_class = "L{};".format(self.results['main_activity']).replace('.', '/')
            self.results['main_activity_content'] = vm[0].get_class(main_class).get_source()
        except Exception:
            # format_exc() returns the traceback text; print_exc() returns
            # None, which made every logged error empty.
            self.log('error', traceback.format_exc())

        try:
            self.results['internal_classes'] = self._get_internal_classes(vm_analysis)
            self._store_internal_classes()
        except Exception:
            self.log('error', traceback.format_exc())

        # Then, run all the APK Plugins in order to see if this is a known malware
        for plugin in APKPlugin.__subclasses__():
            plugin = plugin(target, apk, vm, vm_analysis)
            plugin.apply(self)
    except Exception:
        self.log('error', traceback.format_exc())

    return True
def main():
    """Command-line entry point: build the call graph of an APK and export it.

    The APK named on the command line is analysed, its activities,
    providers, services and receivers are used as entry points, and the
    graph produced by ``generate_graph`` is either plotted (``--show``) or
    written with the exporter selected by the extension of ``--output``.
    Exits with status 1 when no exporter matches that extension.
    """
    # The two literals are concatenated by the parser, so the first one needs
    # its trailing space (it used to render as "...the dataof Analysis...").
    parser = ArgumentParser(description="Create a call graph based on the data "
                                        "of Analysis and export it into a graph format.")

    parser.add_argument("APK", nargs=1, help="The APK to analyze")
    parser.add_argument("--output", "-o", default="callgraph.gml",
                        help="Filename of the output file, the extension is used to decide which format to use (default callgraph.gml)")
    parser.add_argument("--show", "-s", action="store_true", default=False,
                        help="instead of saving the graph, print it with mathplotlib (you might not see anything!")
    parser.add_argument("--verbose", "-v", action="store_true", default=False,
                        help="Print more output")
    parser.add_argument("--classname", default=".*", help="Regex to filter by classname")
    parser.add_argument("--methodname", default=".*", help="Regex to filter by methodname")
    parser.add_argument("--descriptor", default=".*", help="Regex to filter by descriptor")
    parser.add_argument("--accessflag", default=".*", help="Regex to filter by accessflags")
    parser.add_argument("--no-isolated", default=False, action="store_true",
                        help="Do not store methods which has no xrefs")

    args = parser.parse_args()

    if args.verbose:
        show_logging(logging.INFO)

    a, d, dx = AnalyzeAPK(args.APK[0])

    entry_points = list(map(FormatClassToJava,
                            a.get_activities() + a.get_providers() + a.get_services() + a.get_receivers()))

    log.info("Found The following entry points by search AndroidManifest.xml: {}".format(entry_points))

    CG = generate_graph(dx,
                        args.classname,
                        args.methodname,
                        args.descriptor,
                        args.accessflag,
                        args.no_isolated,
                        entry_points,
                        )

    # NOTE(review): nx.write_gpickle / nx.write_yaml appear to have been
    # removed in networkx 3.0 -- confirm against the pinned networkx version.
    write_methods = dict(gml=_write_gml,
                         gexf=nx.write_gexf,
                         gpickle=nx.write_gpickle,
                         graphml=nx.write_graphml,
                         yaml=nx.write_yaml,
                         net=nx.write_pajek,
                         )

    if args.show:
        plot(CG)
    else:
        # A file name without any "." has no format; it must reach the
        # "could not find a method" branch instead of raising IndexError.
        name_parts = args.output.rsplit(".", 2)
        writer = name_parts[-1] if len(name_parts) > 1 else ""
        # For compressed output (e.g. graph.gml.gz) the format is the
        # extension in front of the compression suffix.
        if writer in ("bz2", "gz") and len(name_parts) > 2:
            writer = name_parts[-2]
        if writer not in write_methods:
            print("Could not find a method to export files to {}!".format(writer))
            sys.exit(1)

        write_methods[writer](CG, args.output)
def main():
    """Command-line entry point: build the call graph of an APK and export it.

    The APK named on the command line is analysed, its activities,
    providers, services and receivers are used as entry points, and the
    graph returned by ``dx.get_call_graph`` is either plotted (``--show``)
    or written with the exporter selected by the extension of ``--output``.
    Exits with status 1 when no exporter matches that extension.
    """
    # The two literals are concatenated by the parser, so the first one needs
    # its trailing space (it used to render as "...the dataof Analysis...").
    parser = ArgumentParser(description="Create a call graph based on the data "
                                        "of Analysis and export it into a graph format.")

    parser.add_argument("APK", nargs=1, help="The APK to analyze")
    parser.add_argument("--output", "-o", default="callgraph.gml",
                        help="Filename of the output file, the extension is used to decide which format to use (default callgraph.gml)")
    parser.add_argument("--show", "-s", action="store_true", default=False,
                        help="instead of saving the graph, print it with mathplotlib (you might not see anything!")
    parser.add_argument("--verbose", "-v", action="store_true", default=False,
                        help="Print more output")
    parser.add_argument("--classname", default=".*", help="Regex to filter by classname")
    parser.add_argument("--methodname", default=".*", help="Regex to filter by methodname")
    parser.add_argument("--descriptor", default=".*", help="Regex to filter by descriptor")
    parser.add_argument("--accessflag", default=".*", help="Regex to filter by accessflags")
    parser.add_argument("--no-isolated", default=False, action="store_true",
                        help="Do not store methods which has no xrefs")

    args = parser.parse_args()

    if args.verbose:
        show_logging(logging.INFO)

    a, d, dx = AnalyzeAPK(args.APK[0])

    entry_points = list(map(FormatClassToJava,
                            a.get_activities() + a.get_providers() + a.get_services() + a.get_receivers()))

    log.info("Found The following entry points by search AndroidManifest.xml: {}".format(entry_points))

    CG = dx.get_call_graph(args.classname,
                           args.methodname,
                           args.descriptor,
                           args.accessflag,
                           args.no_isolated,
                           entry_points,
                           )

    # NOTE(review): nx.write_gpickle / nx.write_yaml appear to have been
    # removed in networkx 3.0 -- confirm against the pinned networkx version.
    write_methods = dict(gml=_write_gml,
                         gexf=nx.write_gexf,
                         gpickle=nx.write_gpickle,
                         graphml=nx.write_graphml,
                         yaml=nx.write_yaml,
                         net=nx.write_pajek,
                         )

    if args.show:
        plot(CG)
    else:
        # A file name without any "." has no format; it must reach the
        # "could not find a method" branch instead of raising IndexError.
        name_parts = args.output.rsplit(".", 2)
        writer = name_parts[-1] if len(name_parts) > 1 else ""
        # For compressed output (e.g. graph.gml.gz) the format is the
        # extension in front of the compression suffix.
        if writer in ("bz2", "gz") and len(name_parts) > 2:
            writer = name_parts[-2]
        if writer not in write_methods:
            print("Could not find a method to export files to {}!".format(writer))
            sys.exit(1)

        write_methods[writer](CG, args.output)
def androcg_main(verbose, APK, classname, methodname, descriptor, accessflag, no_isolated, show, output):
    """Build the call graph of an APK and either plot it or write it to disk.

    Args:
        verbose: When true, INFO logging is enabled via ``show_logging``.
        APK: Path of the APK passed to ``AnalyzeAPK``.
        classname, methodname, descriptor, accessflag: Filters forwarded
            unchanged to ``dx.get_call_graph``.
        no_isolated: Forwarded unchanged to ``dx.get_call_graph``.
        show: When true the graph is plotted and nothing is written.
        output: Target file name; its extension (ignoring a trailing
            ``.gz``/``.bz2``) selects the exporter.

    Exits with status 1 when ``output`` does not name a supported format,
    including when it has no extension at all.
    """
    from androguard.core.androconf import show_logging
    from androguard.core.bytecode import FormatClassToJava
    from androguard.misc import AnalyzeAPK
    import networkx as nx
    import logging
    log = logging.getLogger("androcfg")
    if verbose:
        show_logging(logging.INFO)

    a, d, dx = AnalyzeAPK(APK)

    entry_points = list(map(FormatClassToJava,
                            a.get_activities() + a.get_providers() + a.get_services() + a.get_receivers()))

    log.info("Found The following entry points by search AndroidManifest.xml: "
             "{}".format(entry_points))

    CG = dx.get_call_graph(classname,
                           methodname,
                           descriptor,
                           accessflag,
                           no_isolated,
                           entry_points,
                           )

    # NOTE(review): nx.write_gpickle / nx.write_yaml appear to have been
    # removed in networkx 3.0 -- confirm against the pinned networkx version.
    write_methods = dict(gml=_write_gml,
                         gexf=nx.write_gexf,
                         gpickle=nx.write_gpickle,
                         graphml=nx.write_graphml,
                         yaml=nx.write_yaml,
                         net=nx.write_pajek,
                         )

    if show:
        plot(CG)
    else:
        # A file name without any "." has no format; it must reach the
        # "could not find a method" branch instead of raising IndexError.
        name_parts = output.rsplit(".", 2)
        writer = name_parts[-1] if len(name_parts) > 1 else ""
        # For compressed output (e.g. graph.gml.gz) the format is the
        # extension in front of the compression suffix.
        if writer in ("bz2", "gz") and len(name_parts) > 2:
            writer = name_parts[-2]
        if writer not in write_methods:
            print("Could not find a method to export files to {}!"
                  .format(writer))
            sys.exit(1)

        write_methods[writer](CG, output)
def androcg_main(verbose, APK, classname, methodname, descriptor, accessflag, no_isolated, show, output):
    """Build the call graph of an APK and either plot it or write it to disk.

    Args:
        verbose: When true, INFO logging is enabled via ``show_logging``.
        APK: Path of the APK passed to ``AnalyzeAPK``.
        classname, methodname, descriptor, accessflag: Filters forwarded
            unchanged to ``dx.get_call_graph``.
        no_isolated: Forwarded unchanged to ``dx.get_call_graph``.
        show: When true the graph is plotted and nothing is written.
        output: Target file name; its extension (ignoring a trailing
            ``.gz``/``.bz2``) selects the exporter.

    Exits with status 1 when ``output`` does not name a supported format,
    including when it has no extension at all.
    """
    from androguard.core.androconf import show_logging
    from androguard.core.bytecode import FormatClassToJava
    from androguard.misc import AnalyzeAPK
    import networkx as nx
    import logging
    log = logging.getLogger("androcfg")
    if verbose:
        show_logging(logging.INFO)

    a, d, dx = AnalyzeAPK(APK)

    entry_points = list(map(FormatClassToJava,
                            a.get_activities() + a.get_providers() + a.get_services() + a.get_receivers()))

    log.info("Found The following entry points by search AndroidManifest.xml: "
             "{}".format(entry_points))

    CG = dx.get_call_graph(classname,
                           methodname,
                           descriptor,
                           accessflag,
                           no_isolated,
                           entry_points,
                           )

    # NOTE(review): nx.write_gpickle / nx.write_yaml appear to have been
    # removed in networkx 3.0 -- confirm against the pinned networkx version.
    write_methods = dict(gml=_write_gml,
                         gexf=nx.write_gexf,
                         gpickle=nx.write_gpickle,
                         graphml=nx.write_graphml,
                         yaml=nx.write_yaml,
                         net=nx.write_pajek,
                         )

    if show:
        plot(CG)
    else:
        # A file name without any "." has no format; it must reach the
        # "could not find a method" branch instead of raising IndexError.
        name_parts = output.rsplit(".", 2)
        writer = name_parts[-1] if len(name_parts) > 1 else ""
        # For compressed output (e.g. graph.gml.gz) the format is the
        # extension in front of the compression suffix.
        if writer in ("bz2", "gz") and len(name_parts) > 2:
            writer = name_parts[-2]
        if writer not in write_methods:
            print("Could not find a method to export files to {}!"
                  .format(writer))
            sys.exit(1)

        write_methods[writer](CG, output)
def analyze(app):
    """Run the Androguard and Androwarn static analyses for one app.

    Two JSON files are produced under ``c.DATA_PATH``, both prefixed with
    ``<id><SEPARATOR><latest_crawled_version><SEPARATOR>``:
    ``androguard.json`` (permissions, activities, URL-like strings) and
    ``androwarn.json`` (the Androwarn report).

    Args:
        app: Mapping that provides at least ``'id'`` and
            ``'latest_crawled_version'``.

    Returns:
        None. The analysis is skipped when the APK file does not exist.
    """
    apk_path = c.get_apk_path(app)

    # Without the APK on disk there is nothing to analyse.
    if not os.path.exists(apk_path):
        return

    a, d, dx = AnalyzeAPK(apk_path)

    # Partial results: requested permissions, declared activities and every
    # string constant that looks like an http(s) URL.
    result = {
        'permissions': a.get_permissions(),
        'activities': a.get_activities(),
        'urls': [match.get_value() for match in dx.find_strings("http[s]?://.")],
    }

    # Persist the Androguard findings as JSON.
    app_suffix_path = app['id'] + c.SEPARATOR + app['latest_crawled_version']
    c.save(c.DATA_PATH + app_suffix_path + c.SEPARATOR + 'androguard.json', result)

    # Androwarn analysis, without the Play Store look up.
    data = perform_analysis(apk_path, a, d, dx, False)

    # Report parameters: verbosity 3 (advanced), JSON output, stored next to
    # the other JSON files of this app.
    androwarn_report_path = c.DATA_PATH + app_suffix_path + c.SEPARATOR + 'androwarn.json'
    generate_report(app['id'], data, 3, 'json', androwarn_report_path)
def analyze(args, apk_queue, res_queue, output_data):
    """Worker loop: scan queued APKs for activities with an interesting intent.

    File names are taken from ``apk_queue`` until it is drained. Every
    activity whose intent filters mention ``INTENT_CATEGORY`` is reported
    through a ``Logger`` bound to ``res_queue``.

    Args:
        args: Options object providing ``log_file`` and ``in_dir``.
        apk_queue: Queue of APK file names relative to ``args.in_dir``.
        res_queue: Queue handed to ``Logger`` for the collected output.
        output_data: Unused; kept for interface compatibility.
    """
    try:
        from queue import Empty
    except ImportError:  # Python 2 names the module ``Queue``
        from Queue import Empty

    log = Logger(args.log_file, res_queue)
    while True:
        # Take the item and detect exhaustion in one step: testing empty()
        # and then calling a blocking get() lets another worker grab the
        # last item in between, leaving this worker blocked forever.
        try:
            apk_file = apk_queue.get_nowait()
        except Empty:
            return

        file_path = args.in_dir + "/" + apk_file
        log.log("Checking: %s\n" % file_path)
        a, _, _ = AnalyzeAPK(file_path)
        for act in a.get_activities():
            intent_list = a.get_intent_filters("activity", act)
            if INTENT_CATEGORY in str(intent_list):
                log.log("Found an interesting activity!")
                log.log(act)
                log.log(intent_list)
                log.log("\n\n")
        # NOTE(review): flushed once per APK; the original indentation of
        # this call was not recoverable -- confirm this is the intended place.
        log.flush()
def get_call_graph(self, apk):
    """Return the call graph of ``apk`` built by androguard's analysis.

    The activities, providers, services and receivers reported by the APK
    object are converted with ``FormatClassToJava`` and passed as entry
    points; every other ``get_call_graph`` filter keeps its default.
    """
    apk_obj, _, analysis = AnalyzeAPK(apk)

    component_names = (apk_obj.get_activities() + apk_obj.get_providers() +
                       apk_obj.get_services() + apk_obj.get_receivers())
    entry_points = [FormatClassToJava(name) for name in component_names]

    # TODO make the classname / methodname / descriptor / accessflag /
    # no_isolated filters configurable
    return analysis.get_call_graph(entry_points=entry_points)
#!/usr/bin/python2
from collections import defaultdict

from androguard.misc import AnalyzeAPK


def _print_section(title, fetch_entries):
    """Print ``title`` after a blank line, then one fetched entry per line.

    ``fetch_entries`` is called only after the title has been printed.
    """
    print('\n' + title)
    for entry in fetch_entries():
        print(entry)


# Interactive overview of an APK: activities, main activity, dangerous
# permissions, services and broadcast receivers.
apk_path = raw_input('Provide path to apk\n')
print('Analyzing...')
apk, _dex_list, _analysis = AnalyzeAPK(apk_path)

_print_section('Activities:', apk.get_activities)

print('\nMain activity:')
print(apk.get_main_activity())

# Protection levels: https://developer.android.com/guide/topics/permissions/overview#normal-dangerous
_print_section(
    'Super dangerous permissions:',
    lambda: (perm
             for perm, details in apk.get_details_permissions().items()
             if details[0] == 'dangerous'))

_print_section('Services:', apk.get_services)
_print_section('Broadcasts:', apk.get_receivers)
def api_check(folder, APKname):
    """Extract static features from one APK and store them as CSV files.

    One row per APK is appended to the shared ``result/*.csv`` files, and a
    per-application set of CSV files (with headers) is written to
    ``result/<folder><APKname>data/``. An APK whose data directory already
    exists is skipped, as is an APK androguard fails to parse.

    Args:
        folder: Directory prefix of the APK, concatenated as-is (so it is
            expected to end with a path separator).
        APKname: File name of the APK inside ``folder``.

    Returns:
        None.
    """
    data_dir = 'result/' + folder + APKname + 'data'
    if os.path.exists(data_dir + '/'):
        print(APKname + " Already scanned")
        return
    print("Starting apk:" + APKname)
    apk_start_time = time.time()

    RESULTdict = dict.fromkeys(RESULT_PARAMS, 0)
    # Separate dictionaries, one per feature family
    OtherDict = dict.fromkeys(('obfuscation', 'database'), 0)
    APIdict = dict.fromkeys((API_CALLS + API_ClASS), 0)
    permission_dict = dict.fromkeys(PERMISSIONS, 0)
    strings_dict = dict.fromkeys(API_SYSTEM_COMMANDS, 0)
    groupAPI_dict = dict.fromkeys(APIGROUPS, 0)

    # apk: APK, dex_list[0]: DalvikVMFormat, analysis: Analysis
    try:
        apk, dex_list, analysis = AnalyzeAPK(folder + APKname)
    except Exception:
        print(" ERROR: Androguard parse error, skipping file")
        return

    # TODO read up on permissions that are used without being requested
    RESULTdict["APP_Name"] = APKname
    RESULTdict['folder'] = folder

    # Suspicious strings. A single pass is used so the counts stay correct
    # even if read_system_commands returns a one-shot iterator.
    RESULTdict["warn_strings"] = []
    strings = analysis.get_strings_analysis()
    for command in read_system_commands(strings, API_SYSTEM_COMMANDS):
        RESULTdict["warn_strings"].append(command)
        strings_dict[command] += 1

    # General information
    RESULTdict['permissions'] = apk.get_permissions()
    RESULTdict['activities'] = apk.get_activities()
    RESULTdict['providers'] = apk.get_providers()
    RESULTdict['services'] = apk.get_services()
    RESULTdict['libraries'] = apk.get_libraries()
    RESULTdict['is_obfuscation'] = 1 if is_ascii_obfuscation(dex_list[0]) else 0
    RESULTdict['is_database'] = 1 if dex_list[0].get_regex_strings(DB_REGEX) else 0
    # TODO intents_analysis from new.py

    OtherDict['obfuscation'] = RESULTdict['is_obfuscation']
    OtherDict['database'] = RESULTdict['is_database']

    # Permissions
    RESULTdict['warn_permissions'] = []
    for permission in PERMISSIONS:
        if permission in RESULTdict['permissions']:
            RESULTdict['warn_permissions'].append(permission)
            permission_dict[permission] = 1

    # TODO count API groups and system commands for the feature vector

    # API usage, collected from the methods of every external class
    RESULTdict['API_groups'] = []
    for external_class in analysis.get_external_classes():
        for method in external_class.get_vm_class().get_methods():
            class_name = str(method.get_class_name().replace(';', ''))
            method_name = str(method.get_name())
            # TODO permission_api_name https://androguard.readthedocs.io/en/latest/api/androguard.core.analysis.html?highlight=permission#androguard.core.analysis.analysis.ExternalMethod.permission_api_name
            if method_name in API_CALLS:
                APIdict[method_name] += 1
            # TODO !!! the data needs normalisation
            if class_name in API_ClASS:
                APIdict[class_name] += 1
            group = GroupAPI_Checker.checkAPIGroup(class_name.replace('/', '.')[1:], method_name)
            if group is not None:
                groupAPI_dict[group] += 1
                RESULTdict['API_groups'].append(group)

    # Append this APK to the shared CSV files (one row each, no header)
    identity = {'APP_Name': APKname, 'folder': folder}
    identity_fields = ('APP_Name', 'folder')
    _append_csv_row("result/" + 'API_CALLS.csv',
                    (identity_fields + API_CALLS + API_ClASS), dict(APIdict, **identity))
    _append_csv_row("result/" + 'OtherDict.csv',
                    ('APP_Name', 'folder', 'obfuscation', 'database'), dict(OtherDict, **identity))
    _append_csv_row("result/" + 'permission_dict.csv',
                    identity_fields + PERMISSIONS, dict(permission_dict, **identity))
    _append_csv_row("result/" + 'strings_dict.csv',
                    identity_fields + API_SYSTEM_COMMANDS, dict(strings_dict, **identity))
    _append_csv_row("result/" + 'groupAPI_dict.csv',
                    identity_fields + APIGROUPS, dict(groupAPI_dict, **identity))
    _append_csv_row("result/" + 'RESULTdict.csv', RESULT_PARAMS, RESULTdict)

    # Write the per-application files
    try:
        # makedirs also creates result/<folder> when it is missing (including
        # nested folders) and still raises OSError if the data directory
        # cannot be created.
        os.makedirs(data_dir)
    except OSError:
        print("Создать директорию %s не удалось" % data_dir)
    else:
        _write_csv_report(data_dir + '/RESULT.csv', RESULT_PARAMS, RESULTdict)
        _write_csv_report(data_dir + '/OtherDict.csv', ('obfuscation', 'database'), OtherDict)
        _write_csv_report(data_dir + '/APIdict.csv', API_CALLS + API_ClASS, APIdict)
        _write_csv_report(data_dir + '/permission_dict.csv', PERMISSIONS, permission_dict)
        _write_csv_report(data_dir + '/strings_dict.csv', API_SYSTEM_COMMANDS, strings_dict)
        _write_csv_report(data_dir + '/groupAPI_dict.csv', APIGROUPS, groupAPI_dict)

    print("APK done:{} ".format(time.time() - apk_start_time))


def _append_csv_row(path, fieldnames, row):
    """Append ``row`` to the ';'-delimited CSV at ``path`` without a header."""
    with open(path, 'a', encoding='utf8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames,
                                delimiter=";", lineterminator="\n")
        writer.writerow(row)


def _write_csv_report(path, fieldnames, row):
    """Create the ';'-delimited CSV at ``path`` with a header and one ``row``."""
    with open(path, 'w', encoding='utf8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames,
                                delimiter=";", lineterminator="\n")
        writer.writeheader()
        writer.writerow(row)
apk.is_wearable(), 'max_sdk_version': (apk.get_max_sdk_version()), 'min_sdk_version': int(apk.get_min_sdk_version()), 'version_code': apk.xml['AndroidManifest.xml'].get( '{http://schemas.android.com/apk/res/android}versionCode'), 'libraries': list(apk.get_libraries()), 'androidtv': apk.is_androidtv(), 'target_sdk_version': apk.get_target_sdk_version(), 'api_keys': {}, # TODO 'activities': apk.get_activities(), 'main_activity': apk.get_main_activity(), 'receivers': apk.get_receivers(), 'signature_name': apk.get_signature_name(), 'dexes': {}, 'displayed_version': apk.xml['AndroidManifest.xml'].get( '{http://schemas.android.com/apk/res/android}versionName'), 'services': apk.get_services(), 'permissions': apk.get_permissions(), 'cordova':
class AndroidPackage(object):
    """Static android information."""

    def __init__(self, filepath):
        self.filepath = filepath
        # Both are populated by run() once the APK has been parsed.
        self.apk = None
        self.analysis = None

    def _get_detailed_permissions(self):
        """Return a list of all permission requests by the application."""
        return [
            {"name": name, "protection_level": details[0], "description": details[2]}
            for name, details in self.apk.get_details_permissions().items()
        ]

    def _enumerate_components(self, kind, names):
        """Return ``{"name", "action"}`` dicts for manifest components of ``kind``.

        ``action`` is the component's intent-filter action list, or an empty
        list when its intent filters declare no action.
        """
        components = []
        for name in names:
            intent_filters = self.apk.get_intent_filters(kind, name)
            actions = intent_filters["action"] if "action" in intent_filters else []
            components.append({"name": name, "action": actions})
        return components

    def _enumerate_services(self):
        """Return a list of all services with their actions"""
        return self._enumerate_components("service", self.apk.get_services())

    def _enumerate_receivers(self):
        """Return a list of all BroadcastReceiver's with their actions"""
        return self._enumerate_components("receiver", self.apk.get_receivers())

    def _enumerate_apk_files(self):
        """Return a list of files in the APK."""
        files = []
        for filename, filetype in self.apk.get_files_types().items():
            buf = self.apk.zip.read(filename)
            files.append({
                "name": filename,
                "md5": hashlib.md5(buf).hexdigest(),
                "size": len(buf),
                "type": filetype,
            })
        return files

    def _enumerate_encrypted_assets(self):
        """Returns a list of files in the APK assets that have high entropy."""
        files = []
        for filename, filetype in self.apk.get_files_types().items():
            # NOTE(review): substring match, so any path containing "assets"
            # qualifies, not only entries under the assets/ directory.
            if "assets" not in filename:
                continue
            buf = self.apk.zip.read(filename)
            file_entropy = entropy.shannon_entropy(buf)
            if file_entropy > 0.9:
                files.append({
                    "name": filename,
                    "entropy": file_entropy,
                    "size": len(buf),
                    "type": filetype,
                })
        return files

    def _get_certificates_info(self):
        """Return a list of APK certificates"""
        import binascii

        def to_hex(raw):
            # bytes.encode("hex") only exists on Python 2; hexlify works on
            # both major versions and the decode yields text either way.
            return binascii.hexlify(raw).decode("ascii")

        certificates = []
        for cert in self.apk.get_certificates():
            validity = cert['tbs_certificate']['validity']
            not_valid_after = validity['not_after'].native
            not_valid_before = validity['not_before'].native
            certificates.append({
                "sha1": to_hex(cert.sha1),
                "sha256": to_hex(cert.sha256),
                "issuer": cert.issuer.human_friendly,
                "subject": cert.subject.human_friendly,
                "not_valid_after": not_valid_after.strftime("%Y-%m-%d %H:%M:%S"),
                "not_valid_before": not_valid_before.strftime("%Y-%m-%d %H:%M:%S"),
                "public_key_algorithm": cert.public_key.algorithm,
                "public_key_size": "%d bit" % cert.public_key.bit_size,
                "signature_algorithm": cert.signature_algo + " with " + cert.hash_algo,
                "signature": to_hex(cert.signature),
                "serial_number": str(cert.serial_number)
            })
        return certificates

    def _enumerate_native_methods(self):
        """Return the pretty names of the application's native methods.

        External methods are skipped; of the rest only those whose access
        flags have bit 0x0100 set are kept.
        """
        methods = []
        for mca in self.analysis.get_methods():
            if mca.is_external():
                continue
            if not mca.get_method().get_access_flags() & 0x0100:
                continue
            methods.append(self._get_pretty_method(mca))
        return methods

    def _get_pretty_method(self, mca):
        """Return a string representation of an API method.

        The class name is converted from ``Lpkg/Class;`` to ``pkg.Class``
        and followed by the method name and descriptor.

        @param mca: MethodClassAnalysis object.
        """
        method = mca.get_method()
        class_name = method.get_class_name().replace("/", ".")[1:-1]
        return "%s.%s%s" % (class_name, method.get_name(), mca.descriptor)

    def _enumerate_api_calls(self):
        """Return a list of application methods with the APIs they call.

        Each entry is ``{"name": <pretty method>, "callees": [<class.method>]}``.
        External classes and classes from well-known library packages are
        skipped, as are methods without outgoing cross references.
        """
        exclude_pattern = re.compile(
            "^(Lcom/google/|Landroid|Ljava|Lcom/sun/|Lorg/apache/|"
            "Lorg/spongycastle|Lmyjava/|Lkotlin/)")

        classes = [
            ca.name
            for ca in self.analysis.get_classes()
            if not ca.is_external() and not exclude_pattern.match(ca.name)
        ]

        calls = []
        for class_name in classes:
            for mca in self.analysis.find_methods(class_name):
                xrefs_to = [
                    "%s.%s" % (m.get_class_name().replace("/", ".")[1:-1], m.get_name())
                    for _, m, _ in mca.get_xref_to()
                ]
                if not xrefs_to:
                    continue
                calls.append({
                    "name": self._get_pretty_method(mca),
                    "callees": xrefs_to,
                })
        return calls

    def run(self):
        """Run androguard to extract static APK information

        @return: dict of static features, or None when the APK cannot be
            parsed.
        """
        from androguard.misc import AnalyzeAPK

        # Keep androguard's own loggers quiet during the analysis.
        logging.getLogger("androguard.dvm").setLevel(logging.WARNING)
        logging.getLogger("androguard.analysis").setLevel(logging.WARNING)
        logging.getLogger("androguard.misc").setLevel(logging.WARNING)
        logging.getLogger("androguard.apk").setLevel(logging.CRITICAL)

        try:
            self.apk, _, self.analysis = AnalyzeAPK(self.filepath)
        except (OSError, zipfile.BadZipfile) as e:
            log.error("Error parsing APK file: %s", e)
            return None

        # The manifest section stays empty for an APK androguard considers
        # invalid; the remaining sections are collected regardless.
        manifest = {}
        if self.apk.is_valid_APK():
            manifest["package"] = self.apk.get_package()
            manifest["services"] = self._enumerate_services()
            manifest["receivers"] = self._enumerate_receivers()
            manifest["providers"] = self.apk.get_providers()
            manifest["activities"] = self.apk.get_activities()
            manifest["main_activity"] = self.apk.get_main_activity()
            manifest["permissions"] = self._get_detailed_permissions()

        apkinfo = {}
        apkinfo["manifest"] = manifest
        apkinfo["files"] = self._enumerate_apk_files()
        apkinfo["encrypted_assets"] = self._enumerate_encrypted_assets()
        apkinfo["is_signed_v1"] = self.apk.is_signed_v1()
        apkinfo["is_signed_v2"] = self.apk.is_signed_v2()
        apkinfo["certificates"] = self._get_certificates_info()
        apkinfo["native_methods"] = self._enumerate_native_methods()
        apkinfo["api_calls"] = self._enumerate_api_calls()

        return apkinfo
def run(self):
    """Analyse ``self.target_file`` and write its class graph to disk.

    Steps, each reflected in ``self.status``: analyse the APK inside a
    Session, optionally save that session, run ``ClassAnalysis`` on every
    class of the analysis, dump the raw per-class results as JSON, build and
    write the graph, optionally write a components-only subgraph, and
    finally write the app metadata. ``self.progress`` is advanced while the
    classes are processed and set to 100 at the end.

    NOTE(review): the source this was recovered from had lost its
    indentation; the nesting below is the most plausible reading and should
    be confirmed against the original file.
    """
    app.logger.info('new analysis')
    s = Session()
    self.status = 'Analyzing APK'
    # a: APK, d: list[DalvikVMFormat], dx: Analysis
    a, d, dx = AnalyzeAPK(self.target_file, session=s)
    # Debug output of the concrete types returned by AnalyzeAPK.
    print(type(a), type(d[0]), type(dx))
    # Cache activities, receivers, services, and providers, because for some
    # reason saving the Session causes a bug that breaks these getters (the
    # string below quotes the resulting traceback).
    """i.e. bytecodes/apk.py", line 582, in get_elements for item in self.xml[i].findall('.//' + tag_name): TypeError: string indices must be integers """
    activities = a.get_activities()
    receivers = a.get_receivers()
    services = a.get_services()
    providers = a.get_providers()
    self.main_activity = a.get_main_activity()
    if self.session_save_file:
        # NOTE(review): presumably raised so that Save() can serialise the
        # deeply nested session; this limit is process-wide -- confirm it is
        # really needed at this size.
        sys.setrecursionlimit(100000000)
        self.status = 'Saving session file'
        Save(s, self.session_save_file)
    # Remember the analysis objects under the md5 of this run.
    cached_analyses.append({'md5': self.md5, 'analysis': (a, d, dx)})
    # Gather all classes from the dexs 'd'
    #classes = get_all_classes_from_dexs(d)
    classes = dx.classes
    total_num = len(classes)
    done = 0  # number of classes processed so far
    # result_classes contains the completed analysis info for each class run
    # through the ClassAnalysis object
    result_classes = []
    analysis_start_time = time.time()
    self.status = 'Getting all classes'
    for c_name, c_analysis in classes.items():
        ca = ClassAnalysis(c_name, c_analysis, activities, receivers, services, providers)
        ca_result = ca.run()
        result_classes.append(ca_result)
        done += 1
        # Advance the progress counter once per ceil(total_num / 100)
        # processed classes.
        if done % ceil(total_num / 100) == 0:
            self.progress += 1
            #app.logger.info(self.progress)
            # with app.test_request_context('/'):
            #     socketio.emit('newstatus', {'data':self.progress}, namespace='/status')
    analysis_end_time = time.time()
    analysis_total_time = analysis_end_time - analysis_start_time
    # Debugging: dump the raw per-class results before the graph is built.
    self.status = 'Writing beforenetworkx debugging JSON'
    with open(self.graph_out_path + '.beforenetworkx', 'w') as f:
        json.dump(result_classes, f, indent=4, separators=(',', ': '), sort_keys=True)
    # Create a networkx graph given the completed analyses in result_classes
    create_graph_start_time = time.time()
    self.status = 'Creating graph out of {} classes analyzed'.format(len(result_classes))
    graph = create_graph(classes=result_classes)
    create_graph_end_time = time.time()
    create_graph_total_time = create_graph_end_time - create_graph_start_time
    # Write graph to file: graph_out_path
    write_graph_start_time = time.time()
    self.status = 'Writing graph to disk'
    write_graph(graph, self.graph_out_path)
    write_graph_end_time = time.time()
    write_graph_total_time = write_graph_end_time - write_graph_start_time
    # Build and write another graph that contains only providers, receivers,
    # activities, and services
    if self.component_subgraph_out_path:
        component_names = []
        self.status = 'Getting component nodes from graph'
        for node in graph:
            # NOTE(review): `graph.node` appears to have been removed in
            # networkx 2.4 in favour of `graph.nodes` -- confirm against the
            # pinned networkx version.
            node_tmp = graph.node[node]
            # Keep every node that is neither EXTERNAL nor INTERNAL.
            if node_tmp['component_type'] != NonComponentType.EXTERNAL and node_tmp['component_type'] != NonComponentType.INTERNAL:
                component_names.append(node_tmp['name'])
        self.status = 'Creating subgraph containing only components'
        subgraph = get_class_subgraph(graph, class_names=component_names)
        self.status = 'Writing subgraph to disk'
        write_graph(subgraph, self.component_subgraph_out_path)
    # App metadata for misc/debugging
    apk_size = os.path.getsize(self.target_file)
    self.status = 'Writing metadata'
    self.write_app_metadata(result_classes, a, analysis_total_time, apk_size, create_graph_total_time, write_graph_total_time)
    #debugging
    # with open(self.graph_out_path+'.runmetrics', 'w') as f:
    #     json.dump()
    self.progress = 100
    self.status = 'Done'
    # Wait for the caller to collect the last status and reset the event
    # before finishing.
    self.paused.wait()
    app.logger.info('done')
from androguard.misc import AnalyzeAPK

# Analyse the sample and collect every component list before printing, so
# the output only starts once all getters have succeeded.
a, d, dx = AnalyzeAPK('Virus0e69af88dcbb469e30f16609b10c926c.apk')

activity = a.get_activities()
service = a.get_services()
provider = a.get_providers()
receiver = a.get_receivers()
permission = a.get_permissions()

for listing in (activity, service, provider, receiver, permission):
    print(listing)

# Output recorded for this sample:
#   ['com.security.service.MainActivity']
#   []
#   []
#   ['com.security.service.receiver.ActionReceiver', 'com.security.service.receiver.SmsReceiver', 'com.security.service.receiver.RebootReceiver']
#   ['android.permission.RECEIVE_SMS', 'android.permission.SEND_SMS']
from pprint import pprint

from androguard.session import Session
from androguard.misc import AnalyzeAPK

# Analyse the sample inside an explicit Session and list its activities; the
# 'here' / 'here2' markers show how far the script got.
session = Session()
a, d, dx = AnalyzeAPK('/home/branden/apks/dendroid.apk', session=session)
print('here')

acts = a.get_activities()
print('here2')
print(acts)
class SmartInput(object):
    """Suggest text to type into the editable fields of an Android application.

    On construction the APK is analyzed: every tag that is an ``EditText`` (or a
    subclass of it found in the app) is looked up in the XML layout files, its
    resource id is resolved through the ``R$id`` classes and, where possible,
    the fields are grouped by the activity whose ``onCreate`` sets the layout.
    The result is stored in ``smart_inputs`` (key ``'all'`` plus one key per
    activity) and queried through ``get_smart_input_for_id``.
    """

    # Text used when nothing is known about an input field.
    default = '123456'

    # Suggested text for each input type class.
    type_class = {
        'TYPE_NULL': '',
        'TYPE_CLASS_TEXT': 'example',
        'TYPE_CLASS_NUMBER': '1',
        'TYPE_CLASS_PHONE': '3453453456',
        'TYPE_CLASS_DATETIME': '03032015'
    }

    # Suggested text for each input type variation (takes precedence over the
    # type class when both are known).
    type_variation = {
        'TYPE_TEXT_VARIATION_NORMAL': 'example',
        'TYPE_TEXT_VARIATION_URI': 'https://www.example.com',
        'TYPE_TEXT_VARIATION_EMAIL_ADDRESS': '*****@*****.**',
        'TYPE_TEXT_VARIATION_EMAIL_SUBJECT': 'Example Email Subject',
        'TYPE_TEXT_VARIATION_SHORT_MESSAGE': 'Example Short Message',
        'TYPE_TEXT_VARIATION_LONG_MESSAGE': 'This is an example of a very long message for an input text.',
        'TYPE_TEXT_VARIATION_PERSON_NAME': 'John Smith',
        'TYPE_TEXT_VARIATION_POSTAL_ADDRESS': '16100',
        'TYPE_TEXT_VARIATION_PASSWORD': '******',
        'TYPE_TEXT_VARIATION_VISIBLE_PASSWORD': '******',
        'TYPE_TEXT_VARIATION_WEB_EDIT_TEXT': '',
        'TYPE_TEXT_VARIATION_FILTER': '',
        'TYPE_TEXT_VARIATION_PHONETIC': '',
        'TYPE_TEXT_VARIATION_WEB_EMAIL_ADDRESS': '*****@*****.**',
        'TYPE_TEXT_VARIATION_WEB_PASSWORD': '******',
        'TYPE_NUMBER_VARIATION_NORMAL': '3453453456',
        'TYPE_NUMBER_VARIATION_PASSWORD': '******',
        'TYPE_DATETIME_VARIATION_NORMAL': '03032015',
        'TYPE_DATETIME_VARIATION_DATE': '03032015',
        'TYPE_DATETIME_VARIATION_TIME': '000000'
    }

    def __init__(self, apk_path: str):
        """Analyze the APK at ``apk_path`` and collect its text input fields.

        Any exception raised while collecting the classes and resources is
        logged and re-raised.
        """
        self.logger = logging.getLogger('{0}.{1}'.format(
            __name__, self.__class__.__name__))
        self.logger.info('Smart input generation')

        self.smart_inputs = {}

        self.apk: 'APK' = None
        self.dx: 'Analysis' = None

        self.apk, _, self.dx = AnalyzeAPK(apk_path)

        tmp_edit_text_classes = self.get_subclass_names(
            'Landroid/widget/EditText;')

        # Convert EditText classes to dot notation ('EditText' is a built-in
        # class so the prefix is not needed). This notation will be used when
        # looking for text inputs in the xml layout files.
        self.edit_text_classes = {'EditText'}
        for clazz in tmp_edit_text_classes:
            self.edit_text_classes.add(self._to_dot_notation(clazz))

        try:
            self.class_object_list = [
                clazz.get_vm_class()
                for clazz in self.dx.get_internal_classes()
            ]

            self.classes_dict = self.get_class_dict()

            # Find the R$id classes.
            self.resource_ids = self.get_resource_ids(self.class_object_list)

            # Find the R$layout classes.
            self.resource_layouts = self.get_resource_layouts(
                self.class_object_list)

            self.field_refs = get_field_refs(self.resource_ids)

            self.find_text_fields()
        except Exception as e:
            self.logger.error(
                'Error during smart input generation: {0}'.format(e))
            raise

    @staticmethod
    def _to_dot_notation(descriptor):
        """Turn a class descriptor such as ``Lcom/app/Main;`` into ``com.app.Main``."""
        return re.search('L(.*);', descriptor).group(1).replace('/', '.')

    @staticmethod
    def _mentions_register(register, text):
        """Tell whether ``register`` occurs in ``text`` as a whole token.

        A plain substring test would confuse ``v1`` with ``v10``; the
        lookarounds reject a match that is glued to another word character.
        """
        pattern = r'(?<!\w){0}(?!\w)'.format(re.escape(register))
        return re.search(pattern, text) is not None

    def _find_resource_classes(self, classes, resource):
        """Return the classes whose name ends with ``R$<resource>;``."""
        suffix = 'R${0};'.format(resource)
        found = []
        for clazz in classes:
            if clazz.get_name().endswith(suffix):
                self.logger.debug('Found R${0} class at {1}'.format(
                    resource, clazz.get_name()))
                found.append(clazz)
        return found

    def _get_field_ref(self, field_id):
        """Return the resource field registered for ``field_id``, or None."""
        try:
            return self.field_refs[field_id]
        except KeyError:
            self.logger.debug(
                'No resource field found for id {0}'.format(field_id))
            return None

    def get_subclass_names(self, class_name: str):
        """Return the names of all the direct and indirect subclasses of
        ``class_name`` found among the classes referencing it."""
        subclass_names = set()
        class_analysis = self.dx.get_class_analysis(class_name)
        if class_analysis:
            for clazz in class_analysis.get_xref_from():
                vm_class = clazz.get_vm_class()
                if vm_class.get_superclassname() == class_name:
                    subclass_name = vm_class.get_name()
                    subclass_names.add(subclass_name)
                    # Subclasses of a subclass are subclasses as well.
                    subclass_names.update(
                        self.get_subclass_names(subclass_name))
        return subclass_names

    # Return a dict with the class names and the class objects.
    def get_class_dict(self):
        """Map the dot-notation name of every internal class to its object."""
        return {
            self._to_dot_notation(clazz.get_name()): clazz
            for clazz in self.class_object_list
        }

    # Get R$id classes.
    def get_resource_ids(self, classes):
        """Return the classes in ``classes`` whose name ends with ``R$id;``."""
        return self._find_resource_classes(classes, 'id')

    # Get R$layout classes.
    def get_resource_layouts(self, classes):
        """Return the classes in ``classes`` whose name ends with ``R$layout;``."""
        return self._find_resource_classes(classes, 'layout')

    def get_xml_from_file(self, xml_file):
        """Decode the binary XML file ``xml_file`` of the APK into a DOM."""
        ap = AXMLPrinter(self.apk.get_file(xml_file))
        return minidom.parseString(ap.get_buff())

    # Return every instance of an EditText field and their inputType in the XML.
    # Not all EditText fields will have an inputType specified in the XML.
    def get_input_fields_with_input_types_from_xml(self, xml_file):
        """Return ``{hex id: {'type': ..., 'is_password': ...}}`` for the
        EditText-like tags of ``xml_file`` that carry an id.

        Elements whose id is not a hexadecimal reference are skipped.
        """
        input_fields = {}
        xml_content = self.get_xml_from_file(xml_file)
        for edit_text_tag in self.edit_text_classes:
            for item in xml_content.getElementsByTagName(edit_text_tag):
                android_id = None
                input_type = {'type': None, 'is_password': False}
                for key, value in item.attributes.itemsNS():
                    # key is a (namespace, local name) pair.
                    local_name = key[1]
                    if local_name == 'id':
                        # Drop the leading reference marker.
                        android_id = value[1:]
                    elif local_name == 'inputType':
                        input_type['type'] = value
                    elif local_name == 'password':
                        # Deprecated, only inputType should be used, but some
                        # apps still use this.
                        input_type['is_password'] = value.lower() == 'true'
                if not android_id:
                    continue
                try:
                    field_id = hex(int(android_id, 16))
                except ValueError:
                    # Not a resolved hexadecimal reference: ignore only this
                    # element instead of failing the whole layout file.
                    self.logger.debug(
                        'Skipping input field with non hexadecimal id '
                        '{0}'.format(android_id))
                    continue
                input_fields[field_id] = input_type
        return input_fields

    def parse_move(self, bc, index):
        """Resolve the constant moved by the instruction at ``index``.

        Walks backwards from ``index`` looking for the closest ``const``
        instruction writing the source register of the move. Returns the
        parsed constant, or None when no such instruction is found.
        """
        move = bc.get_instruction(index)
        register = move.get_output().split(',')[1].strip()
        for position in range(index - 1, -1, -1):
            candidate = bc.get_instruction(position)
            if 'const' not in candidate.get_name():
                continue
            # The first operand of the candidate is the register it writes.
            written = candidate.get_output().split(',')[0]
            if self._mentions_register(register, written):
                return parse_const(candidate)
        return None

    def get_activity_xml(self, activity_class):
        """Return the path of the layout set by ``onCreate`` of
        ``activity_class``, or None when it cannot be determined."""
        # Build a list of every layout hex value referenced in activity's
        # bytecode.
        hex_codes = []
        for method in activity_class.get_methods():
            if method.get_name() != 'onCreate':
                continue
            try:
                for index, instruction in enumerate(
                        method.get_instructions()):
                    # Find setContentView, then parse the passed value from
                    # the previous const or const/high16 instruction.
                    if 'setContentView' not in instruction.show_buff(0):
                        continue
                    bytecode = method.get_code().get_bc()
                    previous = bytecode.get_instruction(index - 1)
                    if 'const' in previous.get_name():
                        hex_codes.append(parse_const(previous))
                    elif 'move' in previous.get_name():
                        moved = self.parse_move(bytecode, index - 1)
                        if moved is not None:
                            hex_codes.append(moved)
            except Exception as e:
                # Best effort: keep whatever was collected so far.
                self.logger.debug(
                    'Unable to inspect onCreate bytecode: {0}'.format(e))

        # Cross check the list of hex codes with R$layout to retrieve XML
        # layout file name.
        for layout in self.resource_layouts:
            for field in layout.get_fields():
                init_value = field.get_init_value()
                if init_value is None:
                    # No static initial value to compare against.
                    continue
                if hex(init_value.get_value()) in hex_codes:
                    return 'res/layout/{0}.xml'.format(field.get_name())

        return None

    def get_input_field_from_code(self, class_object: 'ClassDefItem',
                                  field_id: str):
        """Find the class field in which the view with id ``field_id`` is stored.

        Looks for a ``const`` loading ``field_id``, follows its register
        through ``findViewById`` and returns the field targeted by the
        following ``iput-object``/``sput-object``. Returns None when the
        instructions of the method run out first or the field lookup fails.
        """
        self.logger.debug('Analyzing field {0}'.format(field_id))

        for method in class_object.get_methods():
            instructions = iter(method.get_instructions())
            for instruction in instructions:
                if instruction.get_name() not in ('const', 'const/high16') \
                        or field_id != parse_const(instruction):
                    continue

                # Get the register in which the constant is assigned.
                register = instruction.get_output().split(',')[0].strip()

                while True:
                    try:
                        last_instruction = instruction
                        instruction = next(instructions)
                    except StopIteration:
                        self.logger.debug(
                            'Could not get input field {0} from code'.
                            format(field_id))
                        return None

                    name = instruction.get_name()
                    output = instruction.get_output()

                    # Follow the register to the next invoke-virtual of
                    # findViewById...
                    if 'invoke-virtual' in name \
                            and 'findViewById' in output \
                            and self._mentions_register(register, output):
                        # ...and get the register of that output.
                        register = output.split(',')[1].strip()
                    elif name == 'move-result-object' and \
                            'invoke-virtual' in last_instruction.get_name():
                        register = output.strip()
                    elif name in ('iput-object', 'sput-object') and \
                            self._mentions_register(
                                register, output.split(',')[0]):
                        match = re.search(
                            r'.*, (.*)->(\b[\w]*\b) (.*)', output)
                        if match is None:
                            # Unexpected operand format, keep scanning.
                            continue
                        out_sp = match.groups()

                        try:
                            field_analysis = list(
                                self.dx.find_fields(
                                    out_sp[0], out_sp[1], out_sp[2]))
                            if field_analysis:
                                return field_analysis[0]

                            # Fall back to an exact comparison on the fields
                            # declared by the class.
                            vm_class = self.dx.get_class_analysis(
                                out_sp[0]).get_vm_class()
                            for field in vm_class.get_fields():
                                if field.get_name() == out_sp[1] and \
                                        field.get_descriptor() == out_sp[2]:
                                    return FieldClassAnalysis(field)
                        except Exception as e:
                            self.logger.debug(
                                'Unable to resolve the field storing {0}: '
                                '{1}'.format(field_id, e))
                            return None
        return None

    def find_text_fields(self):
        """Fill and return ``smart_inputs``.

        ``smart_inputs['all']`` lists the text fields found in every layout
        file; ``smart_inputs[<activity name>]`` lists the ones found in the
        layout set by that activity. Errors are logged as warnings and the
        fields collected so far are returned.
        """
        try:
            # Get all the input fields from the xml layout files.
            input_fields = {}
            for xml_layout_file in self.apk.get_files():
                if not xml_layout_file.startswith('res/layout'):
                    continue
                try:
                    input_fields.update(
                        self.get_input_fields_with_input_types_from_xml(
                            xml_layout_file))
                except Exception as e:
                    # Best effort: an unreadable layout is simply ignored.
                    self.logger.debug(
                        'Unable to parse layout file {0}: {1}'.format(
                            xml_layout_file, e))

            # Combine all information into a TextField dict.
            text_fields = {}
            for field_id, input_type in input_fields.items():
                field_ref = self._get_field_ref(field_id)
                if field_ref is None:
                    # Id not declared in the R$id classes of the app.
                    continue
                text_fields[field_id] = TextField(
                    field_id,
                    field_ref.get_name(),
                    input_type['type'],
                    field_ref,
                    is_password=input_type['is_password'])

            self.smart_inputs['all'] = list(text_fields.values())

            # Group input fields by activity (if possible).
            for activity_name in self.apk.get_activities():
                self.logger.debug(
                    'Analyzing activity {0}'.format(activity_name))

                if activity_name not in self.classes_dict:
                    continue

                # Include also the internal classes of the activity.
                inner_prefix = '{0}$'.format(activity_name)
                class_objects = [
                    class_object
                    for dot_class_name, class_object
                    in self.classes_dict.items()
                    if dot_class_name == activity_name
                    or dot_class_name.startswith(inner_prefix)
                ]

                input_types_for_fields = {}
                for class_object in class_objects:
                    # Find all XML layouts referenced in setContentView in
                    # activity bytecode.
                    activity_xml_file = self.get_activity_xml(class_object)
                    if not activity_xml_file:
                        continue
                    try:
                        input_types_for_fields.update(
                            self.get_input_fields_with_input_types_from_xml(
                                activity_xml_file))
                    except Exception as e:
                        self.logger.debug(
                            'Unable to parse layout file {0}: {1}'.format(
                                activity_xml_file, e))

                if not input_types_for_fields:
                    self.logger.debug(
                        'No XMLs found for activity {0}'.format(
                            activity_name))
                    continue

                # Combine all information into a TextField dict.
                text_fields = {}
                for field_id, input_type in input_types_for_fields.items():
                    field_ref = self._get_field_ref(field_id)
                    if field_ref is None:
                        continue
                    for class_object in class_objects:
                        field = self.get_input_field_from_code(
                            class_object, field_id)
                        if field:
                            # A field resolved from code always wins.
                            text_fields[field_id] = TextField(
                                field_id,
                                field_ref.get_name(),
                                input_type['type'],
                                field_ref,
                                field,
                                is_password=input_type['is_password'])
                        elif field_id not in text_fields:
                            text_fields[field_id] = TextField(
                                field_id,
                                field_ref.get_name(),
                                input_type['type'],
                                field_ref,
                                is_password=input_type['is_password'])

                if not text_fields:
                    self.logger.debug(
                        'No text fields found for activity {0}'.format(
                            activity_name))
                else:
                    self.smart_inputs[activity_name] = list(
                        text_fields.values())
        except Exception as e:
            self.logger.warning(
                'There was a problem during the search for text fields: {0}'.
                format(e))

        # Returning from here (and not from a finally clause) lets
        # non-Exception errors such as KeyboardInterrupt propagate.
        if self.smart_inputs:
            self.logger.debug('{0} text fields identified'.format(
                len(self.smart_inputs.get('all', []))))
        return self.smart_inputs

    def get_smart_input_for_id(self, input_id: str):
        """Return the text to type into the field named ``input_id``.

        The input type recorded for the field is used when it maps to a
        non-empty text; otherwise the text is guessed from the name of the
        field (user/email, then password hints) and, as a last resort,
        ``default`` is returned.
        """
        # No id was provided, return the default text.
        if not input_id:
            return self.default

        to_return = None
        item = None

        for item in self.smart_inputs.get('all', ()):
            if item.name != input_id:
                continue
            if item.type_variation in self.type_variation:
                to_return = self.type_variation[item.type_variation]
                break
            if item.type_class in self.type_class:
                to_return = self.type_class[item.type_class]
                break

        lowered_id = input_id.lower()

        if to_return and item:
            # This field requires a specific input.
            self.logger.info('Possible input for Editable({0}): "{1}"'.format(
                item, to_return))
        elif 'user' in lowered_id or 'email' in lowered_id:
            # Maybe this is a username field.
            to_return = self.type_variation[
                'TYPE_TEXT_VARIATION_EMAIL_ADDRESS']
            self.logger.info(
                'Using username input for Editable(id={0}): "{1}"'.format(
                    input_id, to_return))
        elif 'password' in lowered_id or 'pwd' in lowered_id \
                or 'secret' in lowered_id:
            # Maybe this is a password field.
            to_return = self.type_variation['TYPE_TEXT_VARIATION_PASSWORD']
            self.logger.info(
                'Using password input for Editable(id={0}): "{1}"'.format(
                    input_id, to_return))
        else:
            # No hint for this field, using the default text.
            to_return = self.default
            self.logger.info(
                'Using default input for Editable(id={0}): "{1}"'.format(
                    input_id, to_return))

        return to_return