def extract_permissions(file):
    """Return the permissions reported by the APK at ``file``.

    Args:
        file: Path of the APK, passed straight to ``APK``. The name
            shadows the Python 2 builtin but is kept for callers that
            pass it by keyword.

    Returns:
        Whatever ``APK.get_permissions()`` returns for the package.

    The previous implementation additionally parsed the DEX twice, built
    two VM analyses and attached a decompiler, then discarded all of them.
    None of those objects feed the returned value, so they are no longer
    built.
    """
    return APK(file).get_permissions()
def __analyze_dex(self, dex_file, raw=False):
    """Build a ``VMAnalysis`` for a DEX file.

    Args:
        dex_file: Path of the DEX file, or the DEX content itself when
            ``raw`` is true.
        raw: When false (default) ``dex_file`` is opened and read in
            binary mode; when true it is handed to ``DalvikVMFormat``
            unchanged.

    Returns:
        The ``VMAnalysis`` instance, which is also attached to the
        ``DalvikVMFormat`` object via ``set_vmanalysis``.
    """
    if raw:
        dex_data = dex_file
    else:
        # Close the handle deterministically instead of leaking it.
        with open(dex_file, "rb") as handle:
            dex_data = handle.read()

    # DalvikVMFormat
    dalvik_vm_format = DalvikVMFormat(dex_data)

    # VMAnalysis
    vm_analysis = VMAnalysis(dalvik_vm_format)
    dalvik_vm_format.set_vmanalysis(vm_analysis)
    return vm_analysis
def __init__(self, args):
    """Load the APK named by ``args`` and prepare its analysis state.

    Args:
        args: Object exposing ``apk`` (path of the APK), ``verbosity``
            and ``output_location`` attributes.

    The constructor parses the APK, builds the VM and graph analyses,
    creates the cross references, runs ``Predict_Input.predict`` and then
    tries to collect the class dictionary and the R$layout / R$id data.
    Failures in that last step are printed, not raised, so ``classes``,
    ``Rlayout``, ``Rid``, ``fields`` and ``field_refs`` may be unset
    afterwards.
    """
    self.apk = args.apk
    self.verbosity = args.verbosity
    self.output_location = args.output_location
    # Drop only the extension. Splitting on the first '.' broke on paths
    # such as "./sample.apk" or "dir.v2/sample.apk". Keep at most the
    # trailing 24 characters, as before.
    self.file_identifier = os.path.splitext(args.apk)[0][-24:]

    # analyze the dex file
    print("From LOCATION =  %s" % (self.apk,))
    self.a = APK(self.apk)

    # get the vm analysis
    self.d = DalvikVMFormat(self.a.get_dex())
    self.dx = VMAnalysis(self.d)
    self.gx = GVMAnalysis(self.dx, None)
    self.d.set_vmanalysis(self.dx)
    self.d.set_gvmanalysis(self.gx)

    # create the cross reference
    self.d.create_xref()
    self.d.create_dref()

    print("CWD:  %s" % (os.getcwd(),))
    predictor = Predict_Input(self.output_location, self.file_identifier)
    # NOTE(review): self.apk[:-4] assumes a four-character extension such
    # as ".apk" -- confirm against what predict() expects.
    self.predictions = predictor.predict(
        self.apk, self.apk[:-4], self.output_location, self.file_identifier)

    try:
        # get the classes for this apk
        # store them in a dict
        self.classes = self.get_class_dict()

        # Find the R$layout class
        self.Rlayout = self.get_RLayout(self.d.get_classes())

        # Find the R$id class
        self.Rid = self.get_Rid(self.d.get_classes())

        # Store all fields referenced in R$id
        self.fields, self.field_refs = self.get_fields(self.Rid)
    except Exception as e:
        # Best effort: report the problem and keep the partially
        # initialised object.
        print(e)
def __init__(self, apk_name):
    """Open ``INPUT_APK_DIR + apk_name + ".apk"`` and build its analyses.

    Args:
        apk_name: APK file name without directory and without the
            ``.apk`` suffix.

    Stores the parsed package (``a``), the Dalvik VM (``d``), the VM
    analysis (``dx``) and the graph analysis (``gx``) on the instance, then
    generates the cross references and data references on the VM.
    """
    self.apk_name = apk_name
    self.apk = INPUT_APK_DIR + self.apk_name + ".apk"

    # Parse the package and its classes.dex.
    package = APK(self.apk)
    self.a = package
    dalvik = DalvikVMFormat(package.get_dex())
    self.d = dalvik

    # Build both analyses before attaching either of them to the VM.
    vm_analysis = VMAnalysis(dalvik)
    self.dx = vm_analysis
    graph_analysis = GVMAnalysis(vm_analysis, None)
    self.gx = graph_analysis
    dalvik.set_vmanalysis(vm_analysis)
    dalvik.set_gvmanalysis(graph_analysis)

    # Cross references first, data references second.
    dalvik.create_xref()
    dalvik.create_dref()
def process_vm(self):
    """
    Process the application's classes.dex

    Loads classes.dex from ``self.apk``, builds the Dalvik VM plus its
    VMAnalysis / GVMAnalysis objects and generates cross references.
    On success ``self.dex``, ``self.vm``, ``self.vmx`` and ``self.gmx``
    are set.

    Args:
        None

    Results:
        None
    """
    # NOTE(review): CommandError is instantiated, never raised, here --
    # presumably its constructor reports the message; confirm.

    # Make sure classes.dex exists. Both "no entry in the archive" and
    # "entry could not be read" are reported; previously one of the two
    # paths ended silently.
    if not self.find_dex():
        CommandError("classes.dex not found (!)")
        return
    self.dex = self.apk.get_dex()
    if not self.dex:
        CommandError("classes.dex not found (!)")
        return

    # Analyze classes.dex
    # TODO Throw in a progress bar, this can take awhile
    self.logger.log("info", "Loading classes.dex ...")
    # Imported lazily, as in the original, so androguard is only loaded
    # when an analysis is actually requested.
    from androguard.core.bytecodes.dvm import DalvikVMFormat
    from androguard.core.analysis.analysis import VMAnalysis
    from androguard.core.analysis.ganalysis import GVMAnalysis

    # Create a new virtual machine instance
    self.vm = DalvikVMFormat(self.dex)
    if not self.vm:
        CommandError("Cannot load VM instance (!)")
        return
    print(self.t.yellow("\n\t--> Loaded classes.dex (!)\n"))
    self.logger.log("info", "Analyzing classes.dex ...")

    # Analyze the virtual machine instance
    self.vmx = VMAnalysis(self.vm)
    self.gmx = GVMAnalysis(self.vmx, None)
    if not (self.vmx and self.gmx):
        CommandError("Cannot analyze VM instance (!)")
        return
    print(self.t.yellow("\n\t--> Analyzed classes.dex (!)\n"))
    self.vm.set_vmanalysis(self.vmx)
    self.vm.set_gvmanalysis(self.gmx)

    # Generate xref(s)
    self.vm.create_xref()
    self.vm.create_dref()
def __init__(self, args):
    """Parse ``args.apk`` and collect its class and resource-id data.

    Args:
        args: Object exposing ``apk`` (path of the APK) and ``verbosity``.

    Builds the Dalvik VM, its VM and graph analyses and the cross
    references. It then attempts to fill ``classes``, ``Rlayout``, ``Rid``,
    ``fields`` and ``field_refs``; any exception raised during that attempt
    is printed and otherwise ignored.
    """
    self.apk = args.apk
    self.verbosity = args.verbosity

    print("Analyzing " + self.apk)

    # Parse the package and its classes.dex.
    package = APK(self.apk)
    self.a = package
    dalvik = DalvikVMFormat(package.get_dex())
    self.d = dalvik

    # Build the analyses, then attach them to the VM.
    vm_analysis = VMAnalysis(dalvik)
    self.dx = vm_analysis
    graph_analysis = GVMAnalysis(vm_analysis, None)
    self.gx = graph_analysis
    dalvik.set_vmanalysis(vm_analysis)
    dalvik.set_gvmanalysis(graph_analysis)

    # Cross references first, data references second.
    dalvik.create_xref()
    dalvik.create_dref()

    try:
        # Dictionary of the classes of this apk.
        self.classes = self.get_class_dict()
        # R$layout and R$id classes, each looked up in a fresh class list.
        self.Rlayout = self.get_RLayout(self.d.get_classes())
        self.Rid = self.get_Rid(self.d.get_classes())
        # Every field referenced in R$id.
        id_fields, id_field_refs = self.get_fields(self.Rid)
        self.fields, self.field_refs = id_fields, id_field_refs
    except Exception as e:
        print(e)
def process_vm(self, apk=False, dex=False):
    """
    Process the application's classes.dex

    With ``apk`` set, classes.dex is taken from ``self.apk``. With ``dex``
    set, ``self.dex`` is treated as a path and read through
    ``self.util.read``. ``apk`` wins when both are set. On success
    ``self.vm``, ``self.vmx`` and ``self.gmx`` are populated and cross
    references are generated. Any exception is reported through
    ``CommandError`` instead of propagating.

    Args:
        param1 = boolean
        param2 = boolean

    Results:
        None
    """
    # NOTE(review): CommandError is instantiated, never raised, here --
    # presumably its constructor reports the message; confirm.
    try:
        if apk:
            # Make sure the APK contains a classes.dex file. A missing
            # entry and an unreadable entry are both reported.
            if not self.find_dex():
                CommandError("process_vm : classes.dex not found (!)")
                return
            self.dex = self.apk.get_dex()
            if not self.dex:
                CommandError("process_vm : classes.dex not found (!)")
                return
            self.logger.log("info", "Loading classes.dex ...")
            # In this case self.dex already holds the DEX content.
            self._analyze_dex_data(self.dex, "classes.dex")
            return
        if dex:
            if not self.dex:
                CommandError("process_vm : classes.dex not found (!)")
                return
            # Here self.dex is a path; report under its base name.
            self._analyze_dex_data(self.util.read(self.dex),
                                   self.dex.split("/")[-1])
            return
    except Exception as e:
        CommandError("process_vm : {}".format(e))


def _analyze_dex_data(self, dex_data, label):
    """Build the VM and both analyses from ``dex_data``, reporting as ``label``."""
    # Imported lazily, as in the original, so androguard is only loaded
    # when an analysis is actually requested.
    from androguard.core.bytecodes.dvm import DalvikVMFormat
    from androguard.core.analysis.analysis import VMAnalysis
    from androguard.core.analysis.ganalysis import GVMAnalysis

    # Create a DalvikVMFormat instance ...
    self.vm = DalvikVMFormat(dex_data)
    if not self.vm:
        CommandError("process_vm : Cannot load VM instance (!)")
        return
    print(self.t.yellow("\n\t--> Loaded {} (!)\n".format(label)))
    self.logger.log("info", "Analyzing {} ...".format(label))

    # Analyze the DalvikVMFormat instance and keep the
    # VMAnalysis and GVMAnalysis instances
    self.vmx = VMAnalysis(self.vm)
    self.gmx = GVMAnalysis(self.vmx, None)
    if not (self.vmx and self.gmx):
        # The dex branch used to build this message as "process_vm :" +
        # "Cannot ...", losing the space after the colon.
        CommandError("process_vm : Cannot analyze VM instance (!)")
        return
    print(self.t.yellow("\n\t--> Analyzed {} (!)\n".format(label)))

    # Set the analysis properties on the DalvikVMFormat instance
    self.vm.set_vmanalysis(self.vmx)
    self.vm.set_gvmanalysis(self.gmx)

    # Generate xref(s) and dref(s)
    self.vm.create_xref()
    self.vm.create_dref()
def extract_features(file_path):
    """Extract a dictionary of static features from an APK.

    Args:
        file_path: Path of the APK to analyse.

    Returns:
        ``None`` when the APK cannot be parsed or analysed. Otherwise a
        dict holding manifest data, hashes of the raw APK, string / class /
        method / field name lists, 0/1 code-property flags, the entropy
        rate of all names and ``feature_vectors`` -- 0/1 lists aligned with
        ``API_CALLS``, ``PERMISSIONS`` and ``SPECIAL_STRINGS``.
    """
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = VMAnalysis(d)
        # Second parse kept on purpose: is_reflection_code below is fed a
        # uVMAnalysis, which is built on its own VM instance.
        vm = dvm.DalvikVMFormat(a.get_dex())
        vmx = analysis.uVMAnalysis(vm)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception:
        # Unparseable input yields None. A bare ``except:`` would also have
        # swallowed KeyboardInterrupt / SystemExit.
        return None

    result = {}
    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    raw = a.get_raw()  # fetched once, hashed twice
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]

    result['is_native_code'] = 1 if analysis.is_native_code(dx) else 0
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_crypto_code'] = 1 if analysis.is_crypto_code(dx) else 0
    result['is_dyn_code'] = 1 if analysis.is_dyn_code(dx) else 0
    result['is_reflection_code'] = 1 if analysis.is_reflection_code(vmx) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0

    s_list = (result['class_names'] + result['method_names'] +
              result['field_names'])
    result['entropy_rate'] = entropy_rate(s_list)

    vectors = result['feature_vectors'] = {}

    # Search for the presence of api calls in a given apk
    vectors['api_calls'] = [
        1 if dx.tainted_packages.search_methods(".", call, ".") else 0
        for call in API_CALLS
    ]

    # Search for the presence of permissions in a given apk
    vectors['permissions'] = [
        1 if permission in result['permissions'] else 0
        for permission in PERMISSIONS
    ]

    # Search for the presence of special strings in a given apk
    vectors['special_strings'] = [
        1 if d.get_regex_strings(word) else 0
        for word in SPECIAL_STRINGS
    ]

    return result
from androguard import *
from androguard.core.bytecodes import apk
from androguard.core.bytecodes import dvm
from androguard.core.analysis.analysis import VMAnalysis

# Dump every method that has code: its signature, each basic block with its
# successors / predecessors, and each instruction with its running offset.
if __name__ == '__main__':
    path = "crackme02.apk"

    a = apk.APK(path)
    d = dvm.DalvikVMFormat(a.get_dex())
    x = VMAnalysis(d)

    for method in d.get_methods():
        g = x.get_method(method)

        # Methods without a code item have nothing to dump.
        if method.get_code() is None:
            continue

        # Each print takes one pre-joined string so the output is the same
        # under the Python 2 print statement and the Python 3 function.
        print("%s %s %s" % (method.get_class_name(), method.get_name(),
                            method.get_descriptor()))

        idx = 0  # running instruction offset within the method
        for i in g.get_basic_blocks().get():
            next_blocks = ', '.join(
                "%x-%x-%s" % (j[0], j[1], j[2].get_name())
                for j in i.get_next())
            prev_blocks = ', '.join(j[2].get_name() for j in i.get_prev())
            print(" ".join([
                "\t %s %x %x" % (i.name, i.start, i.end),
                '[ NEXT = ', next_blocks, ']',
                '[ PREV = ', prev_blocks, ']',
            ]))

            for ins in i.get_instructions():
                print("\t\t %x %s %s" % (idx, ins.get_name(),
                                         ins.get_output()))
                idx += ins.get_length()

            # NOTE(review): blank separator assumed to follow each basic
            # block; the nesting of this print is not recoverable from the
            # collapsed source -- confirm.
            print("")
def __init_androguard_objects(self, apk_file):
    """Parse ``apk_file`` and cache the androguard objects derived from it.

    Sets, in this order: ``_a`` (the APK), ``_d`` (the Dalvik VM built from
    the APK's dex), ``_dx`` (the VM analysis), ``_cm`` (the VM's class
    manager) and ``_strings`` (the VM's strings).
    """
    package = apk.APK(apk_file)
    self._a = package

    dalvik = dvm.DalvikVMFormat(package.get_dex())
    self._d = dalvik

    self._dx = VMAnalysis(dalvik)
    self._cm = dalvik.get_class_manager()
    self._strings = dalvik.get_strings()
class StaticAPKAnalyzer():
    """Performs static analysis on a given apk file.

    ``analyze()`` fills ``_apk_data`` with one entry per feature category.
    ``str(analyzer)`` renders the selected categories either as
    ``category::item`` lines or, when ``output_format == 'xml'``, as an XML
    document.
    """

    def __init__(self, output_format=None):
        """Create an analyzer; ``output_format='xml'`` selects XML output."""
        self._apk_data = dict()
        self._a = None
        self._d = None
        self._dx = None
        self._cm = None
        self._strings = None
        # set output parameters
        categories = [
            'files', 'features', 'intent_filters', 'activities',
            'req_permissions', 'used_permissions', 'api_calls',
            'crypto_calls', 'net_calls', 'telephony_calls',
            'suspicious_calls', 'dynamic_calls', 'native_calls',
            'reflection_calls', 'urls', 'providers', 'receivers',
            'services', 'libraries',
        ]
        self._out = {
            'format': output_format,
            'feat_len': 80,
            'categories': categories,
        }

    def analyze(self, apk_file):
        """Discard previous results and extract all features of ``apk_file``."""
        self._apk_data = dict()
        self.__init_androguard_objects(apk_file)
        self.__extract_features(apk_file)

    def set_max_output_feat_len(self, feat_len):
        """Set the maximal length of feature strings in the output."""
        self._out['feat_len'] = feat_len

    def set_output_categories(self, categories):
        """Select the feature categories that are printed.

        By default, all extracted features are written to output.
        """
        self._out['categories'] = categories

    def __init_androguard_objects(self, apk_file):
        """Parse the APK and cache the androguard objects used below."""
        self._a = apk.APK(apk_file)
        self._d = dvm.DalvikVMFormat(self._a.get_dex())
        self._dx = VMAnalysis(self._d)
        self._cm = self._d.get_class_manager()
        self._strings = self._d.get_strings()

    def __extract_features(self, apk_file):
        """Run every extractor and store the results in ``_apk_data``."""
        self.__calc_hashes(apk_file)
        self.__extract_apk_obj_features()

        # extract features from vm analysis object
        used_perms_dict = self._dx.get_permissions([])
        self._apk_data['used_permissions'] = list(used_perms_dict)
        # Create the category up front so it exists even when no
        # permission is used, then merge the paths of every permission.
        # Previously each permission overwrote the previous one.
        self._apk_data['api_calls'] = dict()
        for paths in used_perms_dict.values():
            self.__extract_dx_features('api_calls', paths)

        tainted = self._dx.tainted_packages
        self.__extract_dx_features(
            'crypto_calls', tainted.search_crypto_packages())
        self.__extract_dx_features(
            'net_calls', tainted.search_net_packages())
        self.__extract_dx_features(
            'telephony_calls', tainted.search_telephony_packages())

        paths = self._dx.get_tainted_packages().search_methods(
            "Ldalvik/system/DexClassLoader;", ".", ".")
        self.__extract_dx_features('dynamic_calls', paths)
        paths = self._dx.get_tainted_packages().search_methods(
            "Ljava/lang/reflect/Method;", ".", ".")
        self.__extract_dx_features('reflection_calls', paths)

        self.__extract_native_calls()
        self.__extract_urls()
        self.__extract_suspicious_calls()

    def __calc_hashes(self, apk_file):
        """Store the md5 and sha256 of the APK file."""
        self._apk_data['md5'] = get_file_hash('md5', apk_file)
        self._apk_data['sha256'] = get_file_hash('sha256', apk_file)

    def __extract_apk_obj_features(self):
        """Copy manifest-level information from the APK object."""
        data = self._apk_data
        pkg = self._a
        data['apk_name'] = str(basename(pkg.get_filename()))
        data['package_name'] = str(pkg.get_package())
        data['sdk_version'] = str(pkg.get_min_sdk_version())
        data['features'] = pkg.get_elements('uses-feature', 'android:name')
        data['files'] = pkg.get_files()
        data['activities'] = pkg.get_activities()
        data['providers'] = pkg.get_providers()
        data['req_permissions'] = pkg.get_permissions()
        data['receivers'] = pkg.get_receivers()
        data['services'] = pkg.get_services()
        data['libraries'] = pkg.get_libraries()
        data['intent_filters'] = (
            pkg.get_elements('action', 'android:name') +
            pkg.get_elements('category', 'android:name'))

    @staticmethod
    def __format_call(class_name, method_name):
        """Return ``class->method`` without the first class-name character.

        Any method name containing ``init`` after its first character
        (e.g. ``<init>``, ``<clinit>``) is reported as plain ``init``.
        """
        if method_name.find('init') > 0:
            method_name = 'init'
        return class_name[1:] + '->' + method_name

    def __extract_dx_features(self, category, paths):
        """Add the destination call of every path to ``category``.

        Entries already stored under ``category`` are kept, so the method
        can be called several times for the same category.
        """
        calls = self._apk_data.setdefault(category, dict())
        for path in paths:
            dst = path.get_dst(self._cm)
            calls[self.__format_call(dst[0], dst[1])] = 1

    def __extract_native_calls(self):
        """Record every method whose access flags have bit 0x100 set."""
        native = self._apk_data['native_calls'] = dict()
        for method in self._d.get_methods():
            # this condition is copied from show_NativeCalls()
            if method.get_access_flags() & 0x100:
                name = self.__format_call(method.get_class_name(),
                                          method.get_name())
                native[name] = 1

    def __extract_urls(self):
        """Collect IP addresses, URLs and URL host names from the strings."""
        # Raw strings keep the exact pattern text while avoiding
        # invalid-escape warnings; compiled once, outside the loop.
        ip_regex = re.compile(
            r'(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})')
        # NOTE(review): the "\ " before the %XX group looks like a leftover
        # line continuation; it makes that alternative require a leading
        # space. Pattern text left unchanged -- confirm intent.
        url_regex = re.compile(
            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|'
            r'\ (?:%[0-9a-fA-F][0-9a-fA-F]))+')

        urls = self._apk_data['urls'] = dict()
        for string in self._strings:
            # search for ip addresses
            ip = ip_regex.search(string)
            if ip is not None:
                urls[ip.group()] = 1

            # search for urls
            url = url_regex.search(string)
            if url is not None:
                url = urllib.quote(url.group(), '>:/?')
                urls[url] = 1
                # add hostname
                urls[urlparse(url).netloc] = 1

    def __extract_suspicious_calls(self):
        """Flag strings and calls that match a fixed list of suspicious names."""
        sus_calls = [
            'Ljava/net/HttpURLconnection;->setRequestMethod',
            'Ljava/net/HttpURLconnection',
            'getExternalStorageDirectory',
            'getSimCountryIso',
            'execHttpRequest',
            'sendTextMessage',
            'Lorg/apache/http/client/methods/HttpPost',
            'getSubscriberId',
            'Landroid/telephony/SmsMessage;->getMessageBody',
            'getDeviceId',
            'getPackageInfo',
            'getSystemService',
            'getWifiState',
            'system/bin/su',
            'system/xbin/su',
            'setWifiEnabled',
            'setWifiDisabled',
            'Cipher',
            'Ljava/io/IOException;->printStackTrace',
            'android/os/Exec',
            'Ljava/lang/Runtime;->exec',
        ]
        suspicious = self._apk_data['suspicious_calls'] = dict()
        # A string is recorded once if it contains any of the names; no
        # numpy-backed lookup table is needed for that.
        for string in self._strings:
            if any(sc in string for sc in sus_calls):
                suspicious[string] = 1

        sus_tuples = [
            ('java/net/HttpURLconnection', 'setRequestMethod'),
            ('android/telephony/SmsMessage', 'getMessageBody'),
            ('java/io/IOException', 'printStackTrace'),
            ('java/lang/Runtime', 'exec'),
        ]
        for tpl in sus_tuples:
            # NOTE(review): the names above carry no leading 'L', so [1:]
            # drops their first real character. Left as is -- confirm how
            # search_methods matches class names before changing it.
            class_name = tpl[0][1:]
            name = tpl[1]
            paths = self._dx.tainted_packages.search_methods(
                class_name, name, '')
            for path in paths:
                method = path.get_dst(self._cm)
                suspicious[method[0] + '->' + method[1]] = 1

    def __str__(self):
        """Render the results as XML or as ``category::item`` lines."""
        if self._out['format'] == 'xml':
            return self.__create_xml_string()
        return self.__get_feature_strings()

    def __get_feature_strings(self):
        """Return one ``category::item`` line per feature, newline separated.

        Categories without extracted data are skipped; items are cut to
        the configured feature length.
        """
        limit = self._out['feat_len']
        lines = []
        for category in self._out['categories']:
            if category not in self._apk_data:
                continue
            for item in self._apk_data[category]:
                lines.append('{0}::{1}'.format(category, item[:limit]))
        return '\n'.join(lines)

    def __create_xml_string(self):
        """Return the results as an XML document string."""
        parts = ['<static>', self.__get_info_string()]
        for category in self._out['categories']:
            parts.append(self.__get_category_string(category))
        parts.append('\n</static>')
        doc = parseString(''.join(parts))
        return doc.toxml().replace('<static>', '\n<static>')

    def __get_info_string(self):
        """Return the ``<info>`` element holding hashes and package data."""
        from xml.sax.saxutils import escape

        data = self._apk_data
        istr = '\n\t<info>'
        istr += '\n\t\t<sha256>' + escape(str(data['sha256'])) + '</sha256>'
        istr += '\n\t\t<md5>' + escape(str(data['md5'])) + '</md5>'
        istr += '\n\t\t<apk_name>' + escape(data['apk_name']) + '</apk_name>'
        istr += ('\n\t\t<package_name>' + escape(data['package_name']) +
                 '</package_name>')
        istr += ('\n\t\t<sdk_version>' + escape(data['sdk_version']) +
                 '</sdk_version>')
        istr += '\n\t</info>'
        return istr

    def __get_category_string(self, category):
        """Return the XML element for one category.

        A category without extracted data yields an empty element instead
        of raising ``KeyError``. Item text is cut to the configured length
        and then XML-escaped, so characters such as ``&`` in URLs no
        longer break parsing.
        """
        from xml.sax.saxutils import escape

        field = self.__get_field_name(category)
        limit = self._out['feat_len']
        parts = ['\n\t<{}>'.format(category)]
        for item in self._apk_data.get(category, ()):
            parts.append('\n\t\t<{0}>{1}</{0}>'.format(
                field, escape(item[:limit])))
        parts.append('\n\t</{}>'.format(category))
        return ''.join(parts)

    @staticmethod
    def __get_field_name(category):
        """Return the singular element name for a plural category name."""
        if category.endswith('ies'):
            return category[:-3] + 'y'
        else:
            return category[:-1]
from androguard.core.analysis.analysis import VMAnalysis
from androguard.core.bytecodes.apk import APK
from androguard.core.bytecodes.dvm import DalvikVMFormat
from core.analysis import *

# Smoke test: print the activity count and main activity of 1_1.apk, then
# build a VM analysis for its dex and print a method signature.
if __name__ == '__main__':
    a = APK("1_1.apk")
    activity_count = len(a.get_activities())
    print(activity_count)
    main_activity = a.get_main_activity()
    print(main_activity)

    d = DalvikVMFormat(a.get_dex())
    dx = VMAnalysis(d)
    # NOTE(review): get_method_signature is called without arguments here;
    # confirm the VMAnalysis API in use accepts that.
    signature = dx.get_method_signature()
    print(signature)
def extract_features(file_path):
    """Extract a dictionary of static features from an APK.

    Args:
        file_path: Path of the APK to analyse.

    Returns:
        ``None`` when the APK cannot be parsed or analysed. Otherwise a
        dict holding manifest data, hashes of the raw APK, 0/1
        code-property flags, the entropy rate of all class / method /
        field names and ``feature_vectors``. The vectors are 0/1 lists
        aligned with ``API_CALLS``, ``PERMISSIONS`` and
        ``SPECIAL_STRINGS``, plus ``opt_codes``, a dict that marks the
        ``NGRAM_THRE`` most frequent opcode ``NGRAM``-grams with 1.
    """
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = VMAnalysis(d)
        # Second parse kept on purpose: is_reflection_code below is fed a
        # uVMAnalysis, which is built on its own VM instance.
        vm = dvm.DalvikVMFormat(a.get_dex())
        vmx = analysis.uVMAnalysis(vm)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception:
        # Unparseable input yields None. A bare ``except:`` would also have
        # swallowed KeyboardInterrupt / SystemExit.
        return None

    result = {}
    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    raw = a.get_raw()  # fetched once, hashed twice
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()

    # The name lists feed only the entropy rate; they are not stored in
    # the result.
    class_names = [c.get_name() for c in d.get_classes()]
    method_names = [m.get_name() for m in d.get_methods()]
    field_names = [f.get_name() for f in d.get_fields()]

    result['is_native_code'] = 1 if analysis.is_native_code(dx) else 0
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_crypto_code'] = 1 if analysis.is_crypto_code(dx) else 0
    result['is_dyn_code'] = 1 if analysis.is_dyn_code(dx) else 0
    result['is_reflection_code'] = 1 if analysis.is_reflection_code(vmx) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0

    # Classes, methods AND fields. The method names used to be added twice
    # while the field names were computed and then dropped.
    s_list = class_names + method_names + field_names
    result['entropy_rate'] = entropy_rate(s_list)

    vectors = result['feature_vectors'] = {}

    # Search for the presence of api calls in a given apk
    vectors['api_calls'] = [
        1 if dx.tainted_packages.search_methods(".", call, ".") else 0
        for call in API_CALLS
    ]

    # Search for the presence of permissions in a given apk
    vectors['permissions'] = [
        1 if permission in result['permissions'] else 0
        for permission in PERMISSIONS
    ]

    # Search for the presence of special strings in a given apk
    vectors['special_strings'] = [
        1 if d.get_regex_strings(word) else 0
        for word in SPECIAL_STRINGS
    ]

    # Opcode sequence over all methods, in method order.
    opt_seq = []
    for m in d.get_methods():
        opt_seq.extend(i.get_name() for i in m.get_instructions())

    # A sequence of length L has L - NGRAM + 1 windows; the previous bound
    # of L - NGRAM silently dropped the last one.
    optngram = Counter(
        tuple(opt_seq[i:i + NGRAM])
        for i in range(len(opt_seq) - NGRAM + 1))

    # Keep the NGRAM_THRE most frequent n-grams as presence flags.
    ranked = sorted(optngram.items(), key=lambda item: item[1], reverse=True)
    optcodes = dict()
    for gram, _count in ranked[:NGRAM_THRE]:
        optcodes[str(gram)] = 1
    vectors['opt_codes'] = optcodes

    return result
class StaticAPKAnalyzer():
    """Performs static analysis on a given apk file.

    ``analyze()`` fills ``_apk_data`` with one entry per feature category.
    ``str(analyzer)`` renders the selected categories either as
    ``category::item`` lines or, when ``output_format == 'xml'``, as an XML
    document.
    """

    def __init__(self, output_format=None):
        """Create an analyzer; ``output_format='xml'`` selects XML output."""
        self._apk_data = dict()
        self._a = None
        self._d = None
        self._dx = None
        self._cm = None
        self._strings = None
        # set output parameters
        categories = [
            'files', 'features', 'intent_filters', 'activities',
            'req_permissions', 'used_permissions', 'api_calls',
            'crypto_calls', 'net_calls', 'telephony_calls',
            'suspicious_calls', 'dynamic_calls', 'native_calls',
            'reflection_calls', 'urls', 'providers', 'receivers',
            'services', 'libraries',
        ]
        self._out = {
            'format': output_format,
            'feat_len': 80,
            'categories': categories,
        }

    def analyze(self, apk_file):
        """Discard previous results and extract all features of ``apk_file``."""
        self._apk_data = dict()
        self.__init_androguard_objects(apk_file)
        self.__extract_features(apk_file)

    def set_max_output_feat_len(self, feat_len):
        """Set the maximal length of feature strings in the output."""
        self._out['feat_len'] = feat_len

    def set_output_categories(self, categories):
        """Select the feature categories that are printed.

        By default, all extracted features are written to output.
        """
        self._out['categories'] = categories

    def __init_androguard_objects(self, apk_file):
        """Parse the APK and cache the androguard objects used below."""
        self._a = apk.APK(apk_file)
        self._d = dvm.DalvikVMFormat(self._a.get_dex())
        self._dx = VMAnalysis(self._d)
        self._cm = self._d.get_class_manager()
        self._strings = self._d.get_strings()

    def __extract_features(self, apk_file):
        """Run every extractor and store the results in ``_apk_data``."""
        self.__calc_hashes(apk_file)
        self.__extract_apk_obj_features()

        # extract features from vm analysis object
        used_perms_dict = self._dx.get_permissions([])
        self._apk_data['used_permissions'] = list(used_perms_dict)
        # Create the category up front so it exists even when no
        # permission is used, then merge the paths of every permission.
        # Previously each permission overwrote the previous one.
        self._apk_data['api_calls'] = dict()
        for paths in used_perms_dict.values():
            self.__extract_dx_features('api_calls', paths)

        tainted = self._dx.tainted_packages
        self.__extract_dx_features(
            'crypto_calls', tainted.search_crypto_packages())
        self.__extract_dx_features(
            'net_calls', tainted.search_net_packages())
        self.__extract_dx_features(
            'telephony_calls', tainted.search_telephony_packages())

        paths = self._dx.get_tainted_packages().search_methods(
            "Ldalvik/system/DexClassLoader;", ".", ".")
        self.__extract_dx_features('dynamic_calls', paths)
        paths = self._dx.get_tainted_packages().search_methods(
            "Ljava/lang/reflect/Method;", ".", ".")
        self.__extract_dx_features('reflection_calls', paths)

        self.__extract_native_calls()
        self.__extract_urls()
        self.__extract_suspicious_calls()

    def __calc_hashes(self, apk_file):
        """Store the md5 and sha256 of the APK file."""
        self._apk_data['md5'] = get_file_hash('md5', apk_file)
        self._apk_data['sha256'] = get_file_hash('sha256', apk_file)

    def __extract_apk_obj_features(self):
        """Copy manifest-level information from the APK object."""
        data = self._apk_data
        pkg = self._a
        data['apk_name'] = str(basename(pkg.get_filename()))
        data['package_name'] = str(pkg.get_package())
        data['sdk_version'] = str(pkg.get_min_sdk_version())
        data['features'] = pkg.get_elements('uses-feature', 'android:name')
        data['files'] = pkg.get_files()
        data['activities'] = pkg.get_activities()
        data['providers'] = pkg.get_providers()
        data['req_permissions'] = pkg.get_permissions()
        data['receivers'] = pkg.get_receivers()
        data['services'] = pkg.get_services()
        data['libraries'] = pkg.get_libraries()
        data['intent_filters'] = (
            pkg.get_elements('action', 'android:name') +
            pkg.get_elements('category', 'android:name'))

    @staticmethod
    def __format_call(class_name, method_name):
        """Return ``class->method`` without the first class-name character.

        Any method name containing ``init`` after its first character
        (e.g. ``<init>``, ``<clinit>``) is reported as plain ``init``.
        """
        if method_name.find('init') > 0:
            method_name = 'init'
        return class_name[1:] + '->' + method_name

    def __extract_dx_features(self, category, paths):
        """Add the destination call of every path to ``category``.

        Entries already stored under ``category`` are kept, so the method
        can be called several times for the same category.
        """
        calls = self._apk_data.setdefault(category, dict())
        for path in paths:
            dst = path.get_dst(self._cm)
            calls[self.__format_call(dst[0], dst[1])] = 1

    def __extract_native_calls(self):
        """Record every method whose access flags have bit 0x100 set."""
        native = self._apk_data['native_calls'] = dict()
        for method in self._d.get_methods():
            # this condition is copied from show_NativeCalls()
            if method.get_access_flags() & 0x100:
                name = self.__format_call(method.get_class_name(),
                                          method.get_name())
                native[name] = 1

    def __extract_urls(self):
        """Collect IP addresses, URLs and URL host names from the strings."""
        # Raw strings keep the exact pattern text while avoiding
        # invalid-escape warnings; compiled once, outside the loop.
        ip_regex = re.compile(
            r'(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})')
        # NOTE(review): the "\ " before the %XX group looks like a leftover
        # line continuation; it makes that alternative require a leading
        # space. Pattern text left unchanged -- confirm intent.
        url_regex = re.compile(
            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|'
            r'\ (?:%[0-9a-fA-F][0-9a-fA-F]))+')

        urls = self._apk_data['urls'] = dict()
        for string in self._strings:
            # search for ip addresses
            ip = ip_regex.search(string)
            if ip is not None:
                urls[ip.group()] = 1

            # search for urls
            url = url_regex.search(string)
            if url is not None:
                url = urllib.quote(url.group(), '>:/?')
                urls[url] = 1
                # add hostname
                urls[urlparse(url).netloc] = 1

    def __extract_suspicious_calls(self):
        """Flag strings and calls that match a fixed list of suspicious names."""
        sus_calls = [
            'Ljava/net/HttpURLconnection;->setRequestMethod',
            'Ljava/net/HttpURLconnection',
            'getExternalStorageDirectory',
            'getSimCountryIso',
            'execHttpRequest',
            'sendTextMessage',
            'Lorg/apache/http/client/methods/HttpPost',
            'getSubscriberId',
            'Landroid/telephony/SmsMessage;->getMessageBody',
            'getDeviceId',
            'getPackageInfo',
            'getSystemService',
            'getWifiState',
            'system/bin/su',
            'system/xbin/su',
            'setWifiEnabled',
            'setWifiDisabled',
            'Cipher',
            'Ljava/io/IOException;->printStackTrace',
            'android/os/Exec',
            'Ljava/lang/Runtime;->exec',
        ]
        suspicious = self._apk_data['suspicious_calls'] = dict()
        # A string is recorded once if it contains any of the names; no
        # numpy-backed lookup table is needed for that.
        for string in self._strings:
            if any(sc in string for sc in sus_calls):
                suspicious[string] = 1

        sus_tuples = [
            ('java/net/HttpURLconnection', 'setRequestMethod'),
            ('android/telephony/SmsMessage', 'getMessageBody'),
            ('java/io/IOException', 'printStackTrace'),
            ('java/lang/Runtime', 'exec'),
        ]
        for tpl in sus_tuples:
            # NOTE(review): the names above carry no leading 'L', so [1:]
            # drops their first real character. Left as is -- confirm how
            # search_methods matches class names before changing it.
            class_name = tpl[0][1:]
            name = tpl[1]
            paths = self._dx.tainted_packages.search_methods(
                class_name, name, '')
            for path in paths:
                method = path.get_dst(self._cm)
                suspicious[method[0] + '->' + method[1]] = 1

    def __str__(self):
        """Render the results as XML or as ``category::item`` lines."""
        if self._out['format'] == 'xml':
            return self.__create_xml_string()
        return self.__get_feature_strings()

    def __get_feature_strings(self):
        """Return one ``category::item`` line per feature, newline separated.

        Categories without extracted data are skipped; items are cut to
        the configured feature length.
        """
        limit = self._out['feat_len']
        lines = []
        for category in self._out['categories']:
            if category not in self._apk_data:
                continue
            for item in self._apk_data[category]:
                lines.append('{0}::{1}'.format(category, item[:limit]))
        return '\n'.join(lines)

    def __create_xml_string(self):
        """Return the results as an XML document string."""
        parts = ['<static>', self.__get_info_string()]
        for category in self._out['categories']:
            parts.append(self.__get_category_string(category))
        parts.append('\n</static>')
        doc = parseString(''.join(parts))
        return doc.toxml().replace('<static>', '\n<static>')

    def __get_info_string(self):
        """Return the ``<info>`` element holding hashes and package data."""
        from xml.sax.saxutils import escape

        data = self._apk_data
        istr = '\n\t<info>'
        istr += '\n\t\t<sha256>' + escape(str(data['sha256'])) + '</sha256>'
        istr += '\n\t\t<md5>' + escape(str(data['md5'])) + '</md5>'
        istr += '\n\t\t<apk_name>' + escape(data['apk_name']) + '</apk_name>'
        istr += ('\n\t\t<package_name>' + escape(data['package_name']) +
                 '</package_name>')
        istr += ('\n\t\t<sdk_version>' + escape(data['sdk_version']) +
                 '</sdk_version>')
        istr += '\n\t</info>'
        return istr

    def __get_category_string(self, category):
        """Return the XML element for one category.

        A category without extracted data yields an empty element instead
        of raising ``KeyError``. Item text is cut to the configured length
        and then XML-escaped, so characters such as ``&`` in URLs no
        longer break parsing.
        """
        from xml.sax.saxutils import escape

        field = self.__get_field_name(category)
        limit = self._out['feat_len']
        parts = ['\n\t<{}>'.format(category)]
        for item in self._apk_data.get(category, ()):
            parts.append('\n\t\t<{0}>{1}</{0}>'.format(
                field, escape(item[:limit])))
        parts.append('\n\t</{}>'.format(category))
        return ''.join(parts)

    @staticmethod
    def __get_field_name(category):
        """Return the singular element name for a plural category name."""
        if category.endswith('ies'):
            return category[:-3] + 'y'
        else:
            return category[:-1]