def __init__(self, apkpath, core_library="androguard"): if core_library == "rizin": self.apk = RizinImp(apkpath) elif core_library == "androguard": self.apk = AndroguardImp(apkpath) self.all_strings = self.apk.get_strings()
def __init__(self, apk, core_library="androguard"): """ :param apk: the filename of the apk. """ core_library = core_library.lower() if core_library == "rizin": self.apkinfo = RizinImp(apk) elif core_library == "androguard": self.apkinfo = AndroguardImp(apk) else: raise ValueError( f"Unsupported core library for Quark: {core_library}") self.quark_analysis = QuarkAnalysis()
def apkinfo(): r = requests.get(APK_SOURCE, allow_redirects=True) file = open(APK_FILENAME, "wb") file.write(r.content) apkinfo = AndroguardImp(APK_FILENAME) yield apkinfo
class Forensic: __slots__ = ["apk", "all_strings"] def __init__(self, apkpath, core_library="androguard"): if core_library == "rizin": self.apk = RizinImp(apkpath) elif core_library == "androguard": self.apk = AndroguardImp(apkpath) self.all_strings = self.apk.get_strings() def get_all_strings(self): """ Return all the strings inside the APK with a set. :return: a set of strings containing all strings """ return self.all_strings def get_url(self): """ Return all the url strings inside the APK with a set. :return: a set of strings containing the url """ url = set() for string in self.all_strings: if extract_url(string): for url_string in extract_url(string): url.add(url_string) return url def get_ip(self): """ Return all the ip address strings inside the APK with a set. :return: a set of strings containing the ip address """ ip = set() for string in self.all_strings: if extract_ip(string): for ip_string in extract_ip(string): ip.add(ip_string) return ip def get_content(self): """ Return all the content strings inside the APK with a set. :return: a set of strings containing "content://" """ return { string for string in self.all_strings if extract_content(string) } def get_file(self): """ Return all the file strings inside the APK with a set. :return: a set of strings containing "file://" """ return {string for string in self.all_strings if extract_file(string)} def get_base64(self): """ Return all possible Base64-encoded strings in the APK. :return: a set of strings containing possible Base64-encoded string """ return { string for string in self.all_strings if validate_base64(string) } def get_android_api(self): """ Return all Android APIs in the APK. :return: a list of MethodAnalysis which contains all Android API. """ return self.apk.android_apis
class Quark: """Quark module is used to check quark's five-stage theory""" def __init__(self, apk, core_library="androguard"): """ :param apk: the filename of the apk. """ core_library = core_library.lower() if core_library == "rizin": self.apkinfo = RizinImp(apk) elif core_library == "androguard": self.apkinfo = AndroguardImp(apk) else: raise ValueError( f"Unsupported core library for Quark: {core_library}" ) self.quark_analysis = QuarkAnalysis() def find_previous_method( self, base_method, parent_function, wrapper, visited_methods=None ): """ Find the method under the parent function, based on base_method before to parent_function. This will append the method into wrapper. :param base_method: the base function which needs to be searched. :param parent_function: the top-level function which calls the basic function. :param wrapper: list is used to track each function. :param visited_methods: set with tested method. :return: None """ if visited_methods is None: visited_methods = set() method_set = self.apkinfo.upperfunc(base_method) visited_methods.add(base_method) if method_set is not None: if parent_function in method_set: wrapper.append(base_method) else: for item in method_set: # prevent to test the tested methods. if item in visited_methods: continue self.find_previous_method( item, parent_function, wrapper, visited_methods ) def find_intersection(self, first_method_set, second_method_set, depth=1): """ Find the first_method_list ∩ second_method_list. [MethodAnalysis, MethodAnalysis,...] :param first_method_set: first list that contains each MethodAnalysis. :param second_method_set: second list that contains each MethodAnalysis. :param depth: maximum number of recursive search functions. :return: a set of first_method_list ∩ second_method_list or None. """ # Check both lists are not null if not first_method_set or not second_method_set: raise ValueError("Set is Null") # find ∩ result = first_method_set & second_method_set if result: return result else: return self.method_recursive_search( depth, first_method_set, second_method_set ) def method_recursive_search( self, depth, first_method_set, second_method_set ): # Not found same method usage, try to find the next layer. depth += 1 if depth > MAX_SEARCH_LAYER: return None # Append first layer into next layer. next_level_set_1 = first_method_set.copy() next_level_set_2 = second_method_set.copy() # Extend the xref from function into next layer. for method in first_method_set: if self.apkinfo.upperfunc(method): next_level_set_1 = ( self.apkinfo.upperfunc(method) | next_level_set_1 ) for method in second_method_set: if self.apkinfo.upperfunc(method): next_level_set_2 = ( self.apkinfo.upperfunc(method) | next_level_set_2 ) return self.find_intersection( next_level_set_1, next_level_set_2, depth ) def check_sequence( self, mutual_parent, first_method_list, second_method_list ): """ Check if the first function appeared before the second function. :param mutual_parent: function that call the first function and second functions at the same time. :param first_method_list: the first show up function, which is a MethodAnalysis :param second_method_list: the second show up function, which is a MethodAnalysis :return: True or False """ state = False for first_call_method in first_method_list: for second_call_method in second_method_list: seq_table = [ (call, number) for call, number in self.apkinfo.lowerfunc(mutual_parent) if call in (first_call_method, second_call_method) ] # sorting based on the value of the number if len(seq_table) < 2: # Not Found sequence in same_method continue seq_table.sort(key=operator.itemgetter(1)) # seq_table would look like: [(getLocation, 1256), (sendSms, 1566), (sendSms, 2398)] method_list_need_check = [x[0] for x in seq_table] sequence_pattern_method = [ first_call_method, second_call_method, ] if tools.contains( sequence_pattern_method, method_list_need_check ): state = True # Record the mapping between the parent function and the wrapper method self.quark_analysis.parent_wrapper_mapping[ mutual_parent.full_name ] = self.apkinfo.get_wrapper_smali( mutual_parent, first_call_method, second_call_method ) return state def check_parameter( self, parent_function, first_method_list, second_method_list, keyword_item_list=None, ): """ Check the usage of the same parameter between two method. :param parent_function: function that call the first function and second functions at the same time. :param first_method_list: function which calls before the second method. :param second_method_list: function which calls after the first method. :return: True or False """ state = False for first_call_method in first_method_list: for second_call_method in second_method_list: pyeval = PyEval(self.apkinfo) # Check if there is an operation of the same register for bytecode_obj in self.apkinfo.get_method_bytecode( parent_function ): # ['new-instance', 'v4', Lcom/google/progress/SMSHelper;] instruction = [bytecode_obj.mnemonic] if bytecode_obj.registers is not None: instruction.extend(bytecode_obj.registers) if bytecode_obj.parameter is not None: instruction.append(bytecode_obj.parameter) # for the case of MUTF8String instruction = [str(x) for x in instruction] if instruction[0] in pyeval.eval.keys(): pyeval.eval[instruction[0]](instruction) for table in pyeval.show_table(): for val_obj in table: for c_func in val_obj.called_by_func: first_method_pattern = f"{first_call_method.class_name}->{first_call_method.name}{first_call_method.descriptor}" second_method_pattern = f"{second_call_method.class_name}->{second_call_method.name}{second_call_method.descriptor}" if ( first_method_pattern in c_func and second_method_pattern in c_func ): state = True if keyword_item_list and any( keyword_item_list ): self.check_parameter_values( c_func, ( first_method_pattern, second_method_pattern, ), keyword_item_list, ) # Record the mapping between the parent function and the wrapper method self.quark_analysis.parent_wrapper_mapping[ parent_function.full_name ] = self.apkinfo.get_wrapper_smali( parent_function, first_call_method, second_call_method, ) # Build for the call graph if state: call_graph_analysis = { "parent": parent_function, "first_call": first_call_method, "second_call": second_call_method, "apkinfo": self.apkinfo, "first_api": self.quark_analysis.first_api, "second_api": self.quark_analysis.second_api, "crime": self.quark_analysis.crime_description, } self.quark_analysis.call_graph_analysis_list.append( call_graph_analysis ) return state @staticmethod def check_parameter_values(source_str, pattern_list, keyword_item_list): for pattern, keyword_item in zip(pattern_list, keyword_item_list): if keyword_item is None: continue start_index = source_str.index(pattern) + len(pattern) end_index = -1 brackets_count = 1 for idx, char in enumerate(source_str[start_index:]): if char == "(": brackets_count += 1 elif char == ")": brackets_count -= 1 if brackets_count == 0: end_index = idx + start_index break parameter_str = source_str[start_index:end_index] for keyword in keyword_item: if str(keyword) not in parameter_str: return False return True def find_api_usage(self, class_name, method_name, descriptor_name): method_list = [] # Source method source_method = self.apkinfo.find_method( class_name, method_name, descriptor_name ) if source_method: return [source_method] # Potential Method potential_method_list = [ method for method in self.apkinfo.all_methods if method.name == method_name and method.descriptor == descriptor_name ] potential_method_list = [ method for method in potential_method_list if not next(self.apkinfo.get_method_bytecode(method), None) ] # Check if each method's class is a subclass of the given class for method in potential_method_list: current_class_set = {method.class_name} while not current_class_set.intersection( {class_name, "Ljava/lang/Object;"} ): next_class_set = set() for clazz in current_class_set: next_class_set.update( self.apkinfo.superclass_relationships[clazz] ) current_class_set = next_class_set current_class_set.discard("Ljava/lang/Object;") if current_class_set: method_list.append(method) return method_list def run(self, rule_obj): """ Run the five levels check to get the y_score. :param rule_obj: the instance of the RuleObject. :return: None """ self.quark_analysis.clean_result() self.quark_analysis.crime_description = rule_obj.crime # Level 1: Permission Check if self.apkinfo.ret_type == "DEX": rule_obj.check_item[0] = True elif set(rule_obj.permission).issubset(set(self.apkinfo.permissions)): rule_obj.check_item[0] = True else: # Exit if the level 1 stage check fails. return # Level 2: Single Native API Check api_1_method_name = rule_obj.api[0]["method"] api_1_class_name = rule_obj.api[0]["class"] api_1_descriptor = rule_obj.api[0]["descriptor"] api_2_method_name = rule_obj.api[1]["method"] api_2_class_name = rule_obj.api[1]["class"] api_2_descriptor = rule_obj.api[1]["descriptor"] first_api_list = self.find_api_usage( api_1_class_name, api_1_method_name, api_1_descriptor ) second_api_list = self.find_api_usage( api_2_class_name, api_2_method_name, api_2_descriptor ) if not first_api_list and not second_api_list: # Exit if the level 2 stage check fails. return else: rule_obj.check_item[1] = True if first_api_list: self.quark_analysis.level_2_result.append(first_api_list[0]) if second_api_list: self.quark_analysis.level_2_result.append(second_api_list[0]) # Level 3: Both Native API Check if not (first_api_list and second_api_list): # Exit if the level 3 stage check fails. return self.quark_analysis.first_api = first_api_list[0] self.quark_analysis.second_api = second_api_list[0] rule_obj.check_item[2] = True self.quark_analysis.level_3_result = [set(), set()] # Level 4: Sequence Check for first_api in first_api_list: for second_api in second_api_list: # Looking for the first layer of the upper function first_api_xref_from = self.apkinfo.upperfunc(first_api) second_api_xref_from = self.apkinfo.upperfunc(second_api) self.quark_analysis.level_3_result[0].update( first_api_xref_from ) self.quark_analysis.level_3_result[1].update( second_api_xref_from ) mutual_parent_function_list = self.find_intersection( first_api_xref_from, second_api_xref_from ) if mutual_parent_function_list is None: # Exit if the level 4 stage check fails. return for parent_function in mutual_parent_function_list: first_wrapper = [] second_wrapper = [] self.find_previous_method( first_api, parent_function, first_wrapper ) self.find_previous_method( second_api, parent_function, second_wrapper ) if self.check_sequence( parent_function, first_wrapper, second_wrapper ): rule_obj.check_item[3] = True self.quark_analysis.level_4_result.append( parent_function ) keyword_item_list = ( rule_obj.api[i].get("keyword", None) for i in range(2) ) # Level 5: Handling The Same Register Check if self.check_parameter( parent_function, first_wrapper, second_wrapper, keyword_item_list=keyword_item_list, ): rule_obj.check_item[4] = True self.quark_analysis.level_5_result.append( parent_function ) def get_json_report(self): """ Get quark report including summary and detail with json format. :return: json report """ w = Weight( self.quark_analysis.score_sum, self.quark_analysis.weight_sum ) warning = w.calculate() # Filter out color code in threat level for level in ["Low Risk", "Moderate Risk", "High Risk"]: if level in warning: warning = level return { "md5": self.apkinfo.md5, "apk_filename": self.apkinfo.filename, "size_bytes": self.apkinfo.filesize, "threat_level": warning, "total_score": self.quark_analysis.score_sum, "crimes": self.quark_analysis.json_report, } def generate_json_report(self, rule_obj): """ Show the json report. :param rule_obj: the instance of the RuleObject :return: None """ # Count the confidence confidence = str(rule_obj.check_item.count(True) * 20) + "%" conf = rule_obj.check_item.count(True) weight = rule_obj.get_score(conf) score = rule_obj.score # Assign level 1 examine result permissions = rule_obj.permission if rule_obj.check_item[0] else [] # Assign level 2 examine result api = [] if rule_obj.check_item[1]: for item2 in self.quark_analysis.level_2_result: api.append( { "class": str(item2.class_name), "method": str(item2.name), "descriptor": str(item2.descriptor), } ) # Assign level 3 examine result combination = [] if rule_obj.check_item[2]: combination = rule_obj.api # Assign level 4 - 5 examine result if exist sequnce_show_up = [] same_operation_show_up = [] # Check examination has passed level 4 if self.quark_analysis.level_4_result and rule_obj.check_item[3]: for item4 in self.quark_analysis.level_4_result: sequnce_show_up.append( { item4.full_name: self.quark_analysis.parent_wrapper_mapping[ item4.full_name ] } ) # Check examination has passed level 5 if self.quark_analysis.level_5_result and rule_obj.check_item[4]: for item5 in self.quark_analysis.level_5_result: same_operation_show_up.append( { item5.full_name: self.quark_analysis.parent_wrapper_mapping[ item5.full_name ] } ) crime = { "crime": rule_obj.crime, "score": score, "weight": weight, "confidence": confidence, "permissions": permissions, "native_api": api, "combination": combination, "sequence": sequnce_show_up, "register": same_operation_show_up, } self.quark_analysis.json_report.append(crime) # add the weight self.quark_analysis.weight_sum += weight # add the score self.quark_analysis.score_sum += score def add_table_row(self, name, rule_obj, confidence, score, weight): self.quark_analysis.summary_report_table.add_row( [ name, green(rule_obj.crime), yellow(confidence), score, red(weight), ] ) def show_summary_report(self, rule_obj, threshold=None): """ Show the summary report. :param rule_obj: the instance of the RuleObject. :return: None """ # Count the confidence confidence = f"{rule_obj.check_item.count(True) * 20}%" conf = rule_obj.check_item.count(True) weight = rule_obj.get_score(conf) score = rule_obj.score name = rule_obj.rule_filename if threshold: if rule_obj.check_item.count(True) * 20 >= int(threshold): self.add_table_row(name, rule_obj, confidence, score, weight) else: self.add_table_row(name, rule_obj, confidence, score, weight) # add the weight self.quark_analysis.weight_sum += weight # add the score self.quark_analysis.score_sum += score def show_label_report(self, rule_path, all_labels, table_version): """ Show the report based on label, last column represents max confidence for that label :param rule_path: the path where may be present the file label_desc.csv. :param all_labels: dictionary containing label:<array of confidence values associated to that label> :return: None """ label_desc = {} # clear table to manage max/detail version self.quark_analysis.label_report_table.clear() if os.path.isfile(os.path.join(rule_path, "label_desc.csv")): # associate to each label a description col_list = ["label", "description"] # csv file on form <label,description> # put this file in the folder of rules (it must not be a json file since it could create conflict with management of rules) df = pd.read_csv( os.path.join(rule_path, "label_desc.csv"), usecols=col_list ) label_desc = dict(zip(df["label"], df["description"])) for label_name in all_labels: confidences = np.array(all_labels[label_name]) if table_version == "max": self.quark_analysis.label_report_table.field_names = [ "Label", "Description", "Number of rules", "MAX Confidence %", ] self.quark_analysis.label_report_table.add_row( [ green(label_name), yellow(label_desc.get(label_name, "-")), (len(confidences)), red(np.max(confidences)), ] ) else: self.quark_analysis.label_report_table.field_names = [ "Label", "Description", "Number of rules", "MAX Confidence %", "AVG Confidence", "Std Deviation", "# of Rules with Confidence >= 80%", ] self.quark_analysis.label_report_table.add_row( [ green(label_name), yellow(label_desc.get(label_name, "-")), (len(confidences)), red(np.max(confidences)), magenta(round(np.mean(confidences), 2)), lightblue(round(np.std(confidences), 2)), lightyellow(np.count_nonzero(confidences >= 80)), ] ) def show_detail_report(self, rule_obj): """ Show the detail report. :param rule_obj: the instance of the RuleObject. :return: None """ # Count the confidence print("") print(f"Confidence: {rule_obj.check_item.count(True) * 20}%") print("") if rule_obj.check_item[0]: colorful_report("1.Permission Request") for permission in rule_obj.permission: print(f"\t\t {permission}") if rule_obj.check_item[1]: colorful_report("2.Native API Usage") for api in self.quark_analysis.level_2_result: print(f"\t\t {api.full_name}") if rule_obj.check_item[2]: colorful_report("3.Native API Combination") for numbered_api, method_list in zip( ("First API", "Second API"), self.quark_analysis.level_3_result ): print(f"\t\t {numbered_api} show up in:") if method_list: for comb_method in method_list: print(f"\t\t {comb_method.full_name}") else: print("\t\t None") if rule_obj.check_item[3]: colorful_report("4.Native API Sequence") print("\t\t Sequence show up in:") for seq_method in self.quark_analysis.level_4_result: print(f"\t\t {seq_method.full_name}") if rule_obj.check_item[4]: colorful_report("5.Native API Use Same Parameter") for seq_operation in self.quark_analysis.level_5_result: print(f"\t\t {seq_operation.full_name}") def show_call_graph(self, output_format=None): print_info("Creating Call Graph...") for ( call_graph_analysis ) in self.quark_analysis.call_graph_analysis_list: call_graph(call_graph_analysis, output_format) print_success("Call Graph Completed") def show_rule_classification(self): print_info("Rules Classification") data_bundle = get_rule_classification_data( self.quark_analysis.call_graph_analysis_list, MAX_SEARCH_LAYER ) output_parent_function_table(data_bundle) output_parent_function_json(data_bundle) output_parent_function_graph(data_bundle)