def __init__(self, base_dir):
    """Initialise paths, the utility helper and per-analysis state.

    :param base_dir: string specifying location of script base directory
    """
    # Path to the script base directory; the utility helper is built
    # from the same location.
    self.path_base_dir = base_dir
    self.inst_analysis_utils = AnalysisUtils(self.path_base_dir)

    # Result holders start out empty/unsatisfied.
    self.manifest_requirements_satisfied = False
    self.all_returns = []
    self.namespaces = {}

    # APK- and template-specific values are not known at this point.
    self.apk_package_name = None
    self.apk_manifest = None
    self.apk_manifest_root = None
    self.bug_template = None
class ManifestAnalyser:
    """Class to analyse a manifest XML file against a template."""

    def __init__(self, base_dir):
        """Sets paths, initialises variables, and instantiates classes.

        :param base_dir: string specifying location of script base directory
        """
        # Set paths.
        self.path_base_dir = base_dir

        # Start up utility helper.
        self.inst_analysis_utils = AnalysisUtils(self.path_base_dir)

        # Other variables.
        self.apk_package_name = None
        self.apk_manifest = None
        self.apk_manifest_root = None
        self.bug_template = None
        self.namespaces = {}
        self.manifest_requirements_satisfied = False
        self.all_returns = []
        # Populated per analysis run; declared here so that the attributes
        # always exist on an instance.
        self.current_links = {}
        self.current_returns = []

    def fn_perform_manifest_analysis(self, apk_pkg, bug_template,
                                     manifest_string, links=None):
        """Analyses a given manifest string against a template.

        :param apk_pkg: string representing APK package name
        :param bug_template: dictionary object describing the checks that
            are to be performed
        :param manifest_string: XML string returned by Androguard, which
            will be converted to lxml.etree
        :param links: dictionary object containing linked items. Defaults
            to a new, empty dictionary for every call
        :returns: a list containing a boolean indicating whether manifest
            requirements were satisfied, and link objects.
        """
        # Initialise variables.
        # A fresh dictionary is created per call; a literal {} default
        # would be shared (and mutated) across calls.
        self.manifest_requirements_satisfied = False
        self.current_links = {} if links is None else links
        self.all_returns = []
        self.apk_package_name = apk_pkg
        self.apk_manifest_root = etree.fromstring(manifest_string)
        self.namespaces = self.apk_manifest_root.nsmap

        # We want to copy the bug template but only edit the copy.
        # That is, we don't want to modify the original template (which
        # would happen even if we did new_bug_obj = old_bug_object and
        # modified only the new_bug_obj).
        # For this, we do copy.deepcopy(bug template).
        self.bug_template = copy.deepcopy(bug_template)

        # The manifest may specify a "basepath",
        # i.e., a starting level within the XML.
        basepaths = ['']
        manifest_obj = self.bug_template['MANIFESTPARAMS']
        if 'BASEPATH' in manifest_obj:
            basepaths = [
                basepath.strip().replace('manifest->', '')
                for basepath in manifest_obj['BASEPATH'].split(' OR ')
            ]
        logging.debug(
            'Identified the following basepaths for '
            'manifest analysis:\n\t %s', basepaths)

        # Convert user-specified basepaths to a suitable format
        # for the XML path search.
        xml_parser_basepaths = [
            './' + basepath.replace('->', '/') for basepath in basepaths
        ]

        # Get all the paths that satisfy the basepath.
        # e.g., a basepath of manifest->application->activity
        # would result in a list of all activities declared within
        # the manifest.
        starting_points = []
        for xml_parser_basepath in xml_parser_basepaths:
            starting_points.extend(
                self.apk_manifest_root.findall(xml_parser_basepath))
        logging.debug('Identified %d starting points within manifest.',
                      len(starting_points))

        # Begin at a starting point and recursively search through
        # manifest, matching it up against the bug template at each level.
        for starting_point in starting_points:
            self.current_returns = []
            # Every starting point is checked against a pristine copy of
            # the template, because the analysis writes bookkeeping
            # values (the "_..." keys) into it.
            self.bug_template = copy.deepcopy(bug_template)
            self.fn_recursive_analysis(
                self.bug_template['MANIFESTPARAMS'], starting_point)
            if self.fn_check_whether_all_reqs_are_satisfied():
                self.manifest_requirements_satisfied = True
                self.current_links = \
                    self.inst_analysis_utils.fn_convert_returns_to_links(
                        self.current_returns,
                        self.current_links
                    )

        # Return a boolean indicating whether manifest requirements were
        # satisfied, and the links.
        return [self.manifest_requirements_satisfied, self.current_links]

    def fn_recursive_analysis(self, current_template, current_xml_tree):
        """Recursively checks manifest against bug template for matches.

        The template level is updated in place: the '_IDENTIFIED_LOOKFOR'
        counter is incremented and '_SATISFIED_LOOKFOR' is set when enough
        LOOKFORs have been identified. RETURN values are collected in
        self.current_returns.

        :param current_template: dictionary object containing current level
            of bug template
        :param current_xml_tree: lxml Element
        """
        logging.debug('Analysing %s against template %s.',
                      current_xml_tree, current_template)

        # Analyse LOOKFORs.
        if 'LOOKFOR' in current_template:
            lookfor_output = self.fn_analyse_lookfor(
                current_template['LOOKFOR'], current_xml_tree)
            # If LOOKFOR fails, there is no point in proceeding further.
            if not lookfor_output:
                return
            current_template['_IDENTIFIED_LOOKFOR'] += 1
            if (current_template['_IDENTIFIED_LOOKFOR']
                    >= current_template['_EXPECTED_LOOKFOR']):
                current_template['_SATISFIED_LOOKFOR'] = True

        # Analyse RETURNs.
        if 'RETURN' in current_template:
            returnable_elements_string = current_template['RETURN']
            # First get each individual returnable element.
            # Multiple RETURNs can be specified in a list or in a
            # comma-separated string.
            if isinstance(returnable_elements_string, list):
                returnable_elements = returnable_elements_string
            else:
                returnable_elements = returnable_elements_string.split(',')

            # Invoke return analysis for each returnable element.
            for returnable_element in returnable_elements:
                return_values = self.fn_analyse_return(
                    returnable_element.strip(), current_xml_tree)
                # Add non-null/non-empty, not-yet-seen values to the
                # list of RETURNs.
                for return_value in return_values:
                    if return_value is None or return_value == {}:
                        continue
                    if return_value in self.current_returns:
                        continue
                    self.current_returns.append(return_value)

        # Analyse SEARCHPATH.
        if 'SEARCHPATH' in current_template:
            self.fn_recursive_analysis(current_template['SEARCHPATH'],
                                       current_xml_tree)

        # Recursive check at next level.
        for key in current_template:
            # Ignore anything we've already checked.
            if key in ('BASEPATH', 'SEARCHPATH', 'RETURN', 'LOOKFOR'):
                continue
            # Ignore "private" keys.
            if key.startswith('_'):
                continue
            logging.debug('Currently looking at key "%s".', key)
            # If the subsequent level is also a dictionary,
            # then we need to do all this all over again.
            if isinstance(current_template[key], dict):
                xml_search_results = current_xml_tree.findall(key)
                logging.debug('%d XML results found for key "%s".',
                              len(xml_search_results), key)
                for xml_search_result in xml_search_results:
                    self.fn_recursive_analysis(current_template[key],
                                               xml_search_result)
        logging.debug('Finished Analysing %s against template %s.',
                      current_xml_tree, current_template)

    def fn_analyse_return(self, returnable_elements_string,
                          manifest_location):
        """Analyses RETURN elements.

        :param returnable_elements_string: string specifying the element
            to be returned, in the form "<tag> AS <name>"
        :param manifest_location: the current level (tier) of the manifest
        :returns: a list of returnable values in {name: value} format
        :raises IndexError: if the string has no " AS <name>" part
        """
        # Split the RETURN string into the
        # RETURN and AS (i.e., element and label/name) parts.
        split_returnable_elements_string = \
            returnable_elements_string.split(' AS ')
        returnable_element_tag = split_returnable_elements_string[0].strip()
        returnable_elements_name = \
            split_returnable_elements_string[1].strip()

        # If the tag has an identifier (denoting standardisation),
        # then set a variable denoting this.
        convert_output_to_smali = '<smali>' in returnable_element_tag
        if convert_output_to_smali:
            returnable_element_tag = returnable_element_tag.replace(
                '<smali>:', '')

        # Create a list of tags to look for.
        # In the general way, there should be only one (corresponding to
        # the "android" namespace). But we check, to be sure.
        returnable_elements_tags = \
            self.fn_generate_namespace_variants(returnable_element_tag)

        # Collect the value of every variant present at this level.
        all_returnables = []
        for returnable_tag in returnable_elements_tags:
            if returnable_tag not in manifest_location.attrib:
                continue
            output_value = str(manifest_location.attrib[returnable_tag])
            if convert_output_to_smali:
                output_value = self.fn_standardise(output_value)
            logging.debug('Returnable tag found %s with value %s',
                          returnable_tag, output_value)
            all_returnables.append({returnable_elements_name: output_value})
        return all_returnables

    def fn_analyse_lookfor(self, lookfor_object, current_xml_tree):
        """Analyses LOOKFOR elements.

        Every key of the LOOKFOR object, and every element listed under
        each key, must be satisfied. Evaluation stops at the first failure.

        :param lookfor_object: dictionary object specifying the parameters
            to look for
        :param current_xml_tree: lxml Element
        :returns: boolean indicating whether the LOOKFOR was satisfied
        :raises JandroidException: if a LOOKFOR key is not recognised
        """
        # There are different LOOKFOR types, each with a corresponding
        # function.
        lookfor_functions = {
            'TAGEXISTS': self.fn_analyse_tag_exists,
            'TAGNOTEXISTS': self.fn_analyse_tag_not_exists,
            'TAGVALUEMATCH': self.fn_analyse_tag_value_match,
            'TAGVALUENOMATCH': self.fn_analyse_tag_value_no_match,
        }
        for lookfor_key, lookfor_item in lookfor_object.items():
            fn_to_execute = lookfor_functions.get(lookfor_key)
            if fn_to_execute is None:
                raise JandroidException({
                    'type': str(os.path.basename(__file__))
                            + ': IncorrectLookforKey',
                    'reason': 'Unrecognised LOOKFOR key.'
                })

            # A single LOOKFOR object may have a number of elements to
            # satisfy (specified as a list).
            all_lookfors = self.fn_process_lookfor_lists(lookfor_item)

            # If even one fails, the whole thing fails.
            for single_lookfor in all_lookfors:
                if not fn_to_execute(single_lookfor, current_xml_tree):
                    return False
        return True

    def fn_process_lookfor_lists(self, lookfor_item):
        """Generalises a LOOKFOR element as a list.

        Empty strings are dropped. The input is never modified; a new list
        is returned.

        :param lookfor_item: string or list of strings denoting element(s)
            to look for
        :returns: list of LOOKFOR elements
        :raises JandroidException: if lookfor_item is neither a string nor
            a list
        """
        if isinstance(lookfor_item, str):
            candidate_values = [lookfor_item]
        elif isinstance(lookfor_item, list):
            candidate_values = lookfor_item
        else:
            raise JandroidException({
                'type': str(os.path.basename(__file__))
                        + ': IncorrectLookforValue',
                'reason': 'LOOKFOR values must be a string or a list.'
            })
        return [value for value in candidate_values if value != '']

    def fn_analyse_tag_exists(self, lookfor_string, current_xml_tree):
        """Checks if a specific tag is present in current level of XML tree.

        This function merely checks for the presence of a tag. It does not
        check the tag value.

        :param lookfor_string: string denoting item to look for. Several
            alternatives may be separated by " OR "
        :param current_xml_tree: lxml Element
        :returns: boolean with value True if the tag was present
            (else, False)
        """
        # Collect the namespace variants of every alternative in one flat
        # list, so that each entry is a single attribute name.
        all_tag_variants = []
        for tag in lookfor_string.split(' OR '):
            all_tag_variants.extend(
                self.fn_generate_namespace_variants(tag.strip()))

        # If even one is present, return True (because it's an OR operator).
        attributes = current_xml_tree.attrib
        return any(tag_name in attributes for tag_name in all_tag_variants)

    def fn_analyse_tag_not_exists(self, lookfor_string, current_xml_tree):
        """Checks that a tag does not exist at the current XML tree level.

        :param lookfor_string: string denoting item to look for
        :param current_xml_tree: lxml Element
        :returns: boolean with value False if the tag was present
            (else, True)
        """
        # Generate namespace variants.
        all_tag_variants = \
            self.fn_generate_namespace_variants(lookfor_string.strip())
        attributes = current_xml_tree.attrib
        return not any(
            tag_name in attributes for tag_name in all_tag_variants)

    def _fn_split_tag_and_value(self, lookfor_string):
        """Splits a "tag=value" LOOKFOR string at the first "=".

        :param lookfor_string: string of the form "tag=value"
        :returns: tuple of (tag, value), both stripped of whitespace
        :raises JandroidException: if the string contains no "="
        """
        tag, separator, value = lookfor_string.partition('=')
        if not separator:
            raise JandroidException({
                'type': str(os.path.basename(__file__))
                        + ': IncorrectLookforValue',
                'reason': 'Expected LOOKFOR string of the form tag=value.'
            })
        return tag.strip(), value.strip()

    def fn_analyse_tag_value_match(self, lookfor_string, current_xml_tree):
        """Checks that a tag (exists and) value matches a given value.

        :param lookfor_string: string denoting item to look for, in the
            form "tag=value" (values may be separated by " OR ")
        :param current_xml_tree: lxml Element
        :returns: boolean with value True if the tag value matched the
            expected value. False otherwise
        :raises JandroidException: if the string contains no "="
        """
        # Generate namespace variants.
        lookfor_tag, lookfor_value = \
            self._fn_split_tag_and_value(lookfor_string)
        lookfor_tags = self.fn_generate_namespace_variants(lookfor_tag)

        # If the tag is "exported", we analyse it separately.
        # An "exported" tag can't have multiple possible values specified
        # in LOOKFOR, because it only has two possible values (True or
        # False) and looking for both would be pointless.
        # The last ":"-separated part is used so that a tag without a
        # namespace prefix does not cause an IndexError.
        if lookfor_tag.split(':')[-1] == 'exported':
            return self.fn_process_exported(lookfor_tags, lookfor_value,
                                            current_xml_tree, True)

        # Create a list of values we look for.
        # Multiple values can be specified using the OR operator.
        # Note that the AND operator is not recognised, as
        # it could just be specified as a separate rule.
        if ' OR ' in lookfor_value:
            lookfor_values = [
                split_value.strip()
                for split_value in lookfor_value.split(' OR ')
                if split_value.strip() != ''
            ]
        else:
            lookfor_values = [lookfor_value]
        logging.debug(
            'Looking for tag(s) %s with value(s) %s in XML attrib %s',
            lookfor_tags, lookfor_values, current_xml_tree.attrib)

        # If the tag is present in the manifest, and the value
        # matches what we expect, return True. Else, return False.
        attributes = current_xml_tree.attrib
        return any(
            tag in attributes and attributes[tag] in lookfor_values
            for tag in lookfor_tags
        )

    def fn_process_exported(self, lookfor_tags, lookfor_value,
                            current_xml_tree, is_match=True):
        """Processes the "exported" tag.

        If the attribute is not set explicitly, the default is derived:
        activities, activity-aliases, receivers and services default to
        "true" when they have an intent-filter child (else "false");
        providers default to "false" when targetSdkVersion >= 17
        (else "true").

        :param lookfor_tags: a list of tags (namespace variants) to
            look for
        :param lookfor_value: string value (either "true" or "false")
        :param current_xml_tree: lxml Element
        :param is_match: boolean indicating whether the requirement is to
            check for match or no-match
        :returns: boolean indicating whether the requirement was met.
            False if no value could be determined
        :raises JandroidException: if the current element is not one that
            can carry the exported attribute
        """
        # Make sure we are at the correct level in the XML tree.
        # That is, the exported tag is only used with activities,
        # services, receivers and providers.
        current_tag = current_xml_tree.tag
        exported_tag_options = [
            'activity', 'activity-alias', 'receiver', 'service', 'provider'
        ]
        if current_tag not in exported_tag_options:
            raise JandroidException({
                'type': str(os.path.basename(__file__)) + ': InvalidTag',
                'reason': 'Exported tag must belong to one of ['
                          + '"activity", "activity-alias", "receiver", '
                          + '"service", "provider"'
                          + '].'
            })

        # First check if exported is explicitly defined.
        # If it is, then we needn't do much more processing.
        tag_value_in_manifest = None
        for tag in lookfor_tags:
            if tag in current_xml_tree.attrib:
                tag_value_in_manifest = current_xml_tree.attrib[tag]

        # If exported isn't explicitly defined, then consider default
        # values.
        if tag_value_in_manifest is None:
            # For activities, receivers and services, the presence of an
            # intent-filter means exported defaults to True.
            # Else it defaults to False.
            if current_tag in ('activity', 'activity-alias',
                               'receiver', 'service'):
                if current_xml_tree.findall('intent-filter'):
                    tag_value_in_manifest = 'true'
                else:
                    tag_value_in_manifest = 'false'
            # For providers, if sdkversion >= 17, defaults to False.
            # Else, defaults to True.
            else:
                target_sdk_version = None
                uses_sdk = self.apk_manifest_root.findall('uses-sdk')
                if uses_sdk:
                    possible_targetsdktags = \
                        self.fn_generate_namespace_variants(
                            '<NAMESPACE>:targetSdkVersion'
                        )
                    for uses_sdk_element in uses_sdk:
                        for targetsdktag in possible_targetsdktags:
                            if targetsdktag in uses_sdk_element.attrib:
                                target_sdk_version = int(
                                    uses_sdk_element.attrib[targetsdktag])
                if target_sdk_version is not None:
                    if target_sdk_version >= 17:
                        tag_value_in_manifest = 'false'
                    else:
                        tag_value_in_manifest = 'true'

        # This is a non-ideal way to handle the situation where there
        # is a provider with no explicit export, and no
        # uses-sdk/targetSdkVersion element.
        if tag_value_in_manifest is None:
            return False

        # The outcome is True when "values are equal" agrees with
        # "we wanted them to be equal".
        values_match = (tag_value_in_manifest == lookfor_value)
        return values_match if is_match else not values_match

    def fn_analyse_tag_value_no_match(self, lookfor_string,
                                      current_xml_tree):
        """Checks to make sure no tag value matches the given pattern.

        :param lookfor_string: string denoting item to look for, in the
            form "tag=value"
        :param current_xml_tree: lxml Element
        :returns: boolean with value True if at least one tag value did not
            match the given value. False otherwise
        :raises JandroidException: if the string contains no "="
        """
        # Generate namespace variants.
        lookfor_tag, lookfor_value = \
            self._fn_split_tag_and_value(lookfor_string)
        lookfor_tags = self.fn_generate_namespace_variants(lookfor_tag)

        # If the tag is "exported", we analyse it separately.
        # An "exported" tag can't have multiple possible values specified
        # in LOOKFOR, because it only has two possible values (True or
        # False) and looking for both would be pointless.
        if lookfor_tag.split(':')[-1] == 'exported':
            return self.fn_process_exported(lookfor_tags, lookfor_value,
                                            current_xml_tree, False)

        # If the tag is present, but value doesn't match, return True.
        # Else (tag present with matching value, or tag absent),
        # return False.
        # NOTE(review): an earlier comment here said an absent tag should
        # yield True, but the code has always returned False in that case;
        # the existing behaviour is kept. Confirm which is intended.
        attributes = current_xml_tree.attrib
        return any(
            tag in attributes and attributes[tag] != lookfor_value
            for tag in lookfor_tags
        )

    def fn_generate_namespace_variants(self, tag):
        """Generates namespace variants for an XML tag.

        :param tag: string XML tag, for which namespace variants are to be
            generated
        :returns: list of namespace variants (one per namespace in
            self.namespaces if the tag contains "<NAMESPACE>", otherwise
            just the tag itself)
        """
        # Different namespaces may be used instead of the default "android"
        # (although this is quite rare).
        # To handle this, the user specifies a placeholder
        # "<NAMESPACE>". The script obtains all namespaces from
        # the manifest and generates all possible tags.
        if '<NAMESPACE>' not in tag:
            return [tag]
        # The replacement is always applied to the original tag; reusing
        # the substituted result would leave no placeholder for the
        # second and later namespaces.
        return [
            tag.replace('<NAMESPACE>:', '{' + namespace_uri + '}')
            for namespace_uri in self.namespaces.values()
        ]

    def fn_check_whether_all_reqs_are_satisfied(self):
        """Checks whether all bug elements are satisfied.

        This function calls a recursive check function to analyse all
        LOOKFOR elements in the current bug template. If, at the end, the
        number of satisfied LOOKFORs is equal to the number of expected
        LOOKFORs, then it returns True. Else, it returns False.

        :returns: boolean with value True if all requirements are
            satisfied and False if not
        """
        self.expected_lookfor = 0
        self.satisfied_lookfor = 0
        self.fn_recursively_check_lookfor(self.bug_template['MANIFESTPARAMS'])
        return self.expected_lookfor == self.satisfied_lookfor

    def fn_recursively_check_lookfor(self, json_obj):
        """Recursively checks all LOOKFOR elements.

        For each LOOKFOR at this level, the expected count is incremented,
        and the satisfied count is incremented only if the
        '_SATISFIED_LOOKFOR' value at the same level is set. Child
        dictionaries are then checked in the same way.

        :param json_obj: dictionary object representing one level of the
            manifest analysis template
        """
        for key in json_obj:
            if key == 'LOOKFOR':
                self.expected_lookfor += 1
                # If even one LOOKFOR is not satisfied,
                # then the match fails. A missing flag counts as
                # not satisfied.
                if not json_obj.get('_SATISFIED_LOOKFOR'):
                    return
                self.satisfied_lookfor += 1
            # If the child object is a dictionary,
            # we have to repeat this process.
            if isinstance(json_obj[key], dict):
                self.fn_recursively_check_lookfor(json_obj[key])

    def fn_standardise(self, element):
        """Converts a string from dotted (Java) representation to smali.

        :param element: string to convert (from Java to smali)
        :returns: modified string (unchanged if it contains no ".")
        """
        if '.' in element:
            element = Conversions().fn_dotted_to_smali(element)
        return element
def fn_perform_code_trace(self, a, d, dx, code_trace_template, links):
    """Run the TRACE part of a bug template and report the outcome.

    :param a: Androguard object for the APK, stored on the instance
    :param d: Androguard "d" array for the APK, stored on the instance
    :param dx: Androguard "dx" object for the APK, stored on the instance
    :param code_trace_template: dictionary object (or list of dictionary
        objects) corresponding to the trace part of a bug template
    :param links: dictionary object containing linked items
    :returns: list containing boolean value indicating whether the trace
        was satisfied, and a dictionary object of updated links
    """
    logging.debug('Performing code trace.')

    # Store the Androguard variables for this APK and build the utility
    # helper around them.
    self.androguard_apk_obj, self.androguard_d_array, self.androguard_dx = \
        a, d, dx
    self.inst_analysis_utils = AnalysisUtils(
        self.path_base_dir,
        self.androguard_apk_obj,
        self.androguard_d_array,
        self.androguard_dx
    )

    # TRACE template, links from earlier template parts, and the list of
    # trace chains (to be converted to RETURN items).
    self.trace_template = code_trace_template
    self.current_links = links
    self.output_chains = []

    template = self.trace_template
    if type(template) is dict:
        # A single trace object decides the outcome by itself.
        bool_satisfied = \
            self.fn_process_individual_trace_list_item(template)
    elif type(template) is list:
        # Every sub-trace is run (no early exit), and all of them must
        # be satisfied.
        outcomes = [
            self.fn_process_individual_trace_list_item(trace_item)
            for trace_item in template
        ]
        bool_satisfied = all(outcome == True for outcome in outcomes)
    else:
        bool_satisfied = False

    # Process returns as links.
    if bool_satisfied == True:
        self.current_links = \
            self.inst_analysis_utils.fn_convert_returns_to_links(
                self.current_returns,
                self.current_links
            )
    self.fn_reset()

    # Hand back the outcome and the links for the next code segment.
    return [bool_satisfied, self.current_links]
class CodeTrace: """The main code tracing class.""" def __init__(self, base_dir): """Sets paths and initialises variables. :param a: androguard.core.bytecodes.apk.APK object :param d: array of androguard.core.bytecodes.dvm.DalvikVMFormat objects :param dx: androguard.core.analysis.analysis.Analysis object :param base_dir: string indicating script base path """ # Set paths. self.path_base_dir = base_dir self.path_config_file = os.path.join(self.path_base_dir, 'config', 'jandroid.conf') # Set a default max trace length. self.default_trace_length_max = 25 # Read config file. config = configparser.ConfigParser() config.read(self.path_config_file) if config.has_section('TRACEPARAMS'): if config.has_option('TRACEPARAMS', 'TRACE_LENGTH_MAX'): self.default_trace_length_max = \ int(config['TRACEPARAMS']['TRACE_LENGTH_MAX']) self.trace_length_max = self.default_trace_length_max # Initialise special case object. self.special_case_object_list_reverse = { 'doInBackground': { 'Landroid/os/AsyncTask;': [ 'execute([Ljava/lang/Object;)Landroid/os/AsyncTask;', 'execute(Ljava/lang/Runnable;)' ] } } self.special_case_object_list_forward = { 'execute([Ljava/lang/Object;)Landroid/os/AsyncTask;': 'doInBackground', 'execute(Ljava/lang/Runnable;)V': 'doInBackground' } # Store returns. self.current_returns = [] # This is to let us know whether to perform a "lenient" stop check or not. self.hardcoded_traceto = False self.advanced_trace = CodeTraceAdvanced(self.path_base_dir) def fn_reset(self): """Resets objects to free up memory.""" self.androguard_apk_obj = None self.androguard_d_array = None self.androguard_dx = None self.inst_analysis_utils = None self.current_returns = [] def fn_perform_code_trace(self, a, d, dx, code_trace_template, links): """Traces within code based on a trace template. 
:param code_trace_template: dictionary object corresponding to the trace part of a bug template :param links: dictionary object containing linked items :returns: list containing boolean value indicating whether the trace was satisfied, and a dictionary object of updated links """ logging.debug('Performing code trace.') # Androguard variables for this APK. self.androguard_apk_obj = a self.androguard_d_array = d self.androguard_dx = dx # Start up utility helper. self.inst_analysis_utils = AnalysisUtils(self.path_base_dir, self.androguard_apk_obj, self.androguard_d_array, self.androguard_dx) # The TRACE-relevant part of the bug template. self.trace_template = code_trace_template # Linked elements from checking previous parts of the template. self.current_links = links # Keep track of trace chains (to be converted to RETURN items). self.output_chains = [] # Variables to determine how many traces to perform and # to keep track of how many have been satisfied. total_traces = 0 satisfied_traces = 0 # Variable to determine whether the overall TRACE is satisfied. bool_satisfied = False # The trace template can either be a dictionary or a list # of dictionary objects. if type(self.trace_template) is dict: bool_satisfied = \ self.fn_process_individual_trace_list_item(self.trace_template) # If the search is a list, then all individual sub-traces # must be satisfied. elif type(self.trace_template) is list: for trace_item in self.trace_template: total_traces += 1 bool_one_satisfied = \ self.fn_process_individual_trace_list_item(trace_item) if bool_one_satisfied == True: satisfied_traces += 1 if satisfied_traces == total_traces: bool_satisfied = True # Process returns as links. if bool_satisfied == True: self.current_links = \ self.inst_analysis_utils.fn_convert_returns_to_links( self.current_returns, self.current_links ) self.fn_reset() # Return the outcome and the links, to be used by next code segment. 
return [bool_satisfied, self.current_links] def fn_process_individual_trace_list_item(self, trace_dictionary): """Processes an individual trace object. :param trace_dictionary: dictionary object containing details of an individual trace to perform :returns: boolean indicating whether the trace requirements were satisfied """ # Each item within the list must be a dictionary trace object. bool_satisfied = False # Get parameters such as trace direction, etc. self.fn_get_trace_parameters(trace_dictionary) if self.trace_type == TRACE_TYPE_ADVANCED: bool_adv_trace_output, output_chains = \ self.advanced_trace.fn_start_adv_trace( self.androguard_apk_obj, self.androguard_d_array, self.androguard_dx, trace_dictionary, self.current_links, self.trace_direction, self.trace_length_max ) return bool_adv_trace_output # There may be a number of combinations, if the trace from/to # have elements separated by OR. [trace_from_string_list, trace_to_string_list] = \ self.fn_enumerate_trace_source_sinks(trace_dictionary) # For each combination, run trace. for trace_from_string_element in trace_from_string_list: for trace_to_string_element in trace_to_string_list: bool_single_trace_satisfied = self.fn_trace_through_code( trace_from_string_element, trace_to_string_element) if bool_single_trace_satisfied == True: bool_satisfied = True if bool_satisfied == True: if 'RETURN' in trace_dictionary: self.fn_analyse_returns(trace_dictionary) return bool_satisfied def fn_get_trace_parameters(self, trace_template): """Sets trace parameters based on trace template. :param trace_template: dictionary object corresponding to a single trace, from which trace parameters are to be extracted """ # Set max trace length, if available. if 'TRACELENGTHMAX' in trace_template: self.trace_length_max = int(trace_template['TRACELENGTHMAX']) else: self.trace_length_max = self.default_trace_length_max # Set trace direction. 
if 'TRACEDIRECTION' in trace_template: trace_direction = trace_template['TRACEDIRECTION'] if trace_direction == TRACE_FORWARD: self.trace_direction = TRACE_FORWARD else: self.trace_direction = TRACE_REVERSE else: # Default is REVERSE. self.trace_direction = TRACE_REVERSE # Set trace type. if 'TRACETYPE' in trace_template: self.trace_type = trace_template['TRACETYPE'] else: self.trace_type = TRACE_TYPE_BASIC def fn_enumerate_trace_source_sinks(self, trace_template): """Enumerates the (list of) trace start and end points from template. :param trace_template: dictionary object corresponding to a single trace, from which trace end points are to be extracted :returns: list containing two lists - the first a list of possible start points and the second, a list of possible end points """ # Get the start points. trace_from_string = trace_template['TRACEFROM'] if ' OR ' in trace_from_string: trace_from_string_list = trace_from_string.split(' OR ') else: trace_from_string_list = [trace_from_string] # Get the end points. trace_to_string = trace_template['TRACETO'] if ' OR ' in trace_to_string: trace_to_string_list = trace_to_string.split(' OR ') else: trace_to_string_list = [trace_to_string] return [trace_from_string_list, trace_to_string_list] def fn_trace_through_code(self, trace_from_string, trace_to_string): """Begins the actual trace. :param trace_from_string: string corresponding to a single start point :param trace_to_string: string corresponding to a single end point :returns: boolean indicating whether at least one path between the start and end points was found """ # Get trace types. [self.from_class_method, trace_from_string] = \ self.fn_get_trace_type(trace_from_string) [self.to_class_method, trace_to_string] = \ self.fn_get_trace_type(trace_to_string) # Get any linked items. 
# Tail of a method whose definition begins before this section: resolve the
# trace-from / trace-to template strings into candidate start and end points,
# then run the trace.
trace_from_list = self.fn_get_trace_items(trace_from_string,
                                          self.from_class_method)
trace_to_list = self.fn_get_trace_items(trace_to_string,
                                        self.to_class_method)
# Without at least one start point and one end point there is nothing to trace.
if ((trace_from_list == []) or (trace_to_list == [])):
    logging.debug('Either TraceFrom or TraceTo evaluated to None.')
    return False
self.trace_to_list = trace_to_list
return self.fn_trace_handler(trace_from_list)

def fn_get_trace_type(self, string):
    """Gets trace starting point type.

    The type is taken from an explicit "<type>:" prefix when the string
    contains a colon; otherwise it is inferred: "<method>" if the string
    contains "->", else "<class>".

    :param string: string containing trace start point type (either
        "<class>" or "<method>"). The string may not directly contain
        these values, in which case the type will have to be inferred.
    :returns: list containing the start point type and the modified
        string (with the "<class>" or "<method>" indication removed)
    """
    trace_type = '<class>'
    if ':' in string:
        # Everything before the first colon is treated as the type prefix.
        trace_type = string.split(':')[0]
        string = string[len(trace_type) + 1:]
    else:
        if '->' in string:
            trace_type = '<method>'
    return [trace_type, string]

def fn_get_trace_items(self, string, trace_type):
    """Gets the actual strings to use as start/end points of trace.

    As a side effect, sets self.hardcoded_traceto to False when the string
    is a link reference (starts with "@") and to True otherwise.

    :param string: the string specified within the template
    :param trace_type: string (either "<class>" or "<method>"), indicating
        whether the trace should begin/end at the class level or method
        level
    :returns: list of possible start/end points
    """
    output_items = []
    # If the string begins with @, then we need to find linked items.
    if string[0] == '@':
        self.hardcoded_traceto = False
        # If a sub-part has not been specified, then assume that the
        # entire string is the link name.
        if ']' not in string:
            link_name = string
            link_subpart = ''
            remaining_string = ''
        # If a sub-part has been specified, then split the string to
        # identify the link name, relevant sub-part, and remainder
        # of string.
        else:
            split_for_link = string.split(']')
            remaining_string = split_for_link[1]
            second_split = split_for_link[0].split('[')
            link_name = second_split[0]
            link_subpart = second_split[1].replace(' ', '')
        # Get all linked items.
        linked_items = self.inst_analysis_utils.fn_get_linked_items(
            self.current_links,
            link_name)
        # No sub-part: use each linked item as-is (plus any remainder).
        if link_subpart == '':
            for linked_item in linked_items:
                return_string = linked_item + remaining_string
                if trace_type == '<class>':
                    return_string = return_string.split('->')[0]
                output_items.append(return_string)
        # "<class>" sub-part: keep only the part before "->".
        elif link_subpart == '<class>':
            for linked_item in linked_items:
                class_part_only = linked_item.split('->')[0]
                return_string = class_part_only + remaining_string
                if trace_type == '<class>':
                    return_string = return_string.split('->')[0]
                output_items.append(return_string)
        # "<method>" sub-part: skip linked items that have no method part.
        elif link_subpart == '<method>':
            for linked_item in linked_items:
                if '->' not in linked_item:
                    continue
                return_string = linked_item + remaining_string
                if trace_type == '<class>':
                    return_string = return_string.split('->')[0]
                output_items.append(return_string)
    # If the string doesn't begin with @, then it's a normal string.
    else:
        self.hardcoded_traceto = True
        if trace_type == '<class>':
            string = string.split('->')[0]
        output_items = [string]
    return output_items

def fn_trace_handler(self, trace_from_list):
    """Starts the trace process and outputs the result.

    For every start point the set of already-checked methods and the stop
    condition are reset before the forward or reverse tracer is started.
    Identified paths accumulate in self.output_chains, which this method
    only reads.

    :param trace_from_list: list containing possible start points for trace
    :returns: boolean indicating whether at least one path was identified
        between the start and end points
    """
    for trace_from in trace_from_list:
        self.checked_methods = set()
        # Set a stop condition.
        self.stop_condition = STOP_CONDITION_FALSE

        # Get class/method/desc parts.
        [class_part, method_part, desc_part] = \
            self.fn_determine_class_method_desc(
                trace_from,
                self.from_class_method
            )
        # Start the forward or reverse tracers, based on template.
        # The start point itself is passed as the initial trace chain.
        if self.trace_direction == TRACE_REVERSE:
            self.fn_trace_reverse(class_part, method_part, desc_part,
                                  trace_from)
        else:
            self.fn_trace_forward(class_part, method_part, desc_part,
                                  trace_from)
    # If the output chain list is not empty, it means at least one path
    # between the start and end points was identified.
    if self.output_chains != []:
        return True
    else:
        return False

def fn_trace_reverse(self, class_part, method_part, desc_part,
                     trace_chain=''):
    """Performs the reverse tracing function.

    Reverse tracing starts from TRACEFROM and gets all xref_from at each
    level. The collection of all xref_from's are stored in an
    "ordered string".

    Each starting point found here is handed to fn_analyse_trace_point,
    which may call back into this method.

    :param class_part: string denoting class part of trace start point
    :param method_part: string denoting method part of trace start point
    :param desc_part: string denoting descriptor part of trace start point
    :param trace_chain: string denoting ordered trace chain
    """
    # Get starting points.
    starting_points = \
        self.inst_analysis_utils.fn_get_calls_to_method(
            class_part,
            method_part,
            desc_part
        )
    # Include subclasses.
    all_subclasses = []
    all_subclasses.extend(
        self.inst_analysis_utils.fn_find_subclasses(class_part))
    for subclass in all_subclasses:
        starting_points.extend(
            self.inst_analysis_utils.fn_get_calls_to_method(
                subclass,
                method_part,
                desc_part))

    # We want to also add the original method to the search as it might
    # not be directly called, for example OnCreate.
    # The parts are escaped before being passed to find_methods; a '.'
    # descriptor ("don't care") is deliberately left unescaped.
    # NOTE(review): method_part can also be '.' (see
    # fn_determine_class_method_desc) and is escaped unconditionally here
    # - confirm that is intended.
    if desc_part != '.':
        desc_part = re.escape(desc_part)
    class_part = re.escape(class_part)
    method_part = re.escape(method_part)
    mathcing_methods = self.androguard_dx.find_methods(
        class_part,
        method_part,
        desc_part)
    for method in mathcing_methods:
        starting_points.append(method.get_method())

    # Reset.
    class_part = None
    method_part = None
    desc_part = None

    # Start trace for each starting point.
    for starting_point in starting_points:
        # Get class/method/desc parts.
        [class_part, method_part, desc_part] = \
            self.inst_analysis_utils.fn_get_class_method_desc_from_method(
                starting_point
            )

        # If we want to consider subclasses as well.
        # Note that this is different to the step above. Above, we get
        # subclasses of the class/method that is being called. Here, we
        # get the subclasses for the class that is doing the calling.
        class_parts = [class_part]
        class_parts.extend(
            self.inst_analysis_utils.fn_find_subclasses(class_part))

        # Handle any special cases (AsyncTask, etc).
        # The class name remains the same for these special cases.
        # Only the method/descriptor changes.
        if method_part in self.special_case_object_list_reverse:
            method_descriptors = \
                self.fn_handle_special_case_reverse(
                    class_part,
                    method_part,
                    desc_part
                )
        else:
            method_descriptors = [method_part + desc_part]
        # The special-case handler returns None when no superclass matches;
        # fall back to the unmodified method/descriptor in that case.
        if not method_descriptors:
            method_descriptors = [method_part + desc_part]

        # Go to the next step of the trace.
        for class_part in class_parts:
            for method_descriptor in method_descriptors:
                # Split "name(desc)ret" back into its name and descriptor.
                method_part = method_descriptor.split('(')[0]
                desc_part = '(' + method_descriptor.split('(')[1]
                self.fn_analyse_trace_point(class_part, method_part,
                                            desc_part, trace_chain)

def fn_handle_special_case_reverse(self, class_part, method_part,
                                   desc_part):
    """Handles cases such as AsyncTask, where no direct link can be made.

    Looks up the special-case entry for method_part and returns the value
    stored for the first superclass of class_part (or class_part itself)
    that appears in that entry.

    :param class_part: string name for class
    :param method_part: string name for method
    :param desc_part: string name for descriptor (not used here)
    :returns: list of revised method_part+desc_part, or None (implicitly)
        when no superclass matches
    """
    relevant_object = self.special_case_object_list_reverse[method_part]
    # NOTE(review): new_method_to_search is never used in this method.
    new_method_to_search = []
    all_superclasses = \
        self.inst_analysis_utils.fn_find_superclasses(class_part)
    # Is this needed?
    all_superclasses.append(class_part)
    for superclass in all_superclasses:
        superclass = superclass.strip()
        if superclass in relevant_object:
            return relevant_object[superclass]

def fn_trace_forward(self, class_part, method_part, desc_part,
                     trace_chain=''):
    """Performs the forward tracing function.

    Forward tracing starts from TRACEFROM and gets all xref_to at each
    level. The collection of all xref_to's are stored in an
    "ordered string".

    Each starting point found here is handed to fn_analyse_trace_point,
    which may call back into this method.

    :param class_part: string denoting class part of trace start point
    :param method_part: string denoting method part of trace start point
    :param desc_part: string denoting descriptor part of trace start point
    :param trace_chain: string denoting ordered trace chain
    """
    # Get starting points.
    # These will still be methods that call the method of interest
    # (even though the trace direction is Forward).
    # NOTE(review): the helper used is fn_get_calls_from_method, so the
    # sentence above may be stale - confirm against AnalysisUtils.
    starting_points = \
        self.inst_analysis_utils.fn_get_calls_from_method(
            class_part,
            method_part,
            desc_part
        )

    # We want to also add the original method to the search as it might
    # not be directly called, for example OnCreate.
    # The parts are escaped before being passed to find_methods; a '.'
    # descriptor ("don't care") is deliberately left unescaped.
    if desc_part != '.':
        desc_part = re.escape(desc_part)
    class_part = re.escape(class_part)
    method_part = re.escape(method_part)
    mathcing_methods = self.androguard_dx.find_methods(
        class_part,
        method_part,
        desc_part)
    for method in mathcing_methods:
        starting_points.append(method.get_method())

    # Reset.
    class_part = None
    method_part = None
    desc_part = None

    for starting_point in starting_points:
        # If the method is external, we won't get any further.
        # Get class/method/desc parts.
        [class_part, method_part, desc_part] = \
            self.inst_analysis_utils.fn_get_class_method_desc_from_method(
                starting_point
            )
        class_parts = [class_part]

        # Special case handling.
        # The method name is replaced by the mapped one and the descriptor
        # becomes '.' ("don't care").
        method_descriptor = method_part + desc_part
        if method_descriptor in self.special_case_object_list_forward:
            method_part = \
                self.fn_handle_special_case_forward(method_descriptor)
            desc_part = '.'

        # Go to next step.
        for class_part in class_parts:
            self.fn_analyse_trace_point(class_part, method_part,
                                        desc_part, trace_chain)

def fn_handle_special_case_forward(self, method_descriptor):
    """Handle special cases, such as AsyncTask, in forward traces.

    This is a plain lookup in self.special_case_object_list_forward; the
    caller checks that the key exists before calling.

    :param method_descriptor: string denoting combined method and
        descriptor parts
    :returns: string for method part
    """
    return self.special_case_object_list_forward[method_descriptor]

def fn_analyse_trace_point(self, class_part, method_part, desc_part,
                           trace_chain):
    """Checks current trace point against stop condition; else continues.

    Completed chains are appended to self.output_chains. Unless the stop
    condition was fully met, the chain is extended with the current point
    and tracing recurses in the configured direction, up to
    self.trace_length_max points.

    :param class_part: string denoting class part of current trace point
    :param method_part: string denoting method part of current trace point
    :param desc_part: string denoting descriptor part of current trace point
    :param trace_chain: string denoting ordered trace chain
    """
    compound_name = class_part + '->' + method_part + desc_part
    # Do not trace into these class-name prefixes.
    if compound_name.startswith('Landroid') or compound_name.startswith(
            'Ljava') or compound_name.startswith('Lcom/google/android'):
        return
    # Visit every method at most once per start point (the set is reset in
    # fn_trace_handler).
    if compound_name in self.checked_methods:
        return
    else:
        self.checked_methods.add(compound_name)

    # NOTE(review): tmpChain is created empty on every call and only
    # appended to in the final else branch, so the "for ... in tmpChain"
    # loops in the two branches below never execute.
    tmpChain = []
    # Check if stop condition is met.
    self.fn_check_stop_condition(compound_name)
    if self.stop_condition == STOP_CONDITION_TRUE:
        self.stop_condition = STOP_CONDITION_FALSE
        if trace_chain == '':
            trace_chain = compound_name
        else:
            trace_chain = trace_chain + ',' + compound_name
        # If somehow we have the same chain repeated:
        if trace_chain in self.output_chains:
            return
        self.output_chains.append(trace_chain)
        for trace_chain in tmpChain:
            if trace_chain in self.output_chains:
                return
            self.output_chains.append(trace_chain)
        return
    elif self.stop_condition == STOP_CONDITION_MAYBE:
        # A partial match is recorded with a "|MAYBE|" marker; this branch
        # does not return, so the length check and recursion below still
        # run for it.
        self.stop_condition = STOP_CONDITION_FALSE
        compound_name = '|MAYBE|' + compound_name
        if trace_chain == '':
            trace_chain = compound_name
        else:
            trace_chain = trace_chain + ',' + compound_name
        # If somehow we have the same chain repeated:
        if trace_chain in self.output_chains:
            return
        self.output_chains.append(trace_chain)
        for trace_chain in tmpChain:
            if trace_chain in self.output_chains:
                return
            self.output_chains.append(trace_chain)
    else:
        if trace_chain == '':
            trace_chain = compound_name
        else:
            trace_chain = trace_chain + ',' + compound_name
        # If somehow we have the same chain repeated:
        if trace_chain in tmpChain:
            return
        tmpChain.append(trace_chain)

    # If the stop condition wasn't met,
    # and we haven't exceeded the max chain length.
    trace_chain_as_list = trace_chain.split(',')
    if len(trace_chain_as_list) > self.trace_length_max:
        return
    if self.trace_direction == TRACE_FORWARD:
        self.fn_trace_forward(class_part, method_part,
                              desc_part, trace_chain)
    else:
        self.fn_trace_reverse(class_part, method_part,
                              desc_part, trace_chain)

def fn_check_stop_condition(self, check_value):
    """Checks whether the stop condition has been satisfied for the trace.

    This does not return a value, but rather sets a variable
    (self.stop_condition) to a pre-defined value if the stop condition
    is satisfied. An exact match against self.trace_to_list gives
    STOP_CONDITION_TRUE. For a single hardcoded trace-to value, a class
    wildcard match gives STOP_CONDITION_TRUE and a match on the
    method+descriptor part only gives STOP_CONDITION_MAYBE.

    :param check_value: string value to be checked against stop condition
    """
    # For class-level end points only the class part is compared.
    if self.to_class_method == '<class>':
        check_value = check_value.split('->')[0]
    if check_value in self.trace_to_list:
        self.stop_condition = STOP_CONDITION_TRUE
        return

    # Special types of checks for when the traceto is hardcoded.
    if self.hardcoded_traceto == False:
        return
    # This should never be true. Hardcoded traceto's will only have one
    # value in the list (even with ORs).
    if len(self.trace_to_list) > 1:
        return
    trace_to_item = self.trace_to_list[0]

    # Check for wildcard classes.
    # The '*' is removed and the remainder is matched as a substring.
    if ((self.to_class_method == '<class>') and ('*' in trace_to_item)):
        trace_to_item = trace_to_item.replace('*', '')
        if trace_to_item in check_value:
            self.stop_condition = STOP_CONDITION_TRUE
        else:
            self.stop_condition = STOP_CONDITION_FALSE
        return

    # Do a partial search for methods only. Do this only when the entire
    # trace-to is hardcoded.
    # If traceto is only a class, we can't do much.
    if '->' not in trace_to_item:
        return
    if '->' not in check_value:
        return
    # If traceto doesn't have descriptor, don't proceed.
    # Else, we might end up with way too many FPs.
    if '(' not in trace_to_item:
        return
    if '(' not in check_value:
        return
    # Same method name and descriptor on a different class: possible match.
    if trace_to_item.split('->')[1] == check_value.split('->')[1]:
        self.stop_condition = STOP_CONDITION_MAYBE
        return

def fn_determine_class_method_desc(self, trace_from, trace_from_type):
    """Determines the class/method/desc parts based on trace start point.

    :param trace_from: string denoting trace start point
    :param trace_from_type: string containing trace start point type
        (either "<class>" or "<method>")
    :returns: list containing class, method, descriptor parts
    """
    [class_part, method_part, desc_part] = \
        self.inst_analysis_utils.fn_get_class_method_desc_from_string(
            trace_from
        )
    # If we care only about the class part, overwrite the method/desc
    # parts with '.' (i.e., "don't care")
    if trace_from_type == '<class>':
        method_part = '.'
        desc_part = '.'
    return [class_part, method_part, desc_part]

def fn_analyse_returns(self, trace_template):
    """Analyses the return object and appends items to returns list.

    The RETURN value is expected in the form "<type> AS <name>". Every
    chain in self.output_chains is stored under <name> as a
    comma-separated string of stripped nodes; chains from reverse traces
    are reversed first.

    :param trace_template: dictionary object containing RETURN element
    """
    returnables = trace_template['RETURN']
    returnable_elements_name = returnables.split(' AS ')[1]
    # NOTE(review): return_type is parsed but not used in this method.
    return_type = returnables.split(' AS ')[0]

    # Analyse each chain.
    for chain_string in self.output_chains:
        chain = chain_string.split(',')
        if self.trace_direction == TRACE_REVERSE:
            chain.reverse()
        output_str = ''
        for chain_node in chain:
            chain_node = chain_node.strip()
            if output_str == '':
                output_str = chain_node
            else:
                output_str = output_str + ',' + chain_node
        self.current_returns.append({returnable_elements_name: output_str})
def fn_perform_code_search(self, a, d, dx, code_search_template, links):
    """Run the SEARCH part of a bug template against one APK.

    A dictionary template is a single search; a list template is a set of
    sub-searches that must all be satisfied. When the overall search is
    satisfied, the collected returns are converted into links. The
    Androguard references held on the instance are released before
    returning.

    :param a: Androguard APK object for the current APK
    :param d: array of Androguard DalvikVMFormat objects
    :param dx: Androguard Analysis object
    :param code_search_template: dictionary object (or list of dictionary
        objects) corresponding to the search part of a bug template
    :param links: dictionary object containing linked items
    :returns: list containing boolean value indicating whether the search
        was satisfied, and a dictionary object of updated links
    """
    logging.debug('Performing code search.')

    # Per-APK state: Androguard objects and the helper built on them.
    self.androguard_apk_obj = a
    self.androguard_d_array = d
    self.androguard_dx = dx
    self.inst_analysis_utils = AnalysisUtils(
        self.path_base_dir,
        self.androguard_apk_obj,
        self.androguard_d_array,
        self.androguard_dx
    )

    # Per-search state: template, incoming links, and collected returns
    # (the returns are turned into links at the end).
    self.search_template = code_search_template
    self.current_links = links
    self.current_returns = []

    template = self.search_template
    outcome = False
    if type(template) is dict:
        # A single search object.
        outcome = self.fn_process_individual_search_item(template)
    elif type(template) is list:
        # Every sub-search is executed (no short-circuiting), and only
        # results equal to True count as satisfied.
        sub_results = [
            self.fn_process_individual_search_item(sub_search)
            for sub_search in template
        ]
        outcome = all(result == True for result in sub_results)

    # Returns only become links when the whole search succeeded.
    if outcome == True:
        self.current_links = \
            self.inst_analysis_utils.fn_convert_returns_to_links(
                self.current_returns,
                self.current_links
            )

    self.fn_reset()
    # The outcome and the links are used by the next code segment.
    return [outcome, self.current_links]
class CodeSearch:
    """Searches APK code for the items described by a bug template."""

    def __init__(self, base_dir):
        """Sets paths and initialises variables.

        The Androguard objects are not supplied here; they are passed to
        fn_perform_code_search for each APK.

        :param base_dir: string indicating script base path
        """
        # Set paths.
        self.path_base_dir = base_dir

    def fn_reset(self):
        """Resets objects to free up memory."""
        self.androguard_apk_obj = None
        self.androguard_d_array = None
        self.androguard_dx = None
        self.inst_analysis_utils = None

    def fn_perform_code_search(self, a, d, dx, code_search_template, links):
        """Search through an APK code for template matches.

        :param a: Androguard APK object for the current APK
        :param d: array of Androguard DalvikVMFormat objects
        :param dx: Androguard Analysis object
        :param code_search_template: dictionary object (or list of
            dictionary objects) corresponding to the search part of a bug
            template
        :param links: dictionary object containing linked items
        :returns: list containing boolean value indicating whether the
            search was satisfied, and a dictionary object of updated links
        """
        logging.debug('Performing code search.')
        # Androguard values for current APK.
        self.androguard_apk_obj = a
        self.androguard_d_array = d
        self.androguard_dx = dx
        # Start up utility helper.
        self.inst_analysis_utils = AnalysisUtils(self.path_base_dir,
                                                 self.androguard_apk_obj,
                                                 self.androguard_d_array,
                                                 self.androguard_dx)
        # The SEARCH-relevant part of the bug template.
        self.search_template = code_search_template
        # Linked elements from checking previous parts of the template.
        self.current_links = links
        # A list to hold returnable elements (which will be
        # converted to links).
        self.current_returns = []

        # Variable to determine whether the overall SEARCH is satisfied.
        bool_satisfied = False
        # The search template can either be a dictionary or a list of
        # dictionaries.
        if isinstance(self.search_template, dict):
            bool_satisfied = self.fn_process_individual_search_item(
                self.search_template)
        # If the search is a list, then all individual sub-searches
        # must be satisfied. Every sub-search is executed (no
        # short-circuiting) because each one may contribute returns.
        elif isinstance(self.search_template, list):
            sub_results = [
                self.fn_process_individual_search_item(search_item)
                for search_item in self.search_template
            ]
            bool_satisfied = all(result is True for result in sub_results)

        # Process returns as links.
        if bool_satisfied is True:
            self.current_links = \
                self.inst_analysis_utils.fn_convert_returns_to_links(
                    self.current_returns,
                    self.current_links
                )
        self.fn_reset()
        # Return the outcome and the links, to be used by next code segment.
        return [bool_satisfied, self.current_links]

    def fn_process_individual_search_item(self, search_dictionary):
        """Process an individual search object.

        Each key of the dictionary is a search type and each value holds
        the parameters for that search. All of them are executed.

        :param search_dictionary: individual search object (dictionary)
        :returns: boolean indicating whether all searches within the
            search object were satisfied
        """
        # Iterate the object that was passed in, not self.search_template:
        # the latter is a list when the template contains several
        # sub-searches and cannot be indexed by search type.
        results = [
            self.fn_determine_search_type(search_type, search_object)
            for search_type, search_object in search_dictionary.items()
        ]
        # The overall result is True only if every search returned True.
        return all(result == True for result in results)

    def fn_determine_search_type(self, search_type, search_object):
        """Executes appropriate function based on search type.

        :param search_type: string value indicating the type of search
        :param search_object: dictionary object containing search parameters
        :returns: boolean output from executing the relevant function
        :raises TypeError: if the search type is not recognised
        """
        # search type -> (key holding the item(s) to look for, handler)
        search_handlers = {
            'SEARCHFORSTRING':
                ('STRING', self.fn_search_for_presence_of_string),
            'SEARCHFORCALLTOSTRING':
                ('STRING', self.fn_search_for_calls_to_string),
            'SEARCHFORMETHOD':
                ('METHOD', self.fn_search_for_presence_of_method),
            'SEARCHFORCALLTOMETHOD':
                ('METHOD', self.fn_search_for_calls_to_method),
            'SEARCHFORCLASS':
                ('CLASS', self.fn_search_for_presence_of_class),
            'SEARCHFORCALLTOCLASS':
                ('CLASS', self.fn_search_for_calls_to_class),
        }
        handler = search_handlers.get(search_type)
        if handler is None:
            # An unknown type previously failed with a TypeError from
            # calling None; keep the exception type, improve the message.
            raise TypeError(
                'Unsupported search type: ' + repr(search_type))
        item_type, fn_to_execute = handler
        items_to_search = self.fn_identify_search_items(
            item_type, search_object[item_type])
        # Execute the function.
        return fn_to_execute(search_object, items_to_search)

    def fn_identify_search_items(self, type, input):
        """Identify the specific items to search for.

        The input may carry a "<class>:" or "<method>:" prefix and may
        list alternatives separated by " OR ". For classes and methods,
        alternatives starting with "@" are replaced by the corresponding
        linked items.

        :param type: 'STRING', 'METHOD', or 'CLASS'
        :param input: input search string (from template)
        :returns: a list of strings/classes/methods to search for (None
            for any other type)
        """
        # NOTE: the parameter names shadow builtins but are part of the
        # method's interface, so they are kept.
        search_class_or_method = '<class>'
        if ':' in input:
            search_class_or_method = input.split(':')[0]
            input = input[len(search_class_or_method) + 1:]
        elif '->' in input:
            search_class_or_method = '<method>'

        # str.split returns [input] when there is no ' OR ' separator.
        alternatives = input.split(' OR ')
        if type == 'STRING':
            return alternatives
        if type not in ('CLASS', 'METHOD'):
            return None

        # Classes and methods are resolved in the same way.
        resolved_items = []
        for alternative in alternatives:
            if alternative.startswith('@'):
                linked_items = self.fn_get_linked_items(
                    alternative, search_class_or_method)
                for linked_item in linked_items:
                    if linked_item not in resolved_items:
                        resolved_items.append(linked_item)
            else:
                resolved_items.append(alternative)
        return resolved_items

    def fn_get_linked_items(self, string, search_class_or_method):
        """Get items from link list.

        The key may have the form "@name" or "@name[<sub-part>]rest",
        where the sub-part is "<class>" or "<method>" and "rest" is
        appended to every linked item.

        :param string: key into link list
        :param search_class_or_method: string (one of <class> or <method>),
            indicating whether the search should be at the class level or
            method level
        :returns: list of unique linked items (or sub-parts, as specified
            by search_class_or_method), in first-seen order
        """
        if ']' not in string:
            # No sub-part: the entire string is the link name.
            link_name = string
            link_subpart = ''
            remaining_string = ''
        else:
            # Split into the link name, relevant sub-part, and remainder
            # of string.
            split_for_link = string.split(']')
            remaining_string = split_for_link[1]
            second_split = split_for_link[0].split('[')
            link_name = second_split[0]
            link_subpart = second_split[1].replace(' ', '')

        # Get all linked items.
        linked_items = self.inst_analysis_utils.fn_get_linked_items(
            self.current_links, link_name)

        output_items = []
        # An unrecognised sub-part yields no items.
        if link_subpart in ('', '<class>', '<method>'):
            for linked_item in linked_items:
                if link_subpart == '<class>':
                    base_item = linked_item.split('->')[0]
                elif link_subpart == '<method>' and '->' not in linked_item:
                    # A method was requested but this item is only a class.
                    continue
                else:
                    base_item = linked_item
                return_string = base_item + remaining_string
                if search_class_or_method == '<class>':
                    return_string = return_string.split('->')[0]
                output_items.append(return_string)
        # De-duplicate while keeping a deterministic (first-seen) order.
        return list(dict.fromkeys(output_items))

    def fn_search_for_presence_of_string(self, string_search_object,
                                         search_strings):
        """Searches for a string within code.

        :param string_search_object: object containing search parameters
        :param search_strings: list of strings to search for
        :returns: boolean indicating whether the string was present within
            APK
        """
        all_strings = []
        for search_string in search_strings:
            logging.debug('Searching for string: "' + search_string + '".')
            all_strings.extend(
                self.inst_analysis_utils.fn_get_strings(
                    search_string.strip()))
        # True if at least one string is present.
        return len(all_strings) > 0

    def fn_search_for_presence_of_method(self, method_search_object,
                                         methods_to_search):
        """Searches for a method within code.

        :param method_search_object: object containing search parameters
        :param methods_to_search: list of methods to search for
        :returns: boolean indicating whether the method was found within APK
        """
        all_methods = []
        for method_to_search in methods_to_search:
            # Get the class, method and descriptor parts.
            # Note that the method MUST be specified in smali format.
            [class_part, method_part, desc_part] = \
                self.inst_analysis_utils.fn_get_class_method_desc_from_string(
                    method_to_search
                )
            # We consider subclasses as well.
            classes_inc_sub = [class_part]
            classes_inc_sub.extend(
                self.inst_analysis_utils.fn_find_subclasses(class_part))
            # Search for all class/method combinations.
            for one_class in classes_inc_sub:
                logging.debug('Searching for method: ' + one_class
                              + '->' + method_part + desc_part)
                all_methods.extend(
                    self.inst_analysis_utils.fn_get_methods(
                        one_class, method_part, desc_part))
        # True if at least one method is present.
        return len(all_methods) > 0

    def fn_search_for_presence_of_class(self, class_search_object,
                                        classes_to_search):
        """Searches for a class within code.

        :param class_search_object: object containing search parameters
        :param classes_to_search: list of classes to search for
        :returns: boolean indicating whether the class was found within APK
        """
        all_classes = []
        for class_to_search in classes_to_search:
            # We consider subclasses as well.
            classes_inc_sub = [class_to_search]
            classes_inc_sub.extend(
                self.inst_analysis_utils.fn_find_subclasses(class_to_search))
            for one_class in classes_inc_sub:
                logging.debug('Searching for class: ' + one_class)
                all_classes.extend(
                    self.inst_analysis_utils.fn_get_classes(one_class))
        # True if at least one class is present.
        return len(all_classes) > 0

    def fn_search_for_calls_to_string(self, string_search_object,
                                      search_strings):
        """Searches for the presence of "calls" to a string of interest.

        This is actually just the same as searching for the presence of a
        string. Unlike classes or methods, a string can't be present within
        code without being called. However, this method allows us to get
        the calling class or method.

        :param string_search_object: object containing search parameters
        :param search_strings: list of strings to search for
        :returns: boolean indicating whether "calls" to the string were
            present
        """
        bool_search_satisfied = False
        for search_string in search_strings:
            logging.debug('Searching for string: "' + search_string + '".')
            # Get calls to string (will be a list of EncodedMethod objects).
            calling_methods = self.inst_analysis_utils.fn_get_calls_to_string(
                search_string)
            # If no results were returned, then we needn't waste any more
            # time.
            if len(calling_methods) == 0:
                continue
            # Check search locations and RETURNs.
            bool_one_search_satisfied = \
                self.fn_process_search_location_and_returns(
                    string_search_object,
                    calling_methods
                )
            if bool_one_search_satisfied == True:
                bool_search_satisfied = True
        return bool_search_satisfied

    def fn_search_for_calls_to_method(self, method_search_object,
                                      methods_to_search):
        """Searches for the presence of calls to a method of interest.

        :param method_search_object: object containing search parameters
        :param methods_to_search: list of methods to search for
        :returns: boolean indicating whether calls to the method were found
        """
        bool_search_satisfied = False
        for method_to_search in methods_to_search:
            logging.debug('Searching for calls to method: "'
                          + method_to_search + '".')
            # Get the class, method and descriptor parts.
            # Note that the method MUST be specified in smali format.
            [class_part, method_part, desc_part] = \
                self.inst_analysis_utils.fn_get_class_method_desc_from_string(
                    method_to_search
                )
            # We consider subclasses as well.
            all_classes = [class_part]
            all_classes.extend(
                self.inst_analysis_utils.fn_find_subclasses(class_part))
            # Get a set of methods that call the method of interest.
            calling_methods = []
            for one_class in all_classes:
                calling_methods.extend(
                    self.inst_analysis_utils.fn_get_calls_to_method(
                        one_class, method_part, desc_part))
            # If there were no methods calling the method of interest,
            # then move on to the next method.
            if len(calling_methods) <= 0:
                continue
            # Check search locations and RETURNs.
            bool_one_search_satisfied = \
                self.fn_process_search_location_and_returns(
                    method_search_object,
                    calling_methods
                )
            if bool_one_search_satisfied == True:
                bool_search_satisfied = True
        return bool_search_satisfied

    def fn_search_for_calls_to_class(self, class_search_object,
                                     classes_to_search):
        """Searches for the presence of calls to a class of interest.

        :param class_search_object: object containing search parameters
        :param classes_to_search: list of classes to search for
        :returns: boolean indicating whether calls to the class were found
        """
        bool_search_satisfied = False
        for class_to_search in classes_to_search:
            # We consider subclasses as well.
            classes_inc_sub = [class_to_search]
            classes_inc_sub.extend(
                self.inst_analysis_utils.fn_find_subclasses(class_to_search))
            # Get a set of methods that call the class of interest.
            calling_methods = []
            for one_class in classes_inc_sub:
                logging.debug('Searching for calls to class: ' + one_class)
                calling_methods.extend(
                    self.inst_analysis_utils.fn_get_calls_to_class(one_class))
            # If no results were returned, then we needn't waste any more
            # time.
            if len(calling_methods) == 0:
                continue
            # Check search locations and RETURNs.
            bool_one_search_satisfied = \
                self.fn_process_search_location_and_returns(
                    class_search_object,
                    calling_methods
                )
            if bool_one_search_satisfied == True:
                bool_search_satisfied = True
        return bool_search_satisfied

    def fn_process_search_location_and_returns(self, search_object,
                                               calling_methods):
        """Filters methods for search location criteria, and process RETURNs.

        :param search_object: dictionary object for a single search type
        :param calling_methods: list of EncodedMethod objects
        :returns: boolean indicating whether any of the calling methods
            correspond to the search location specified in the search
            object (always True when no location is specified)
        """
        if 'SEARCHLOCATION' in search_object:
            # A search location was specified, so only the methods that
            # satisfy it are kept (for the result and for the RETURNs).
            calling_methods = self.fn_get_methods_satisfying_location_reqs(
                calling_methods,
                search_object['SEARCHLOCATION']
            )
            bool_search_satisfied = len(calling_methods) > 0
        else:
            # No search location means a code-wide search, so the
            # location part is trivially satisfied.
            bool_search_satisfied = True

        # If there are no RETURNs to process, then we're done.
        if 'RETURN' in search_object:
            self.fn_analyse_returns(search_object, calling_methods)
        return bool_search_satisfied

    def fn_get_methods_satisfying_location_reqs(self, methods, location):
        """Checks which input methods satisfy location criteria.

        The location has the form "[NOT ][<type>:]<value>", where the
        value is either a fixed smali value or an "@" link name.

        :param methods: list of EncodedMethod objects
        :param location: string describing search location (in smali)
        :returns: list of EncodedMethod objects that satisfy the search
            location criteria (each method at most once)
        """
        location_type = '<class>'
        location_exclusion = False
        if 'NOT ' in location:
            location_exclusion = True
            location = location.replace('NOT ', '')
        if ':' in location:
            location_split = location.split(':')
            # Location type has limited options.
            location_type = location_split[0]
            # Location value could be a fixed value or a link value.
            location_value = location_split[1]
        else:
            location_value = location

        if location_value.startswith('@'):
            location_values = self.inst_analysis_utils.fn_get_linked_items(
                self.current_links, location_value)
        else:
            location_values = [location_value]

        # Check each calling method against each expected location value.
        # NOTE(review): with "NOT" and several location values, a method
        # is kept as soon as it differs from any one of them - confirm
        # that this is the intended exclusion semantics.
        output_methods = []
        for input_method in methods:
            for expected_location in location_values:
                is_satisfied = self.fn_check_callers_against_expectation(
                    input_method,
                    expected_location,
                    location_type,
                    location_exclusion)
                if is_satisfied == True:
                    output_methods.append(input_method)
                    # One match is enough; avoid duplicate entries.
                    break
        return output_methods

    def fn_check_callers_against_expectation(self, method, location_value,
                                             location_type, exclude_match):
        """Checks a method against an expected pattern.

        :param method: EncodedMethod object to check
        :param location_value: string denoting the location to match against
        :param location_type: string value of either "<class>" or
            "<method>", indicating which part of the location to match
            against. Note that "<method>" will match against the composite
            class->method, while "<class>" will match against only the
            class part.
        :param exclude_match: boolean; if True the result is inverted
        :returns: boolean indicating whether the method satisfies the
            location criteria
        """
        is_satisfied = False
        # Available signature, as class/method/descriptor.
        [class_part, method_part, desc_part] = \
            self.inst_analysis_utils.fn_get_class_method_desc_from_method(
                method
            )
        # Expected signature, as class/method/descriptor.
        [exp_class_part, exp_method_part, exp_desc_part] = \
            self.inst_analysis_utils.fn_get_class_method_desc_from_string(
                location_value
            )
        # Perform the checks.
        # If the location type is class, then we only compare the class
        # parts. Otherwise, we compare class, method and descriptor parts.
        if location_type == '<class>':
            if exp_class_part.endswith('*'):
                # Trailing wildcard: prefix match on the class name.
                if class_part.startswith(exp_class_part.replace('*', '')):
                    is_satisfied = True
            elif class_part == exp_class_part:
                is_satisfied = True
        elif location_type == '<method>':
            if ((class_part == exp_class_part) and
                    (method_part == exp_method_part) and
                    (desc_part == exp_desc_part)):
                is_satisfied = True

        if exclude_match == True:
            is_satisfied = not is_satisfied
        return is_satisfied

    def fn_analyse_returns(self, return_object, return_candidates):
        """Analyses the returns list against the expected returns.

        Each returnable has the form "<type> AS <name>". Several
        returnables may be given as a list or as one comma-separated
        string.

        :param return_object: dictionary object containing returnable items
        :param return_candidates: list of EncodedMethod objects to process
            according to the rules specified in the return_object
        """
        # Returnable items.
        returnables = return_object['RETURN']
        # Generalise the returnables to a list.
        if isinstance(returnables, list):
            returnable_elements = returnables
        else:
            # str.split returns [returnables] when there is no comma.
            returnable_elements = returnables.split(',')

        # Process each returnable item.
        for return_element in returnable_elements:
            # Whitespace around the separators ("a AS x, b AS y") must not
            # end up in the type or the name.
            element_parts = return_element.strip().split(' AS ')
            return_type = element_parts[0].strip()
            returnable_element_name = element_parts[1].strip()
            for return_candidate in return_candidates:
                self.fn_process_returnable_item(return_candidate,
                                                return_type,
                                                returnable_element_name)

    def fn_process_returnable_item(self, return_candidate,
                                   return_type, element_name):
        """Creates a return object and appends to current returns.

        This function will process the EncodedMethod object, extract the
        relevant parts of information from it, and create an output
        object, which it will append to the list of returns.

        :param return_candidate: EncodedMethod object to be processed as a
            returnable element
        :param return_type: string value of either "<class>" or "<method>",
            indicating which part of the method to append to returns.
            Note that "<method>" will retain the composite class->method,
            while "<class>" will return only the class part.
        :param element_name: string name under which to store the return
            item
        """
        [class_name, method_name, desc_name] = \
            self.inst_analysis_utils.fn_get_class_method_desc_from_method(
                return_candidate
            )
        # For any other return type an empty object is appended.
        output_obj = {}
        if return_type == '<class>':
            output_obj[element_name] = class_name
        elif return_type == '<method>':
            full_method = class_name + '->' + method_name + desc_name
            output_obj[element_name] = full_method
        self.current_returns.append(output_obj)