class CryptoIdentifier():
    """
    This class contains the logic to perform Crypto identification.
    Two techniques are currently supported:
    1. A heuristic approach that identifies functions and basic blocks
       based on the ratio of arithmetic/logic instructions to all instructions
    2. A signature-based approach, using the signatures defined in PatternManager
    """

    def __init__(self):
        self.name = "CryptoIdentifier"
        print("[*] loading CryptoIdentifier")
        # modules and data classes are kept as attributes and always accessed through self
        self.time = time
        self.re = re
        self.GraphHelper = GraphHelper
        self.CryptoSignatureHit = CryptoSignatureHit
        self.AritlogBasicBlock = AritlogBasicBlock
        self.Segment = Segment
        self.pm = PatternManager()
        self.low_rating_threshold = 0.4
        self.high_rating_threshold = 1.0
        self.low_instruction_threshold = 8
        self.high_instruction_threshold = 100
        # if the threshold is set to this value, it is automatically expanded to infinite.
        self.max_instruction_threshold = 100
        self.low_call_threshold = 0
        self.high_call_threshold = 1
        # if the threshold is set to this value, it is automatically expanded to infinite.
        self.max_call_threshold = 10
        # if at least this fraction of a signature's length has been identified
        # consecutively, the location is marked as a signature hit.
        self.match_filter_factor = 0.5
        self.aritlog_blocks = []
        self.signature_hits = []
        self.ida_proxy = IdaProxy()
        return

    def scan(self):
        """
        Scan the whole IDB with all available techniques.
        """
        self.scanAritlog()
        self.scanCryptoPatterns()

################################################################################
# Aritlog scanning
################################################################################

    def scanAritlog(self):
        """
        Scan with the arithmetic/logic heuristic. All basic blocks of all functions are analyzed and
        stored in I{self.aritlog_blocks}; the return value is filtered with the current thresholds.
        @return: a list of AritLogBasicBlock data objects that fulfill the parameters as specified
        """
        print("[*] CryptoIdentifier: Starting aritlog heuristic analysis.")
        self.aritlog_blocks = []
        time_before = self.time.time()
        for function_ea in self.ida_proxy.Functions():
            function_chart = self.ida_proxy.FlowChart(self.ida_proxy.get_func(function_ea))
            calls_in_function = 0
            function_blocks = []
            function_dgraph = {}
            blocks_in_loops = set()
            for current_block in function_chart:
                block = self.AritlogBasicBlock(current_block.startEA, current_block.endEA)
                for instruction in self.ida_proxy.Heads(block.start_ea, block.end_ea):
                    if self.ida_proxy.isCode(self.ida_proxy.GetFlags(instruction)):
                        mnemonic = self.ida_proxy.GetMnem(instruction)
                        # identical operands are reported so the block can recognize
                        # zeroing instructions like "xor eax, eax"
                        has_identical_operands = self.ida_proxy.GetOperandValue(instruction, 0) == \
                            self.ida_proxy.GetOperandValue(instruction, 1)
                        block.updateInstructionCount(mnemonic, has_identical_operands)
                        if mnemonic == "call":
                            calls_in_function += 1
                function_blocks.append(block)
                # prepare graph for Tarjan's algorithm
                succeeding_blocks = [succ.startEA for succ in current_block.succs()]
                function_dgraph[current_block.startEA] = succeeding_blocks
                # add trivial loops (a block that is its own successor)
                if current_block.startEA in succeeding_blocks:
                    block.is_contained_in_trivial_loop = True
                    blocks_in_loops.add(current_block.startEA)
            # perform Tarjan's algorithm to identify strongly connected components (= loops)
            # in the function graph
            graph_helper = self.GraphHelper()
            strongly_connected = graph_helper.calculateStronglyConnectedComponents(function_dgraph)
            for component in strongly_connected:
                # components with a single block are no loops (trivial loops are handled above)
                if len(component) > 1:
                    blocks_in_loops.update(component)
            for block in function_blocks:
                if block.start_ea in blocks_in_loops:
                    block.is_contained_in_loop = True
                block.num_calls_in_function = calls_in_function
            self.aritlog_blocks.extend(function_blocks)
        print("[*] Heuristics analysis took %3.2f seconds." % (self.time.time() - time_before))
        return self.getAritlogBlocks(self.low_rating_threshold, self.high_rating_threshold,
                                     self.low_instruction_threshold, self.high_instruction_threshold,
                                     self.low_call_threshold, self.high_call_threshold,
                                     False, False, False)

    def _updateThresholds(self, min_rating, max_rating, min_instr, max_instr, min_call, max_call):
        """
        Update all six threshold bounds. Lower bounds are clamped to zero, the rating is clamped
        to [0.0, 1.0] and upper bounds that reach their "max" setting are expanded to 1000000.
        @param min_rating: the minimum arit/log ratio a basic block must have
        @type min_rating: float
        @param max_rating: the maximum arit/log ratio a basic block can have
        @type max_rating: float
        @param min_instr: the minimum number of instructions a basic block must have
        @type min_instr: int
        @param max_instr: the maximum number of instructions a basic block can have
        @type max_instr: int
        @param min_call: the minimum number of calls the function containing the basic block must have
        @type min_call: int
        @param max_call: the maximum number of calls the function containing the basic block can have
        @type max_call: int
        """
        self.low_rating_threshold = max(0.0, min_rating)
        self.high_rating_threshold = min(1.0, max_rating)
        self.low_instruction_threshold = max(0, min_instr)
        if max_instr >= self.max_instruction_threshold:
            # we cap the value here and safely assume there is no block with more than 1000000 instructions
            self.high_instruction_threshold = 1000000
        else:
            self.high_instruction_threshold = max_instr
        self.low_call_threshold = max(0, min_call)
        if max_call >= self.max_call_threshold:
            # we cap the value here and safely assume there is no function with more than 1000000 calls
            self.high_call_threshold = 1000000
        else:
            self.high_call_threshold = max_call

    def getAritlogBlocks(self, min_rating, max_rating, min_instr, max_instr, min_api, max_api, is_nonzero,
                         is_looped, is_trivially_looped):
        """
        Get all blocks that are within the limits specified by the heuristic parameters.
        Parameters are the same as in function "_updateThresholds" (which is called with them,
        so the stored thresholds are updated as a side effect) except
        @param is_nonzero: defines whether zeroing instructions (like xor eax, eax) shall be counted or not.
        @type is_nonzero: boolean
        @param is_looped: defines whether only basic blocks in loops shall be selected
        @type is_looped: boolean
        @param is_trivially_looped: defines whether only basic blocks in trivial loops shall be selected
        @type is_trivially_looped: boolean
        @return: a list of AritlogBasicBlock data objects, according to the parameters
        """
        self._updateThresholds(min_rating, max_rating, min_instr, max_instr, min_api, max_api)
        return [block for block in self.aritlog_blocks if
                (self.high_rating_threshold >= block.getAritlogRating(is_nonzero) >= self.low_rating_threshold) and
                (self.high_instruction_threshold >= block.num_instructions >= self.low_instruction_threshold) and
                (self.high_call_threshold >= block.num_calls_in_function >= self.low_call_threshold) and
                (not is_looped or block.is_contained_in_loop) and
                (not is_trivially_looped or block.is_contained_in_trivial_loop)]

    def getUnfilteredBlockCount(self):
        """
        returns the number of basic blocks that have been analyzed.
        @return: (int) number of basic blocks
        """
        return len(self.aritlog_blocks)

################################################################################
# Signature scanning
################################################################################

    def getSegmentData(self):
        """
        returns the raw bytes of the segments as stored by IDA. Segments whose data cannot
        be read are reported on the console and skipped.
        @return: a list of Segment data objects.
        """
        segments = []
        for segment_ea in self.ida_proxy.Segments():
            try:
                segment = self.Segment()
                segment.start_ea = segment_ea
                segment.end_ea = self.ida_proxy.SegEnd(segment_ea)
                segment.name = self.ida_proxy.SegName(segment_ea)
                # join once instead of growing the buffer byte by byte
                segment.data = "".join([chr(self.ida_proxy.get_byte(ea)) for ea in
                                        helpers.Misc.lrange(segment_ea, self.ida_proxy.SegEnd(segment_ea))])
                segments.append(segment)
            except Exception:
                # best effort: skip the segment, but do not swallow KeyboardInterrupt/SystemExit
                print("[!] Tried to access invalid segment data. An error has occurred while address conversion")
        return segments

    def scanCryptoPatterns(self, pattern_size=32):
        """
        Scan all segments for the tokenized signatures of the PatternManager and for its variable
        signatures. The result is also stored in I{self.signature_hits}.
        @param pattern_size: passed to PatternManager.getTokenizedSignatures()
        @type pattern_size: int
        @return: a list of CryptoSignatureHit data objects
        """
        crypt_results = []
        print("[*] CryptoIdentifier: Starting crypto signature scanning.")
        time_before_matching = self.time.time()
        segments = self.getSegmentData()
        keywords = self.pm.getTokenizedSignatures(pattern_size)
        for keyword, signature_names in keywords.items():
            # the pattern only depends on the keyword, so build it once for all segments
            pattern = self.re.compile(self.re.escape(keyword))
            for segment in segments:
                crypt_results.extend([self.CryptoSignatureHit(segment.start_ea + match.start(),
                                                              signature_names, keyword)
                                      for match in pattern.finditer(segment.data)])
        variable_matches = self.scanVariablePatterns()
        crypt_results.extend(variable_matches)
        print("[*] Full matching took %3.2f seconds and resulted in %d hits." %
              (self.time.time() - time_before_matching, len(crypt_results)))
        self.signature_hits = crypt_results
        return crypt_results

    def scanVariablePatterns(self):
        """
        Scan all segments for the variable signatures of the PatternManager, using IDA's binary search.
        @return: a list of CryptoSignatureHit data objects
        """
        # the scanning code is roughly based on kyprizel's signature scan, see credits above for more information
        crypt_results = []
        variable_signatures = self.pm.getVariableSignatures()
        for var_sig, signature in variable_signatures.items():
            current_seg = self.ida_proxy.FirstSeg()
            seg_end = self.ida_proxy.SegEnd(current_seg)
            while current_seg != self.ida_proxy.BAD_ADDR:
                signature_hit = self.ida_proxy.find_binary(current_seg, seg_end, signature, 16, 1)
                if signature_hit != self.ida_proxy.BAD_ADDR:
                    crypt_results.append(self.CryptoSignatureHit(signature_hit, [var_sig], signature))
                    # continue behind the hit; the signature consists of space separated tokens
                    current_seg = signature_hit + signature.count(" ") + 1
                else:
                    current_seg = self.ida_proxy.NextSeg(seg_end)
                    if not current_seg == self.ida_proxy.BAD_ADDR:
                        seg_end = self.ida_proxy.SegEnd(current_seg)
        return crypt_results

    def getSignatureLength(self, signature_name):
        """
        returns the length for a signature, identified by its name
        @param signature_name: name for a signature, e.g. "ADLER 32"
        @type signature_name: str
        @return: (int) length of the signature, 0 if the name is unknown.
        """
        for signature, name in self.pm.signatures.items():
            if name == signature_name:
                return len(signature)
        return 0

    def getSignatureHits(self):
        """
        Get all signature hits that have a length of at least match_filter_factor percent
        of the signature they triggered. Hits are grouped by signature names.
        @return: a dictionary with key/value entries of the following form: ("signature name", [CryptoSignatureHit])
        """
        sorted_hits = sorted(self.signature_hits)
        unified_hits = []
        previous_signature_names = []
        for hit in sorted_hits:
            hit_intersection = [element for element in hit.signature_names if element in previous_signature_names]
            if len(hit_intersection) == 0:
                # no signature in common with the previous hit: start a new unified hit
                previous_signature_names = hit.signature_names
                unified_hits.append(self.CryptoSignatureHit(hit.start_address, hit.signature_names,
                                                            hit.matched_signature))
            else:
                previous_signature_names = hit_intersection
                previous_hit = unified_hits[-1]
                if hit.start_address == previous_hit.start_address + len(previous_hit.matched_signature):
                    # the hit directly continues the previous one: merge them
                    previous_hit.matched_signature += hit.matched_signature
                    previous_hit.signature_names = hit_intersection
                else:
                    unified_hits.append(self.CryptoSignatureHit(hit.start_address, hit.signature_names,
                                                                hit.matched_signature))

        filtered_hits = []
        for hit in unified_hits:
            required_length = max([self.match_filter_factor * self.getSignatureLength(name)
                                   for name in hit.signature_names])
            if len(hit.matched_signature) >= required_length:
                hit.code_refs_to = self.getXrefsToAddress(hit.start_address)
                filtered_hits.append(hit)

        grouped_hits = {}
        for hit in filtered_hits:
            for name in hit.signature_names:
                grouped_hits.setdefault(name, []).append(hit)
        return grouped_hits

    def getXrefsToAddress(self, address):
        """
        get all references to a certain address.
        These are no xrefs in IDA sense but references to the crypto signatures.
        If the signature points to an instruction, e.g. if a constant is moved to a register, the return is
        flagged as "True", meaning it is an in-code reference.
        @param address: an arbitrary address
        @type address: int
        @return: a list of tuples (int, boolean)
        """
        xrefs = []
        # NOTE(review): the lower bound of 14 bytes presumably relates to the maximum
        # instruction length - confirm.
        head_to_address = self.ida_proxy.PrevHead(address, address - 14)
        # compare against the proxy's BAD_ADDR instead of a hard-coded 32bit constant,
        # consistent with the other scanning methods of this class
        if head_to_address != self.ida_proxy.BAD_ADDR:
            flags = self.ida_proxy.GetFlags(head_to_address)
            if self.ida_proxy.isCode(flags):
                xrefs.append((head_to_address, True))
        for x in self.ida_proxy.XrefsTo(address):
            flags = self.ida_proxy.GetFlags(x.frm)
            if self.ida_proxy.isCode(flags):
                xrefs.append((x.frm, False))
        return xrefs
class DocumentationHelper():
    """
    This class handles instruction coloring.
    """

    # data layout of color maps
    layout_color_map = {"tag": {"base_color": 0x112233, "highlight_color": 0x445566}}

    def __init__(self, idascope_config):
        print("[|] loading DocumentationHelper")
        self.ida_proxy = IdaProxy()
        # default colors are grey / light red / red
        self.default_neutral_color = 0xCCCCCC
        self.default_base_color = 0xB3B3FF
        self.default_highlight_color = 0x3333FF
        self.color_state = "unknown"
        self.idascope_config = idascope_config
        self._loadConfig(self.idascope_config.semantics_file)
        return

    def _loadConfig(self, config_filename):
        """
        Loads a semantic configuration file and generates a color map from the contained information.
        @param config_filename: filename of a semantic configuration file
        @type config_filename: str
        """
        # the context manager makes sure the file handle is closed again
        with open(config_filename, "r") as config_file:
            config = config_file.read()
        parsed_config = json.loads(config, object_hook=JsonHelper.decode_dict)
        self.default_neutral_color = int(parsed_config["default_neutral_color"], 16)
        self.default_base_color = int(parsed_config["default_base_color"], 16)
        self.default_highlight_color = int(parsed_config["default_highlight_color"], 16)
        self.color_map = self._generateColorMapFromDefinitions(parsed_config)
        return

    def _parseColorCode(self, color):
        """
        Internal function to convert a color to its numeric color code.
        @param color: a color, either in text representation (base 16) or already numeric
        @type color: str or int
        @return: (int) the color code
        """
        try:
            return int(color, 16)
        except TypeError:
            # already numeric, e.g. one of the default colors
            return int(color)

    def _generateColorMapFromDefinitions(self, config):
        """
        Internal function to generate a color map from a semantic definitions config file.
        @param config: a parsed semantic definitions config file, providing the entries
                       "semantic_definitions" and "semantic_groups"
        @type config: dict
        @return: a dictionary of a color map, see I{layout_color_map} for a reference
        """
        color_map = {}
        for definition in config["semantic_definitions"]:
            # convert text representation of color codes to numbers. Groups that are not
            # found yield the (already numeric) default colors.
            base_color, highlight_color = self._getColorsForGroup(definition["group"], config)
            color_map[definition["tag"]] = {"base_color": self._parseColorCode(base_color),
                                            "highlight_color": self._parseColorCode(highlight_color)}
        return color_map

    def _getColorsForGroup(self, target_group, config):
        """
        Internal function to look up the colors of a semantic group.
        @param target_group: tag of the group to look up
        @type target_group: str
        @param config: a parsed semantic definitions config file
        @type config: dict
        @return: a tuple (base_color, highlight_color) as stored for the group, or the numeric
                 default colors if the group is not defined
        """
        for group in config["semantic_groups"]:
            if group["tag"] == target_group:
                return (group["base_color"], group["highlight_color"])
        print("[-] Failed to get colors for group \"%s\" - you might want to check your semantics file." %
              target_group)
        return (self.default_base_color, self.default_highlight_color)

    def uncolorAll(self):
        """
        Uncolors all instructions of all segments by changing their color to white.
        """
        for seg_ea in self.ida_proxy.Segments():
            for function_address in self.ida_proxy.Functions(self.ida_proxy.SegStart(seg_ea),
                                                             self.ida_proxy.SegEnd(seg_ea)):
                for block in self.ida_proxy.FlowChart(self.ida_proxy.get_func(function_address)):
                    for head in self.ida_proxy.Heads(block.startEA, block.endEA):
                        self.colorInstruction(head, 0xFFFFFF, refresh=False)
        self.ida_proxy.refresh_idaview_anyway()

    def colorInstruction(self, address, color, refresh=True):
        """
        Colors the instruction at an address with the given color code.
        @param address: address of the instruction to color
        @type address: int
        @param color: color-code to set for the instruction
        @type color: int (0xBBGGRR)
        @param refresh: refresh IDA view to ensure the color shows directly, can be omitted for performance.
        @type refresh: boolean
        """
        self.ida_proxy.SetColor(address, self.ida_proxy.CIC_ITEM, color)
        if refresh:
            self.ida_proxy.refresh_idaview_anyway()

    def colorBasicBlock(self, address, color, refresh=True):
        """
        Colors the basic block containing a target address with the given color code.
        @param address: address an instruction in the basic block to color
        @type address: int
        @param color: color-code to set for the instruction
        @type color: int (0xBBGGRR)
        @param refresh: refresh IDA view to ensure the color shows directly, can be omitted for performance.
        @type refresh: boolean
        """
        function_chart = self.ida_proxy.FlowChart(self.ida_proxy.get_func(address))
        for block in function_chart:
            if block.startEA <= address < block.endEA:
                for head in self.ida_proxy.Heads(block.startEA, block.endEA):
                    self.colorInstruction(head, color, refresh=False)
                # refresh once per block instead of once per instruction
                if refresh:
                    self.ida_proxy.refresh_idaview_anyway()

    def getNextColorScheme(self):
        """
        get the next color scheme in the three-cycle "individual/mono/uncolored", where individual is semantic coloring
        @return: next state
        """
        if self.color_state == "individual":
            return "mono"
        elif self.color_state == "mono":
            return "uncolored"
        # "uncolored" and any unknown state both continue with "individual"
        return "individual"

    def selectHighlightColor(self, tag):
        """
        automatically chooses the highlight color for a tag based on the current color scheme
        @param tag: the tag to choose the color for
        @type tag: str
        @return: (int) a color code
        """
        next_scheme = self.getNextColorScheme()
        if next_scheme == "uncolored":
            return 0xFFFFFF
        elif next_scheme == "mono":
            return self.default_highlight_color
        return self.color_map[tag]["highlight_color"]

    def selectBaseColor(self, tagged_addresses_in_block):
        """
        automatically chooses the base color for a block based on the current color scheme
        @param tagged_addresses_in_block: all tagged addresses in a basic block for which the color shall be chosen
        @type tagged_addresses_in_block: a list of tuples (int, str) containing pairs of instruction addresses and tags
        @return: (int) a color code
        """
        next_scheme = self.getNextColorScheme()
        if next_scheme == "uncolored":
            return 0xFFFFFF
        elif next_scheme == "mono":
            return self.default_base_color
        colors_in_block = set([self.color_map[item[1]]["base_color"] for item in tagged_addresses_in_block])
        # only a block whose tags all share one base color gets that color
        if len(colors_in_block) == 1:
            return colors_in_block.pop()
        return self.default_neutral_color

    def colorize(self, scan_result):
        """
        perform coloring on the IDB, based on a scan performed by SemanticIdentifier
        @param scan_result: result of a scan as performed by SemanticIdentifier
        @type scan_result: a dictionary with key/value entries of the following form: (address, [FunctionContext])
        """
        for function_address in scan_result.keys():
            tagged_addresses_in_function = scan_result[function_address].getAllTaggedAddresses()
            function_chart = self.ida_proxy.FlowChart(self.ida_proxy.get_func(function_address))
            for basic_block in function_chart:
                # plain comparison instead of a range membership test: constant time and
                # safe for addresses of any size
                tagged_addresses_in_block = [(addr, tagged_addresses_in_function[addr])
                                             for addr in tagged_addresses_in_function.keys()
                                             if basic_block.startEA <= addr < basic_block.endEA]
                if len(tagged_addresses_in_block) > 0:
                    base_color = self.selectBaseColor(tagged_addresses_in_block)
                    self.colorBasicBlock(basic_block.startEA, base_color, refresh=False)
                    for tagged_address in tagged_addresses_in_block:
                        highlight_color = self.selectHighlightColor(tagged_address[1])
                        self.colorInstruction(tagged_address[0], highlight_color, refresh=False)
        # the state is advanced only after everything has been colored with the "next" scheme
        self.color_state = self.getNextColorScheme()
        self.ida_proxy.refresh_idaview_anyway()

    def getNextNonFuncInstruction(self, addr):
        """
        Search downwards from an address for the next code instruction that is not part of a function.
        @param addr: address to start the search from
        @type addr: int
        @return: (int) address of the instruction or BAD_ADDR if there is none
        """
        next_instruction = addr
        while next_instruction != self.ida_proxy.BAD_ADDR:
            next_instruction = self.ida_proxy.find_not_func(next_instruction, self.ida_proxy.SEARCH_DOWN)
            flags = self.ida_proxy.GetFlags(next_instruction)
            if self.ida_proxy.isCode(flags):
                return next_instruction
        return self.ida_proxy.BAD_ADDR

    def convertNonFunctionCode(self):
        """
        Convert code that is not part of a function to functions, starting with
        locations that show a function prologue.
        """
        self.convertAnyProloguesToFunctions()
        # do a second run to define the rest
        next_instruction = self.ida_proxy.minEA()
        while next_instruction != self.ida_proxy.BAD_ADDR:
            next_instruction = self.getNextNonFuncInstruction(next_instruction)
            if next_instruction == self.ida_proxy.BAD_ADDR:
                # search exhausted, there is nothing left to convert
                break
            print("[+] Fixed undefined code to function @ [%08x]" % (next_instruction))
            self.ida_proxy.MakeFunction(next_instruction)
        return

    def convertAnyProloguesToFunctions(self):
        """
        Convert data as well as non-function code starting with a function prologue to functions.
        """
        self.convertDataWithPrologueToCode()
        self.convertNonFunctionCodeWithPrologues()

    def convertNonFunctionCodeWithPrologues(self):
        """
        Convert non-function code starting with the prologue "push ebp; mov ebp, esp" to functions.
        """
        next_instruction = self.ida_proxy.minEA()
        while next_instruction != self.ida_proxy.BAD_ADDR:
            next_instruction = self.getNextNonFuncInstruction(next_instruction)
            if next_instruction == self.ida_proxy.BAD_ADDR:
                # search exhausted, do not inspect the invalid address
                break
            # operand type 1 with the values 5 and 4 is what the message below calls ebp and esp
            if self.ida_proxy.GetMnem(next_instruction).startswith("push") and \
                    self.ida_proxy.GetOpType(next_instruction, 0) == 1 and \
                    self.ida_proxy.GetOperandValue(next_instruction, 0) == 5:
                instruction_after_push = self.getNextNonFuncInstruction(next_instruction)
                if self.ida_proxy.GetMnem(instruction_after_push).startswith("mov") and \
                        self.ida_proxy.GetOpType(instruction_after_push, 0) == 1 and \
                        self.ida_proxy.GetOperandValue(instruction_after_push, 0) == 5 and \
                        self.ida_proxy.GetOpType(instruction_after_push, 1) == 1 and \
                        self.ida_proxy.GetOperandValue(instruction_after_push, 1) == 4:
                    print("[+] Fixed undefined code with function prologue (push ebp; mov ebp, esp) to function "
                          "@ [%08x]" % (next_instruction))
                    self.ida_proxy.MakeFunction(next_instruction)

    def convertDataWithPrologueToCode(self):
        """
        Search all segments for the byte sequence "55 8B EC" and convert hits
        that are not yet code to functions.
        """
        current_seg = self.ida_proxy.FirstSeg()
        seg_end = self.ida_proxy.SegEnd(current_seg)
        while current_seg != self.ida_proxy.BAD_ADDR:
            signature_hit = self.ida_proxy.find_binary(current_seg, seg_end, "55 8B EC", 16, 1)
            if signature_hit != self.ida_proxy.BAD_ADDR:
                flags = self.ida_proxy.GetFlags(signature_hit)
                if not self.ida_proxy.isCode(flags):
                    self.ida_proxy.MakeFunction(signature_hit)
                    print("[+] Fixed undefined data with potential function prologue (push ebp; mov ebp, esp) "
                          "to function @ [%08x]" % (signature_hit))
                # continue the search behind the hit
                current_seg = signature_hit + 3 + 1
            else:
                current_seg = self.ida_proxy.NextSeg(seg_end)
                if not current_seg == self.ida_proxy.BAD_ADDR:
                    seg_end = self.ida_proxy.SegEnd(current_seg)
class CryptoIdentifier():
    """
    This class contains the logic to perform Crypto identification.
    Two techniques are currently supported:
    1. A heuristic approach that identifies functions and basic blocks
       based on the ratio of arithmetic/logic instructions to all instructions
    2. A signature-based approach, using the signatures defined in PatternManager
    """

    def __init__(self):
        self.name = "CryptoIdentifier"
        print("[*] loading CryptoIdentifier")
        # modules and data classes are kept as attributes and always accessed through self
        self.time = time
        self.re = re
        self.GraphHelper = GraphHelper
        self.CryptoSignatureHit = CryptoSignatureHit
        self.AritlogBasicBlock = AritlogBasicBlock
        self.Segment = Segment
        self.pm = PatternManager()
        self.low_rating_threshold = 0.4
        self.high_rating_threshold = 1.0
        self.low_instruction_threshold = 8
        self.high_instruction_threshold = 100
        # if the threshold is set to this value, it is automatically expanded to infinite.
        self.max_instruction_threshold = 100
        self.low_call_threshold = 0
        self.high_call_threshold = 1
        # if the threshold is set to this value, it is automatically expanded to infinite.
        self.max_call_threshold = 10
        # if at least this fraction of a signature's length has been identified
        # consecutively, the location is marked as a signature hit.
        self.match_filter_factor = 0.5
        self.aritlog_blocks = []
        self.signature_hits = []
        self.ida_proxy = IdaProxy()
        return

    def scan(self):
        """
        Scan the whole IDB with all available techniques.
        """
        self.scanAritlog()
        self.scanCryptoPatterns()

################################################################################
# Aritlog scanning
################################################################################

    def scanAritlog(self):
        """
        Scan with the arithmetic/logic heuristic. All basic blocks of all functions are analyzed and
        stored in I{self.aritlog_blocks}; the return value is filtered with the current thresholds.
        @return: a list of AritLogBasicBlock data objects that fulfill the parameters as specified
        """
        print("[*] CryptoIdentifier: Starting aritlog heuristic analysis.")
        self.aritlog_blocks = []
        time_before = self.time.time()
        for function_ea in self.ida_proxy.Functions():
            function_chart = self.ida_proxy.FlowChart(self.ida_proxy.get_func(function_ea))
            calls_in_function = 0
            function_blocks = []
            function_dgraph = {}
            blocks_in_loops = set()
            for current_block in function_chart:
                block = self.AritlogBasicBlock(current_block.startEA, current_block.endEA)
                for instruction in self.ida_proxy.Heads(block.start_ea, block.end_ea):
                    if self.ida_proxy.isCode(self.ida_proxy.GetFlags(instruction)):
                        mnemonic = self.ida_proxy.GetMnem(instruction)
                        # identical operands are reported so the block can recognize
                        # zeroing instructions like "xor eax, eax"
                        has_identical_operands = self.ida_proxy.GetOperandValue(instruction, 0) == \
                            self.ida_proxy.GetOperandValue(instruction, 1)
                        block.updateInstructionCount(mnemonic, has_identical_operands)
                        if mnemonic == "call":
                            calls_in_function += 1
                function_blocks.append(block)
                # prepare graph for Tarjan's algorithm
                succeeding_blocks = [succ.startEA for succ in current_block.succs()]
                function_dgraph[current_block.startEA] = succeeding_blocks
                # add trivial loops (a block that is its own successor)
                if current_block.startEA in succeeding_blocks:
                    block.is_contained_in_trivial_loop = True
                    blocks_in_loops.add(current_block.startEA)
            # perform Tarjan's algorithm to identify strongly connected components (= loops)
            # in the function graph
            graph_helper = self.GraphHelper()
            strongly_connected = graph_helper.calculateStronglyConnectedComponents(function_dgraph)
            for component in strongly_connected:
                # components with a single block are no loops (trivial loops are handled above)
                if len(component) > 1:
                    blocks_in_loops.update(component)
            for block in function_blocks:
                if block.start_ea in blocks_in_loops:
                    block.is_contained_in_loop = True
                block.num_calls_in_function = calls_in_function
            self.aritlog_blocks.extend(function_blocks)
        print("[*] Heuristics analysis took %3.2f seconds." % (self.time.time() - time_before))
        return self.getAritlogBlocks(self.low_rating_threshold, self.high_rating_threshold,
                                     self.low_instruction_threshold, self.high_instruction_threshold,
                                     self.low_call_threshold, self.high_call_threshold,
                                     False, False, False)

    def _updateThresholds(self, min_rating, max_rating, min_instr, max_instr, min_call, max_call):
        """
        Update all six threshold bounds. Lower bounds are clamped to zero, the rating is clamped
        to [0.0, 1.0] and upper bounds that reach their "max" setting are expanded to 1000000.
        @param min_rating: the minimum arit/log ratio a basic block must have
        @type min_rating: float
        @param max_rating: the maximum arit/log ratio a basic block can have
        @type max_rating: float
        @param min_instr: the minimum number of instructions a basic block must have
        @type min_instr: int
        @param max_instr: the maximum number of instructions a basic block can have
        @type max_instr: int
        @param min_call: the minimum number of calls the function containing the basic block must have
        @type min_call: int
        @param max_call: the maximum number of calls the function containing the basic block can have
        @type max_call: int
        """
        self.low_rating_threshold = max(0.0, min_rating)
        self.high_rating_threshold = min(1.0, max_rating)
        self.low_instruction_threshold = max(0, min_instr)
        if max_instr >= self.max_instruction_threshold:
            # we cap the value here and safely assume there is no block with more than 1000000 instructions
            self.high_instruction_threshold = 1000000
        else:
            self.high_instruction_threshold = max_instr
        self.low_call_threshold = max(0, min_call)
        if max_call >= self.max_call_threshold:
            # we cap the value here and safely assume there is no function with more than 1000000 calls
            self.high_call_threshold = 1000000
        else:
            self.high_call_threshold = max_call

    def getAritlogBlocks(self, min_rating, max_rating, min_instr, max_instr, min_api, max_api, is_nonzero,
                         is_looped, is_trivially_looped):
        """
        Get all blocks that are within the limits specified by the heuristic parameters.
        Parameters are the same as in function "_updateThresholds" (which is called with them,
        so the stored thresholds are updated as a side effect) except
        @param is_nonzero: defines whether zeroing instructions (like xor eax, eax) shall be counted or not.
        @type is_nonzero: boolean
        @param is_looped: defines whether only basic blocks in loops shall be selected
        @type is_looped: boolean
        @param is_trivially_looped: defines whether only basic blocks in trivial loops shall be selected
        @type is_trivially_looped: boolean
        @return: a list of AritlogBasicBlock data objects, according to the parameters
        """
        self._updateThresholds(min_rating, max_rating, min_instr, max_instr, min_api, max_api)
        return [block for block in self.aritlog_blocks if
                (self.high_rating_threshold >= block.getAritlogRating(is_nonzero) >= self.low_rating_threshold) and
                (self.high_instruction_threshold >= block.num_instructions >= self.low_instruction_threshold) and
                (self.high_call_threshold >= block.num_calls_in_function >= self.low_call_threshold) and
                (not is_looped or block.is_contained_in_loop) and
                (not is_trivially_looped or block.is_contained_in_trivial_loop)]

    def getUnfilteredBlockCount(self):
        """
        returns the number of basic blocks that have been analyzed.
        @return: (int) number of basic blocks
        """
        return len(self.aritlog_blocks)

################################################################################
# Signature scanning
################################################################################

    def getSegmentData(self):
        """
        returns the raw bytes of the segments as stored by IDA. Segments whose data cannot
        be read are reported on the console and skipped.
        @return: a list of Segment data objects.
        """
        segments = []
        for segment_ea in self.ida_proxy.Segments():
            try:
                segment = self.Segment()
                segment.start_ea = segment_ea
                segment.end_ea = self.ida_proxy.SegEnd(segment_ea)
                segment.name = self.ida_proxy.SegName(segment_ea)
                # join once instead of growing the buffer byte by byte
                segment.data = "".join([chr(self.ida_proxy.get_byte(ea)) for ea in
                                        helpers.Misc.lrange(segment_ea, self.ida_proxy.SegEnd(segment_ea))])
                segments.append(segment)
            except Exception:
                # best effort: skip the segment, but do not swallow KeyboardInterrupt/SystemExit
                print("[!] Tried to access invalid segment data. An error has occurred while address conversion")
        return segments

    def scanCryptoPatterns(self, pattern_size=32):
        """
        Scan all segments for the tokenized signatures of the PatternManager and for its variable
        signatures. The result is also stored in I{self.signature_hits}.
        @param pattern_size: passed to PatternManager.getTokenizedSignatures()
        @type pattern_size: int
        @return: a list of CryptoSignatureHit data objects
        """
        crypt_results = []
        print("[*] CryptoIdentifier: Starting crypto signature scanning.")
        time_before_matching = self.time.time()
        segments = self.getSegmentData()
        keywords = self.pm.getTokenizedSignatures(pattern_size)
        for keyword, signature_names in keywords.items():
            # the pattern only depends on the keyword, so build it once for all segments
            pattern = self.re.compile(self.re.escape(keyword))
            for segment in segments:
                crypt_results.extend([self.CryptoSignatureHit(segment.start_ea + match.start(),
                                                              signature_names, keyword)
                                      for match in pattern.finditer(segment.data)])
        variable_matches = self.scanVariablePatterns()
        crypt_results.extend(variable_matches)
        print("[*] Full matching took %3.2f seconds and resulted in %d hits." %
              (self.time.time() - time_before_matching, len(crypt_results)))
        self.signature_hits = crypt_results
        return crypt_results

    def scanVariablePatterns(self):
        """
        Scan all segments for the variable signatures of the PatternManager, using IDA's binary search.
        @return: a list of CryptoSignatureHit data objects
        """
        # the scanning code is roughly based on kyprizel's signature scan, see credits above for more information
        crypt_results = []
        variable_signatures = self.pm.getVariableSignatures()
        for var_sig, signature in variable_signatures.items():
            current_seg = self.ida_proxy.FirstSeg()
            seg_end = self.ida_proxy.SegEnd(current_seg)
            while current_seg != self.ida_proxy.BAD_ADDR:
                signature_hit = self.ida_proxy.find_binary(current_seg, seg_end, signature, 16, 1)
                if signature_hit != self.ida_proxy.BAD_ADDR:
                    crypt_results.append(self.CryptoSignatureHit(signature_hit, [var_sig], signature))
                    # continue behind the hit; the signature consists of space separated tokens
                    current_seg = signature_hit + signature.count(" ") + 1
                else:
                    current_seg = self.ida_proxy.NextSeg(seg_end)
                    if not current_seg == self.ida_proxy.BAD_ADDR:
                        seg_end = self.ida_proxy.SegEnd(current_seg)
        return crypt_results

    def getSignatureLength(self, signature_name):
        """
        returns the length for a signature, identified by its name
        @param signature_name: name for a signature, e.g. "ADLER 32"
        @type signature_name: str
        @return: (int) length of the signature, 0 if the name is unknown.
        """
        for signature, name in self.pm.signatures.items():
            if name == signature_name:
                return len(signature)
        return 0

    def getSignatureHits(self):
        """
        Get all signature hits that have a length of at least match_filter_factor percent
        of the signature they triggered. Hits are grouped by signature names.
        @return: a dictionary with key/value entries of the following form: ("signature name", [CryptoSignatureHit])
        """
        sorted_hits = sorted(self.signature_hits)
        unified_hits = []
        previous_signature_names = []
        for hit in sorted_hits:
            hit_intersection = [element for element in hit.signature_names if element in previous_signature_names]
            if len(hit_intersection) == 0:
                # no signature in common with the previous hit: start a new unified hit
                previous_signature_names = hit.signature_names
                unified_hits.append(self.CryptoSignatureHit(hit.start_address, hit.signature_names,
                                                            hit.matched_signature))
            else:
                previous_signature_names = hit_intersection
                previous_hit = unified_hits[-1]
                if hit.start_address == previous_hit.start_address + len(previous_hit.matched_signature):
                    # the hit directly continues the previous one: merge them
                    previous_hit.matched_signature += hit.matched_signature
                    previous_hit.signature_names = hit_intersection
                else:
                    unified_hits.append(self.CryptoSignatureHit(hit.start_address, hit.signature_names,
                                                                hit.matched_signature))

        filtered_hits = []
        for hit in unified_hits:
            required_length = max([self.match_filter_factor * self.getSignatureLength(name)
                                   for name in hit.signature_names])
            if len(hit.matched_signature) >= required_length:
                hit.code_refs_to = self.getXrefsToAddress(hit.start_address)
                filtered_hits.append(hit)

        grouped_hits = {}
        for hit in filtered_hits:
            for name in hit.signature_names:
                grouped_hits.setdefault(name, []).append(hit)
        return grouped_hits

    def getXrefsToAddress(self, address):
        """
        get all references to a certain address.
        These are no xrefs in IDA sense but references to the crypto signatures.
        If the signature points to an instruction, e.g. if a constant is moved to a register, the return is
        flagged as "True", meaning it is an in-code reference.
        @param address: an arbitrary address
        @type address: int
        @return: a list of tuples (int, boolean)
        """
        xrefs = []
        # NOTE(review): the lower bound of 14 bytes presumably relates to the maximum
        # instruction length - confirm.
        head_to_address = self.ida_proxy.PrevHead(address, address - 14)
        # compare against the proxy's BAD_ADDR instead of a hard-coded 32bit constant,
        # consistent with the other scanning methods of this class
        if head_to_address != self.ida_proxy.BAD_ADDR:
            flags = self.ida_proxy.GetFlags(head_to_address)
            if self.ida_proxy.isCode(flags):
                xrefs.append((head_to_address, True))
        for x in self.ida_proxy.XrefsTo(address):
            flags = self.ida_proxy.GetFlags(x.frm)
            if self.ida_proxy.isCode(flags):
                xrefs.append((x.frm, False))
        return xrefs
class DocumentationHelper():
    """
    This class handles instruction coloring and offers helpers to convert undefined code / data
    into functions.
    """

    # data layout of color maps
    layout_color_map = {"tag": {"base_color": 0x112233, "highlight_color": 0x445566}}

    def __init__(self, idascope_config):
        """
        @param idascope_config: configuration object, its I{semantics_file} attribute names the
                                semantic configuration file to load
        """
        print("[|] loading DocumentationHelper")
        self.ida_proxy = IdaProxy()
        # default colors are grey / light red / red
        self.default_neutral_color = 0xCCCCCC
        self.default_base_color = 0xB3B3FF
        self.default_highlight_color = 0x3333FF
        self.color_state = "unknown"
        self.idascope_config = idascope_config
        self._loadConfig(self.idascope_config.semantics_file)
        return

    def _loadConfig(self, config_filename):
        """
        Loads a semantic configuration file and generates a color map from the contained information.
        @param config_filename: filename of a semantic configuration file
        @type config_filename: str
        """
        # the context manager makes sure the file handle is released, even if reading fails
        with open(config_filename, "r") as config_file:
            config = config_file.read()
        parsed_config = json.loads(config, object_hook=JsonHelper.decode_dict)
        self.default_neutral_color = int(parsed_config["default_neutral_color"], 16)
        self.default_base_color = int(parsed_config["default_base_color"], 16)
        self.default_highlight_color = int(parsed_config["default_highlight_color"], 16)
        self.color_map = self._generateColorMapFromDefinitions(parsed_config)
        return

    @staticmethod
    def _toColorCode(value):
        """
        Internal function to convert a color given as hex string or as number into a number.
        @param value: a color code, either as hex string (as found in the config) or as int
        @return: (int) the color code
        """
        try:
            return int(value, 16)
        except TypeError:
            # int() rejects an explicit base for non-strings, so the value is already numeric
            return int(value)

    def _generateColorMapFromDefinitions(self, config):
        """
        Internal function to generate a color map from a semantic definitions config file.
        @param config: the parsed semantic definitions config, providing "semantic_definitions"
                       and "semantic_groups".
        @type config: dict
        @return: a dictionary of a color map, see I{layout_color_map} for a reference
        """
        color_map = {}
        for definition in config["semantic_definitions"]:
            # convert text representation of color codes to numbers. The fallback of
            # _getColorsForGroup() already yields numbers, which are accepted as well.
            group_colors = self._getColorsForGroup(definition["group"], config)
            color_map[definition["tag"]] = {"base_color": self._toColorCode(group_colors[0]),
                                            "highlight_color": self._toColorCode(group_colors[1])}
        return color_map

    def _getColorsForGroup(self, target_group, config):
        """
        Internal function to look up the colors of a semantic group.
        @param target_group: tag of the group to look up
        @type target_group: str
        @param config: the parsed semantic definitions config
        @type config: dict
        @return: a tuple (base_color, highlight_color) as stored in the config; the numeric
                 default colors if the group is not defined.
        """
        for group in config["semantic_groups"]:
            if group["tag"] == target_group:
                return (group["base_color"], group["highlight_color"])
        print("[-] Failed to get colors for group \"%s\" - you might want to check your semantics file." %
              target_group)
        return (self.default_base_color, self.default_highlight_color)

    def uncolorAll(self):
        """
        Uncolors all instructions of all segments by changing their color to white.
        """
        for seg_ea in self.ida_proxy.Segments():
            for function_address in self.ida_proxy.Functions(self.ida_proxy.SegStart(seg_ea),
                                                             self.ida_proxy.SegEnd(seg_ea)):
                for block in self.ida_proxy.FlowChart(self.ida_proxy.get_func(function_address)):
                    for head in self.ida_proxy.Heads(block.startEA, block.endEA):
                        self.colorInstruction(head, 0xFFFFFF, refresh=False)
        # refresh once at the end instead of once per instruction
        self.ida_proxy.refresh_idaview_anyway()

    def colorInstruction(self, address, color, refresh=True):
        """
        Colors the instruction at an address with the given color code.
        @param address: address of the instruction to color
        @type address: int
        @param color: color-code to set for the instruction
        @type color: int (0xBBGGRR)
        @param refresh: refresh IDA view to ensure the color shows directly, can be omitted for performance.
        @type refresh: boolean
        """
        self.ida_proxy.SetColor(address, self.ida_proxy.CIC_ITEM, color)
        if refresh:
            self.ida_proxy.refresh_idaview_anyway()

    def colorBasicBlock(self, address, color, refresh=True):
        """
        Colors the basic block containing a target address with the given color code.
        @param address: address an instruction in the basic block to color
        @type address: int
        @param color: color-code to set for the instruction
        @type color: int (0xBBGGRR)
        @param refresh: refresh IDA view to ensure the color shows directly, can be omitted for performance.
        @type refresh: boolean
        """
        function_chart = self.ida_proxy.FlowChart(self.ida_proxy.get_func(address))
        for block in function_chart:
            if block.startEA <= address < block.endEA:
                for head in self.ida_proxy.Heads(block.startEA, block.endEA):
                    self.colorInstruction(head, color, refresh)

    def getNextColorScheme(self):
        """
        get the next color scheme in the three-cycle "individual/mono/uncolored", where individual is semantic coloring
        @return: next state
        """
        if self.color_state == "individual":
            return "mono"
        elif self.color_state == "mono":
            return "uncolored"
        # "uncolored" as well as any unknown state lead to semantic coloring
        return "individual"

    def selectHighlightColor(self, tag):
        """
        automatically chooses the highlight color for a tag based on the current color scheme
        @param tag: the semantic tag to choose the color for
        @type tag: str
        @return: (int) a color code
        """
        next_scheme = self.getNextColorScheme()
        if next_scheme == "uncolored":
            return 0xFFFFFF
        elif next_scheme == "mono":
            return self.default_highlight_color
        return self.color_map[tag]["highlight_color"]

    def selectBaseColor(self, tagged_addresses_in_block):
        """
        automatically chooses the base color for a block based on the current color scheme
        @param tagged_addresses_in_block: all tagged addresses in a basic block for which the color shall be chosen
        @type tagged_addresses_in_block: a list of tuples (int, str) containing pairs of instruction addresses and tags
        @return: (int) a color code
        """
        next_scheme = self.getNextColorScheme()
        if next_scheme == "uncolored":
            return 0xFFFFFF
        elif next_scheme == "mono":
            return self.default_base_color
        colors_in_block = set(self.color_map[item[1]]["base_color"] for item in tagged_addresses_in_block)
        if len(colors_in_block) == 1:
            return colors_in_block.pop()
        # tags of differently colored groups (or none at all) share the block: fall back to neutral
        return self.default_neutral_color

    def colorize(self, scan_result):
        """
        perform coloring on the IDB, based on a scan performed by SemanticIdentifier
        @param scan_result: result of a scan as performed by SemanticIdentifier
        @type scan_result: a dictionary with key/value entries of the following form: (address, [FunctionContext])
        """
        for function_address, function_context in scan_result.items():
            tagged_addresses_in_function = function_context.getAllTaggedAddresses()
            function_chart = self.ida_proxy.FlowChart(self.ida_proxy.get_func(function_address))
            for basic_block in function_chart:
                # plain comparisons instead of "addr in xrange(...)": constant time and safe for
                # addresses that do not fit into a C long
                tagged_addresses_in_block = [(addr, tagged_addresses_in_function[addr])
                                             for addr in tagged_addresses_in_function.keys()
                                             if basic_block.startEA <= addr < basic_block.endEA]
                if len(tagged_addresses_in_block) > 0:
                    base_color = self.selectBaseColor(tagged_addresses_in_block)
                    self.colorBasicBlock(basic_block.startEA, base_color, refresh=False)
                    for tagged_address in tagged_addresses_in_block:
                        highlight_color = self.selectHighlightColor(tagged_address[1])
                        self.colorInstruction(tagged_address[0], highlight_color, refresh=False)
        # advance the color cycle only after everything has been colored with the selected scheme
        self.color_state = self.getNextColorScheme()
        self.ida_proxy.refresh_idaview_anyway()

    def getNextNonFuncInstruction(self, addr):
        """
        Search downwards for the next address that is not part of a function but flagged as code.
        @param addr: address to start the search from
        @type addr: int
        @return: (int) address of the instruction, BAD_ADDR if there is none
        """
        next_instruction = addr
        while next_instruction != self.ida_proxy.BAD_ADDR:
            next_instruction = self.ida_proxy.find_not_func(next_instruction, self.ida_proxy.SEARCH_DOWN)
            flags = self.ida_proxy.GetFlags(next_instruction)
            if self.ida_proxy.isCode(flags):
                return next_instruction
        return self.ida_proxy.BAD_ADDR

    def convertNonFunctionCode(self):
        """
        Convert code that is not part of a function into functions: first everything that shows a
        function prologue, then all remaining code outside of functions.
        """
        self.convertAnyProloguesToFunctions()
        # do a second run to define the rest
        next_instruction = self.ida_proxy.minEA()
        while next_instruction != self.ida_proxy.BAD_ADDR:
            next_instruction = self.getNextNonFuncInstruction(next_instruction)
            if next_instruction == self.ida_proxy.BAD_ADDR:
                # search exhausted: do not report or try to define a function at BAD_ADDR
                break
            print("[+] Fixed undefined code to function @ [%08x]" % (next_instruction))
            self.ida_proxy.MakeFunction(next_instruction)
        return

    def convertAnyProloguesToFunctions(self):
        """
        Convert both data and non-function code starting with a function prologue into functions.
        """
        self.convertDataWithPrologueToCode()
        self.convertNonFunctionCodeWithPrologues()

    def convertNonFunctionCodeWithPrologues(self):
        """
        Convert code outside of functions that starts with the prologue "push ebp; mov ebp, esp"
        into functions.
        """
        next_instruction = self.ida_proxy.minEA()
        while next_instruction != self.ida_proxy.BAD_ADDR:
            next_instruction = self.getNextNonFuncInstruction(next_instruction)
            if next_instruction == self.ida_proxy.BAD_ADDR:
                break
            # NOTE(review): operand type 1 with values 5 / 4 presumably denotes the registers
            # ebp / esp, as named in the message below - confirm
            if self.ida_proxy.GetMnem(next_instruction).startswith("push") and \
                    self.ida_proxy.GetOpType(next_instruction, 0) == 1 and \
                    self.ida_proxy.GetOperandValue(next_instruction, 0) == 5:
                instruction_after_push = self.getNextNonFuncInstruction(next_instruction)
                if self.ida_proxy.GetMnem(instruction_after_push).startswith("mov") and \
                        self.ida_proxy.GetOpType(instruction_after_push, 0) == 1 and \
                        self.ida_proxy.GetOperandValue(instruction_after_push, 0) == 5 and \
                        self.ida_proxy.GetOpType(instruction_after_push, 1) == 1 and \
                        self.ida_proxy.GetOperandValue(instruction_after_push, 1) == 4:
                    print("[+] Fixed undefined code with function prologue (push ebp; mov ebp, esp) to function "
                          + "@ [%08x]" % (next_instruction))
                    self.ida_proxy.MakeFunction(next_instruction)

    def convertDataWithPrologueToCode(self):
        """
        Search all segments for the byte sequence "55 8B EC" and try to create a function at every
        hit that is not yet flagged as code.
        """
        current_seg = self.ida_proxy.FirstSeg()
        seg_end = self.ida_proxy.SegEnd(current_seg)
        while current_seg != self.ida_proxy.BAD_ADDR:
            signature_hit = self.ida_proxy.find_binary(current_seg, seg_end, "55 8B EC", 16, 1)
            if signature_hit != self.ida_proxy.BAD_ADDR:
                flags = self.ida_proxy.GetFlags(signature_hit)
                if not self.ida_proxy.isCode(flags):
                    self.ida_proxy.MakeFunction(signature_hit)
                    print("[+] Fixed undefined data with potential function prologue (push ebp; mov ebp, esp) to function "
                          + "@ [%08x]" % (signature_hit))
                # continue searching behind the hit within the same segment
                current_seg = signature_hit + 3 + 1
            else:
                # nothing (more) found in this segment, move on to the next one
                current_seg = self.ida_proxy.NextSeg(seg_end)
                if not current_seg == self.ida_proxy.BAD_ADDR:
                    seg_end = self.ida_proxy.SegEnd(current_seg)