def __init__(self):
     """
     Set up a CryptoIdentifier with its default thresholds and helper objects.

     Modules and data classes are stored as attributes so that the rest of
     the code can reach them through the instance.
     """
     self.name = "CryptoIdentifier"
     print("[*] loading CryptoIdentifier")

     # modules and data classes, accessed through the instance
     self.re = re
     self.time = time
     self.Segment = Segment
     self.GraphHelper = GraphHelper
     self.AritlogBasicBlock = AritlogBasicBlock
     self.CryptoSignatureHit = CryptoSignatureHit
     self.pm = PatternManager()

     # lower/upper bounds used by the arithmetic/logic heuristic
     self.low_rating_threshold, self.high_rating_threshold = 0.4, 1.0
     self.low_instruction_threshold, self.high_instruction_threshold = 8, 100
     self.low_call_threshold, self.high_call_threshold = 0, 1
     # if an upper threshold is set to one of these values, it is
     # automatically expanded to infinite.
     self.max_instruction_threshold = 100
     self.max_call_threshold = 10

     # if at least this fraction of a signature's length has been identified
     # consecutively, the location is marked as a signature hit.
     self.match_filter_factor = 0.5

     # result containers
     self.signature_hits = []
     self.aritlog_blocks = []
     self.ida_proxy = IdaProxy()
 def __init__(self):
     """
     Initialise the identifier: default thresholds, helper references and
     empty result lists.
     """
     self.name = "CryptoIdentifier"
     print("[*] loading CryptoIdentifier")

     # references to modules / data classes, used via self elsewhere
     self.time, self.re = time, re
     self.GraphHelper = GraphHelper
     self.CryptoSignatureHit = CryptoSignatureHit
     self.AritlogBasicBlock = AritlogBasicBlock
     self.Segment = Segment
     self.pm = PatternManager()

     # arit/log rating window
     self.low_rating_threshold = 0.4
     self.high_rating_threshold = 1.0

     # instruction count window; an upper bound equal to
     # max_instruction_threshold is automatically expanded to infinite.
     self.low_instruction_threshold = 8
     self.high_instruction_threshold = 100
     self.max_instruction_threshold = 100

     # call count window; an upper bound equal to max_call_threshold is
     # automatically expanded to infinite.
     self.low_call_threshold = 0
     self.high_call_threshold = 1
     self.max_call_threshold = 10

     # if at least this fraction of a signature's length has been identified
     # consecutively, the location is marked as a signature hit.
     self.match_filter_factor = 0.5

     self.aritlog_blocks = []
     self.signature_hits = []
     self.ida_proxy = IdaProxy()
class CryptoIdentifier():
    """
    This class contains the logic to perform Crypto identification.
    Two techniques are currently supported:
    1. A heuristic approach that identifies functions and basic blocks
    based on the ratio of arithmetic/logic instructions to all instructions
    2. A signature-based approach, using the signatures defined in PatternManager
    """

    def __init__(self):
        """
        Set up default thresholds, helper references and empty result lists.
        No scanning is performed here.
        """
        self.name = "CryptoIdentifier"
        print("[*] loading CryptoIdentifier")
        # modules and data classes are stored on the instance and used via self
        self.time = time
        self.re = re
        self.GraphHelper = GraphHelper
        self.CryptoSignatureHit = CryptoSignatureHit
        self.AritlogBasicBlock = AritlogBasicBlock
        self.Segment = Segment
        self.pm = PatternManager()
        self.low_rating_threshold = 0.4
        self.high_rating_threshold = 1.0
        self.low_instruction_threshold = 8
        self.high_instruction_threshold = 100
        # if the threshold is set to this value, it is automatically expanded to infinite.
        self.max_instruction_threshold = 100
        self.low_call_threshold = 0
        self.high_call_threshold = 1
        # if the threshold is set to this value, it is automatically expanded to infinite.
        self.max_call_threshold = 10
        # if at least this fraction of a signature's length has been identified
        # consecutively, the location is marked as a signature hit.
        self.match_filter_factor = 0.5
        self.aritlog_blocks = []
        self.signature_hits = []
        self.ida_proxy = IdaProxy()

    def scan(self):
        """
        Scan the whole IDB with all available techniques.
        """
        self.scanAritlog()
        self.scanCryptoPatterns()

################################################################################
# Aritlog scanning
################################################################################

    def scanAritlog(self):
        """
        scan with the arithmetic/logic heuristic.
        All basic blocks of all functions are analyzed and stored in self.aritlog_blocks;
        the returned list is filtered with the currently configured thresholds.
        @return: a list of AritLogBasicBlock data objects that fulfill the parameters as specified
        """
        print("[*] CryptoIdentifier: Starting aritlog heuristic analysis.")
        self.aritlog_blocks = []
        time_before = self.time.time()
        for function_ea in self.ida_proxy.Functions():
            function_chart = self.ida_proxy.FlowChart(self.ida_proxy.get_func(function_ea))
            calls_in_function = 0
            function_blocks = []
            function_dgraph = {}
            blocks_in_loops = set()
            for current_block in function_chart:
                block = self.AritlogBasicBlock(current_block.startEA, current_block.endEA)
                for instruction in self.ida_proxy.Heads(block.start_ea, block.end_ea):
                    if not self.ida_proxy.isCode(self.ida_proxy.GetFlags(instruction)):
                        continue
                    mnemonic = self.ida_proxy.GetMnem(instruction)
                    # identical operand values are reported to the block so that it can
                    # recognize zeroing instructions (like xor eax, eax)
                    has_identical_operands = self.ida_proxy.GetOperandValue(instruction, 0) == \
                        self.ida_proxy.GetOperandValue(instruction, 1)
                    block.updateInstructionCount(mnemonic, has_identical_operands)
                    if mnemonic == "call":
                        calls_in_function += 1
                function_blocks.append(block)
                # prepare graph for Tarjan's algorithm
                succeeding_blocks = [succ.startEA for succ in current_block.succs()]
                function_dgraph[current_block.startEA] = succeeding_blocks
                # add trivial loops (a block that is its own successor)
                if current_block.startEA in succeeding_blocks:
                    block.is_contained_in_trivial_loop = True
                    blocks_in_loops.add(current_block.startEA)
            # perform Tarjan's algorithm to identify strongly connected components (= loops) in the function graph
            graph_helper = self.GraphHelper()
            strongly_connected = graph_helper.calculateStronglyConnectedComponents(function_dgraph)
            for component in strongly_connected:
                # components of size one are no loops (trivial loops have been handled above)
                if len(component) > 1:
                    blocks_in_loops.update(component)
            for block in function_blocks:
                if block.start_ea in blocks_in_loops:
                    block.is_contained_in_loop = True
                # the call count is only complete once all blocks of the function have been visited
                block.num_calls_in_function = calls_in_function
            self.aritlog_blocks.extend(function_blocks)
        print("[*] Heuristics analysis took %3.2f seconds." % (self.time.time() - time_before))

        return self.getAritlogBlocks(self.low_rating_threshold, self.high_rating_threshold,
                                     self.low_instruction_threshold, self.high_instruction_threshold,
                                     self.low_call_threshold, self.high_call_threshold,
                                     False, False, False)

    def _updateThresholds(self, min_rating, max_rating, min_instr, max_instr, min_call, max_call):
        """
        update all six threshold bounds
        @param min_rating: the minimum arit/log ratio a basic block must have (clamped to >= 0.0)
        @type min_rating: float
        @param max_rating: the maximum arit/log ratio a basic block can have (clamped to <= 1.0)
        @type max_rating: float
        @param min_instr: the minimum number of instructions a basic block must have
        @type min_instr: int
        @param max_instr: the maximum number of instructions a basic block can have
        @type max_instr: int
        @param min_call: the minimum number of calls the function containing the basic block must have
        @type min_call: int
        @param max_call: the maximum number of calls the function containing the basic block can have
        @type max_call: int
        """
        self.low_rating_threshold = max(0.0, min_rating)
        self.high_rating_threshold = min(1.0, max_rating)
        self.low_instruction_threshold = max(0, min_instr)
        if max_instr >= self.max_instruction_threshold:
            # we cap the value here and safely assume there is no block with more than 1000000 instructions
            self.high_instruction_threshold = 1000000
        else:
            self.high_instruction_threshold = max_instr
        self.low_call_threshold = max(0, min_call)
        if max_call >= self.max_call_threshold:
            # we cap the value here and safely assume there is no function with more than 1000000 calls
            self.high_call_threshold = 1000000
        else:
            self.high_call_threshold = max_call

    def getAritlogBlocks(self, min_rating, max_rating, min_instr, max_instr, min_api, max_api, is_nonzero, \
        is_looped, is_trivially_looped):
        """
        get all blocks that are within the limits specified by the heuristic parameters.
        The thresholds stored on this object are updated as a side effect.
        parameters are the same as in function "_updateThresholds" except
        @param is_nonzero: defines whether zeroing instructions (like xor eax, eax) shall be counted or not.
        @type is_nonzero: boolean
        @param is_looped: defines whether only basic blocks in loops shall be selected
        @type is_looped: boolean
        @param is_trivially_looped: defines whether only basic blocks that are their own successor
                                    shall be selected
        @type is_trivially_looped: boolean
        @return: a list of AritlogBasicBlock data objects, according to the parameters
        """
        self._updateThresholds(min_rating, max_rating, min_instr, max_instr, min_api, max_api)
        return [
            block for block in self.aritlog_blocks
            if (self.high_rating_threshold >= block.getAritlogRating(is_nonzero) >= self.low_rating_threshold)
            and (self.high_instruction_threshold >= block.num_instructions >= self.low_instruction_threshold)
            and (self.high_call_threshold >= block.num_calls_in_function >= self.low_call_threshold)
            and (not is_looped or block.is_contained_in_loop)
            and (not is_trivially_looped or block.is_contained_in_trivial_loop)
        ]

    def getUnfilteredBlockCount(self):
        """
        returns the number of basic blocks that have been analyzed.
        @return: (int) number of basic blocks
        """
        return len(self.aritlog_blocks)

################################################################################
# Signature scanning
################################################################################

    def getSegmentData(self):
        """
        returns the raw bytes of the segments as stored by IDA.
        Segments whose data cannot be read are reported on the console and skipped.
        @return: a list of Segment data objects.
        """
        segments = []
        for segment_ea in self.ida_proxy.Segments():
            try:
                segment = self.Segment()
                segment.start_ea = segment_ea
                segment.end_ea = self.ida_proxy.SegEnd(segment_ea)
                segment.name = self.ida_proxy.SegName(segment_ea)
                # join once instead of growing a string byte by byte
                segment.data = "".join(
                    chr(self.ida_proxy.get_byte(ea))
                    for ea in helpers.Misc.lrange(segment_ea, self.ida_proxy.SegEnd(segment_ea)))
                segments.append(segment)
            except Exception:
                # best effort: skip this segment, but do not swallow KeyboardInterrupt / SystemExit
                print(
                    "[!] Tried to access invalid segment data. An error has occurred while address conversion"
                )
        return segments

    def scanCryptoPatterns(self, pattern_size=32):
        """
        scan all segments for the tokenized signatures provided by the PatternManager and
        for the variable signatures (see scanVariablePatterns).
        The hits are stored in self.signature_hits.
        @param pattern_size: passed on to PatternManager.getTokenizedSignatures()
        @type pattern_size: int
        @return: a list of CryptoSignatureHit data objects
        """
        crypt_results = []
        print("[*] CryptoIdentifier: Starting crypto signature scanning.")
        time_before_matching = self.time.time()
        segments = self.getSegmentData()
        keywords = self.pm.getTokenizedSignatures(pattern_size)
        for keyword, signature_names in keywords.items():
            # the keyword is matched literally; compile it once for all segments
            pattern = self.re.compile(self.re.escape(keyword))
            for segment in segments:
                crypt_results.extend(
                    self.CryptoSignatureHit(segment.start_ea + match.start(), signature_names, keyword)
                    for match in pattern.finditer(segment.data))
        variable_matches = self.scanVariablePatterns()
        crypt_results.extend(variable_matches)
        print("[*] Full matching took %3.2f seconds and resulted in %d hits." %
              (self.time.time() - time_before_matching, len(crypt_results)))
        self.signature_hits = crypt_results
        return crypt_results

    def scanVariablePatterns(self):
        """
        scan all segments for the variable signatures provided by the PatternManager,
        using IDA's binary search.
        @return: a list of CryptoSignatureHit data objects
        """
        # the scanning code is roughly based on kyprizel's signature scan, see credits above for more information
        crypt_results = []
        variable_signatures = self.pm.getVariableSignatures()
        for var_sig, pattern in variable_signatures.items():
            # the search resumes behind a hit; the distance is the number of
            # space-separated tokens in the pattern string
            advance = pattern.count(" ") + 1
            current_seg = self.ida_proxy.FirstSeg()
            seg_end = self.ida_proxy.SegEnd(current_seg)
            while current_seg != self.ida_proxy.BAD_ADDR:
                signature_hit = self.ida_proxy.find_binary(current_seg, seg_end, pattern, 16, 1)
                if signature_hit != self.ida_proxy.BAD_ADDR:
                    crypt_results.append(self.CryptoSignatureHit(signature_hit, [var_sig], pattern))
                    current_seg = signature_hit + advance
                else:
                    # nothing (more) found in this segment, continue with the next one
                    current_seg = self.ida_proxy.NextSeg(seg_end)
                    if current_seg != self.ida_proxy.BAD_ADDR:
                        seg_end = self.ida_proxy.SegEnd(current_seg)
        return crypt_results

    def getSignatureLength(self, signature_name):
        """
        returns the length for a signature, identified by its name
        @param signature_name: name for a signature, e.g. "ADLER 32"
        @type signature_name: str
        @return: (int) length of the signature, or 0 if the name is unknown.
        """
        for signature, name in self.pm.signatures.items():
            if name == signature_name:
                return len(signature)
        return 0

    def getSignatureHits(self):
        """
        Get all signature hits that have a length of at least match_filter_factor percent
        of the signature they triggered.
        Hits are grouped by signature names. Hits without any signature name are ignored.
        @return: a dictionary  with key/value entries of the following form: ("signature name", [CryptoSignatureHit])
        """
        sorted_hits = sorted(self.signature_hits)
        unified_hits = []

        # step 1: merge hits that directly follow each other and share at least one signature name
        previous_signature_names = []
        for hit in sorted_hits:
            hit_intersection = [
                element for element in hit.signature_names
                if element in previous_signature_names
            ]
            if not hit_intersection:
                previous_signature_names = hit.signature_names
                unified_hits.append(
                    self.CryptoSignatureHit(hit.start_address, hit.signature_names, hit.matched_signature))
                continue
            previous_signature_names = hit_intersection
            previous_hit = unified_hits[-1]
            if hit.start_address == previous_hit.start_address + len(previous_hit.matched_signature):
                # consecutive part of the same signature(s): extend the previous hit
                previous_hit.matched_signature += hit.matched_signature
                previous_hit.signature_names = hit_intersection
            else:
                unified_hits.append(
                    self.CryptoSignatureHit(hit.start_address, hit.signature_names, hit.matched_signature))

        # step 2: drop hits that cover too small a part of their signature(s)
        filtered_hits = []
        for hit in unified_hits:
            if not hit.signature_names:
                # nothing to compare against and nothing to group by (max() of an empty list would raise)
                continue
            required_length = max(
                self.match_filter_factor * self.getSignatureLength(name)
                for name in hit.signature_names)
            if len(hit.matched_signature) >= required_length:
                hit.code_refs_to = self.getXrefsToAddress(hit.start_address)
                filtered_hits.append(hit)

        # step 3: group by signature name
        grouped_hits = {}
        for hit in filtered_hits:
            for name in hit.signature_names:
                grouped_hits.setdefault(name, []).append(hit)

        return grouped_hits

    def getXrefsToAddress(self, address):
        """
        get all references to a certain address.
        These are no xrefs in IDA sense but references to the crypto signatures.
        If the signature points to an instruction, e.g. if a constant is moved to a register, the return is flagged as
        "True", meaning it is an in-code reference.
        @param address: an arbitrary address
        @type address: int
        @return: a list of tuples (int, boolean)
        """
        xrefs = []
        head_to_address = self.ida_proxy.PrevHead(address, address - 14)
        # compare against the proxy's BAD_ADDR as the rest of this class does; the
        # 32bit constant that was checked before is still treated as invalid.
        if head_to_address not in (self.ida_proxy.BAD_ADDR, 0xFFFFFFFF):
            flags = self.ida_proxy.GetFlags(head_to_address)
            if self.ida_proxy.isCode(flags):
                xrefs.append((head_to_address, True))
        for x in self.ida_proxy.XrefsTo(address):
            flags = self.ida_proxy.GetFlags(x.frm)
            if self.ida_proxy.isCode(flags):
                xrefs.append((x.frm, False))
        return xrefs
class CryptoIdentifier():
    """
    This class contains the logic to perform Crypto identification.
    Two techniques are currently supported:
    1. A heuristic approach that identifies functions and basic blocks
    based on the ratio of arithmetic/logic instructions to all instructions
    2. A signature-based approach, using the signatures defined in PatternManager
    """

    def __init__(self):
        """
        Set up default thresholds, helper references and empty result lists.
        No scanning is performed here.
        """
        self.name = "CryptoIdentifier"
        print("[*] loading CryptoIdentifier")
        # modules and data classes are stored on the instance and used via self
        self.time = time
        self.re = re
        self.GraphHelper = GraphHelper
        self.CryptoSignatureHit = CryptoSignatureHit
        self.AritlogBasicBlock = AritlogBasicBlock
        self.Segment = Segment
        self.pm = PatternManager()
        self.low_rating_threshold = 0.4
        self.high_rating_threshold = 1.0
        self.low_instruction_threshold = 8
        self.high_instruction_threshold = 100
        # if the threshold is set to this value, it is automatically expanded to infinite.
        self.max_instruction_threshold = 100
        self.low_call_threshold = 0
        self.high_call_threshold = 1
        # if the threshold is set to this value, it is automatically expanded to infinite.
        self.max_call_threshold = 10
        # if at least this fraction of a signature's length has been identified
        # consecutively, the location is marked as a signature hit.
        self.match_filter_factor = 0.5
        self.aritlog_blocks = []
        self.signature_hits = []
        self.ida_proxy = IdaProxy()

    def scan(self):
        """
        Scan the whole IDB with all available techniques.
        """
        self.scanAritlog()
        self.scanCryptoPatterns()

################################################################################
# Aritlog scanning
################################################################################

    def scanAritlog(self):
        """
        scan with the arithmetic/logic heuristic.
        All basic blocks of all functions are analyzed and stored in self.aritlog_blocks;
        the returned list is filtered with the currently configured thresholds.
        @return: a list of AritLogBasicBlock data objects that fulfill the parameters as specified
        """
        print("[*] CryptoIdentifier: Starting aritlog heuristic analysis.")
        self.aritlog_blocks = []
        time_before = self.time.time()
        for function_ea in self.ida_proxy.Functions():
            function_chart = self.ida_proxy.FlowChart(self.ida_proxy.get_func(function_ea))
            calls_in_function = 0
            function_blocks = []
            function_dgraph = {}
            blocks_in_loops = set()
            for current_block in function_chart:
                block = self.AritlogBasicBlock(current_block.startEA, current_block.endEA)
                for instruction in self.ida_proxy.Heads(block.start_ea, block.end_ea):
                    if not self.ida_proxy.isCode(self.ida_proxy.GetFlags(instruction)):
                        continue
                    mnemonic = self.ida_proxy.GetMnem(instruction)
                    # identical operand values are reported to the block so that it can
                    # recognize zeroing instructions (like xor eax, eax)
                    has_identical_operands = self.ida_proxy.GetOperandValue(instruction, 0) == \
                        self.ida_proxy.GetOperandValue(instruction, 1)
                    block.updateInstructionCount(mnemonic, has_identical_operands)
                    if mnemonic == "call":
                        calls_in_function += 1
                function_blocks.append(block)
                # prepare graph for Tarjan's algorithm
                succeeding_blocks = [succ.startEA for succ in current_block.succs()]
                function_dgraph[current_block.startEA] = succeeding_blocks
                # add trivial loops (a block that is its own successor)
                if current_block.startEA in succeeding_blocks:
                    block.is_contained_in_trivial_loop = True
                    blocks_in_loops.add(current_block.startEA)
            # perform Tarjan's algorithm to identify strongly connected components (= loops) in the function graph
            graph_helper = self.GraphHelper()
            strongly_connected = graph_helper.calculateStronglyConnectedComponents(function_dgraph)
            for component in strongly_connected:
                # components of size one are no loops (trivial loops have been handled above)
                if len(component) > 1:
                    blocks_in_loops.update(component)
            for block in function_blocks:
                if block.start_ea in blocks_in_loops:
                    block.is_contained_in_loop = True
                # the call count is only complete once all blocks of the function have been visited
                block.num_calls_in_function = calls_in_function
            self.aritlog_blocks.extend(function_blocks)
        print("[*] Heuristics analysis took %3.2f seconds." % (self.time.time() - time_before))

        return self.getAritlogBlocks(self.low_rating_threshold, self.high_rating_threshold,
                                     self.low_instruction_threshold, self.high_instruction_threshold,
                                     self.low_call_threshold, self.high_call_threshold,
                                     False, False, False)

    def _updateThresholds(self, min_rating, max_rating, min_instr, max_instr, min_call, max_call):
        """
        update all six threshold bounds
        @param min_rating: the minimum arit/log ratio a basic block must have (clamped to >= 0.0)
        @type min_rating: float
        @param max_rating: the maximum arit/log ratio a basic block can have (clamped to <= 1.0)
        @type max_rating: float
        @param min_instr: the minimum number of instructions a basic block must have
        @type min_instr: int
        @param max_instr: the maximum number of instructions a basic block can have
        @type max_instr: int
        @param min_call: the minimum number of calls the function containing the basic block must have
        @type min_call: int
        @param max_call: the maximum number of calls the function containing the basic block can have
        @type max_call: int
        """
        self.low_rating_threshold = max(0.0, min_rating)
        self.high_rating_threshold = min(1.0, max_rating)
        self.low_instruction_threshold = max(0, min_instr)
        if max_instr >= self.max_instruction_threshold:
            # we cap the value here and safely assume there is no block with more than 1000000 instructions
            self.high_instruction_threshold = 1000000
        else:
            self.high_instruction_threshold = max_instr
        self.low_call_threshold = max(0, min_call)
        if max_call >= self.max_call_threshold:
            # we cap the value here and safely assume there is no function with more than 1000000 calls
            self.high_call_threshold = 1000000
        else:
            self.high_call_threshold = max_call

    def getAritlogBlocks(self, min_rating, max_rating, min_instr, max_instr, min_api, max_api, is_nonzero, \
        is_looped, is_trivially_looped):
        """
        get all blocks that are within the limits specified by the heuristic parameters.
        The thresholds stored on this object are updated as a side effect.
        parameters are the same as in function "_updateThresholds" except
        @param is_nonzero: defines whether zeroing instructions (like xor eax, eax) shall be counted or not.
        @type is_nonzero: boolean
        @param is_looped: defines whether only basic blocks in loops shall be selected
        @type is_looped: boolean
        @param is_trivially_looped: defines whether only basic blocks that are their own successor
                                    shall be selected
        @type is_trivially_looped: boolean
        @return: a list of AritlogBasicBlock data objects, according to the parameters
        """
        self._updateThresholds(min_rating, max_rating, min_instr, max_instr, min_api, max_api)
        return [
            block for block in self.aritlog_blocks
            if (self.high_rating_threshold >= block.getAritlogRating(is_nonzero) >= self.low_rating_threshold)
            and (self.high_instruction_threshold >= block.num_instructions >= self.low_instruction_threshold)
            and (self.high_call_threshold >= block.num_calls_in_function >= self.low_call_threshold)
            and (not is_looped or block.is_contained_in_loop)
            and (not is_trivially_looped or block.is_contained_in_trivial_loop)
        ]

    def getUnfilteredBlockCount(self):
        """
        returns the number of basic blocks that have been analyzed.
        @return: (int) number of basic blocks
        """
        return len(self.aritlog_blocks)

################################################################################
# Signature scanning
################################################################################

    def getSegmentData(self):
        """
        returns the raw bytes of the segments as stored by IDA.
        Segments whose data cannot be read are reported on the console and skipped.
        @return: a list of Segment data objects.
        """
        segments = []
        for segment_ea in self.ida_proxy.Segments():
            try:
                segment = self.Segment()
                segment.start_ea = segment_ea
                segment.end_ea = self.ida_proxy.SegEnd(segment_ea)
                segment.name = self.ida_proxy.SegName(segment_ea)
                # join once instead of growing a string byte by byte
                segment.data = "".join(
                    chr(self.ida_proxy.get_byte(ea))
                    for ea in helpers.Misc.lrange(segment_ea, self.ida_proxy.SegEnd(segment_ea)))
                segments.append(segment)
            except Exception:
                # best effort: skip this segment, but do not swallow KeyboardInterrupt / SystemExit
                print(
                    "[!] Tried to access invalid segment data. An error has occurred while address conversion"
                )
        return segments

    def scanCryptoPatterns(self, pattern_size=32):
        """
        scan all segments for the tokenized signatures provided by the PatternManager and
        for the variable signatures (see scanVariablePatterns).
        The hits are stored in self.signature_hits.
        @param pattern_size: passed on to PatternManager.getTokenizedSignatures()
        @type pattern_size: int
        @return: a list of CryptoSignatureHit data objects
        """
        crypt_results = []
        print("[*] CryptoIdentifier: Starting crypto signature scanning.")
        time_before_matching = self.time.time()
        segments = self.getSegmentData()
        keywords = self.pm.getTokenizedSignatures(pattern_size)
        for keyword, signature_names in keywords.items():
            # the keyword is matched literally; compile it once for all segments
            pattern = self.re.compile(self.re.escape(keyword))
            for segment in segments:
                crypt_results.extend(
                    self.CryptoSignatureHit(segment.start_ea + match.start(), signature_names, keyword)
                    for match in pattern.finditer(segment.data))
        variable_matches = self.scanVariablePatterns()
        crypt_results.extend(variable_matches)
        print("[*] Full matching took %3.2f seconds and resulted in %d hits." %
              (self.time.time() - time_before_matching, len(crypt_results)))
        self.signature_hits = crypt_results
        return crypt_results

    def scanVariablePatterns(self):
        """
        scan all segments for the variable signatures provided by the PatternManager,
        using IDA's binary search.
        @return: a list of CryptoSignatureHit data objects
        """
        # the scanning code is roughly based on kyprizel's signature scan, see credits above for more information
        crypt_results = []
        variable_signatures = self.pm.getVariableSignatures()
        for var_sig, pattern in variable_signatures.items():
            # the search resumes behind a hit; the distance is the number of
            # space-separated tokens in the pattern string
            advance = pattern.count(" ") + 1
            current_seg = self.ida_proxy.FirstSeg()
            seg_end = self.ida_proxy.SegEnd(current_seg)
            while current_seg != self.ida_proxy.BAD_ADDR:
                signature_hit = self.ida_proxy.find_binary(current_seg, seg_end, pattern, 16, 1)
                if signature_hit != self.ida_proxy.BAD_ADDR:
                    crypt_results.append(self.CryptoSignatureHit(signature_hit, [var_sig], pattern))
                    current_seg = signature_hit + advance
                else:
                    # nothing (more) found in this segment, continue with the next one
                    current_seg = self.ida_proxy.NextSeg(seg_end)
                    if current_seg != self.ida_proxy.BAD_ADDR:
                        seg_end = self.ida_proxy.SegEnd(current_seg)
        return crypt_results

    def getSignatureLength(self, signature_name):
        """
        returns the length for a signature, identified by its name
        @param signature_name: name for a signature, e.g. "ADLER 32"
        @type signature_name: str
        @return: (int) length of the signature, or 0 if the name is unknown.
        """
        for signature, name in self.pm.signatures.items():
            if name == signature_name:
                return len(signature)
        return 0

    def getSignatureHits(self):
        """
        Get all signature hits that have a length of at least match_filter_factor percent
        of the signature they triggered.
        Hits are grouped by signature names. Hits without any signature name are ignored.
        @return: a dictionary  with key/value entries of the following form: ("signature name", [CryptoSignatureHit])
        """
        sorted_hits = sorted(self.signature_hits)
        unified_hits = []

        # step 1: merge hits that directly follow each other and share at least one signature name
        previous_signature_names = []
        for hit in sorted_hits:
            hit_intersection = [
                element for element in hit.signature_names
                if element in previous_signature_names
            ]
            if not hit_intersection:
                previous_signature_names = hit.signature_names
                unified_hits.append(
                    self.CryptoSignatureHit(hit.start_address, hit.signature_names, hit.matched_signature))
                continue
            previous_signature_names = hit_intersection
            previous_hit = unified_hits[-1]
            if hit.start_address == previous_hit.start_address + len(previous_hit.matched_signature):
                # consecutive part of the same signature(s): extend the previous hit
                previous_hit.matched_signature += hit.matched_signature
                previous_hit.signature_names = hit_intersection
            else:
                unified_hits.append(
                    self.CryptoSignatureHit(hit.start_address, hit.signature_names, hit.matched_signature))

        # step 2: drop hits that cover too small a part of their signature(s)
        filtered_hits = []
        for hit in unified_hits:
            if not hit.signature_names:
                # nothing to compare against and nothing to group by (max() of an empty list would raise)
                continue
            required_length = max(
                self.match_filter_factor * self.getSignatureLength(name)
                for name in hit.signature_names)
            if len(hit.matched_signature) >= required_length:
                hit.code_refs_to = self.getXrefsToAddress(hit.start_address)
                filtered_hits.append(hit)

        # step 3: group by signature name
        grouped_hits = {}
        for hit in filtered_hits:
            for name in hit.signature_names:
                grouped_hits.setdefault(name, []).append(hit)

        return grouped_hits

    def getXrefsToAddress(self, address):
        """
        get all references to a certain address.
        These are no xrefs in IDA sense but references to the crypto signatures.
        If the signature points to an instruction, e.g. if a constant is moved to a register, the return is flagged as
        "True", meaning it is an in-code reference.
        @param address: an arbitrary address
        @type address: int
        @return: a list of tuples (int, boolean)
        """
        xrefs = []
        head_to_address = self.ida_proxy.PrevHead(address, address - 14)
        # compare against the proxy's BAD_ADDR as the rest of this class does; the
        # 32bit constant that was checked before is still treated as invalid.
        if head_to_address not in (self.ida_proxy.BAD_ADDR, 0xFFFFFFFF):
            flags = self.ida_proxy.GetFlags(head_to_address)
            if self.ida_proxy.isCode(flags):
                xrefs.append((head_to_address, True))
        for x in self.ida_proxy.XrefsTo(address):
            flags = self.ida_proxy.GetFlags(x.frm)
            if self.ida_proxy.isCode(flags):
                xrefs.append((x.frm, False))
        return xrefs