Example #1
0
    def execute(self, field):
        """Try to identify potential key fields in a symbol/field.

        A sub-field is reported as a key-field candidate when its values are
        not all identical across messages (it is dynamic) and all of its
        values, including the one of the first message, have the same length.

        >>> import binascii
        >>> from netzob.all import *
        >>> samples = ["00ff2f000011", "000010000000", "00fe1f000000", "000020000000", "00ff1f000000", "00ff1f000000", "00ff2f000000", "00fe1f000000"]
        >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> Format.splitStatic(symbol)
        >>> symbol.addEncodingFunction(TypeEncodingFunction(HexaString))
        >>> print symbol
        Field-0 | Field-1 | Field-2 | Field-3
        ------- | ------- | ------- | -------
        '00'    | 'ff2f'  | '0000'  | '11'   
        '00'    | '0010'  | '0000'  | '00'   
        '00'    | 'fe1f'  | '0000'  | '00'   
        '00'    | '0020'  | '0000'  | '00'   
        '00'    | 'ff1f'  | '0000'  | '00'   
        '00'    | 'ff1f'  | '0000'  | '00'   
        '00'    | 'ff2f'  | '0000'  | '00'   
        '00'    | 'fe1f'  | '0000'  | '00'   
        ------- | ------- | ------- | -------

        >>> finder = FindKeyFields()
        >>> results = finder.execute(symbol)
        >>> for result in results:
        ...     print "Field name: " + result["keyField"].name + ", number of clusters: " + str(result["nbClusters"]) + ", distribution: " + str(result["distribution"])
        Field name: Field-1, number of clusters: 5, distribution: [2, 1, 2, 2, 1]
        Field name: Field-3, number of clusters: 2, distribution: [1, 7]

        :param field: the field in which we want to identify key fields.
        :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
        :return: one dict per candidate, with the keys ``"keyField"`` (the
                 candidate sub-field), ``"nbClusters"`` (number of clusters
                 obtained when clustering on it) and ``"distribution"``
                 (number of messages in each cluster). The list is empty when
                 the field has fewer than two messages or no candidate.
        :rtype: a :class:`list` of :class:`dict`
        :raise TypeError: if ``field`` is None
        """

        # Safe checks
        if field is None:
            raise TypeError("'field' should not be None")
        if len(field.messages) < 2:
            return []

        results = []
        cells = field.getCells(encoded=False, styled=False, transposed=False)
        # Transpose the cells so that columns[i] holds every value of the
        # i-th sub-field (assumes getCells yields one row per message --
        # TODO confirm). Materialized as a list because it is indexed below.
        columns = list(zip(*cells))

        # Retrieve dynamic fields with fixed size
        for (i, f) in enumerate(field.fields):
            column = columns[i]
            # A single distinct value means the field is static: not a key
            if len(set(column)) <= 1:
                continue
            # Every value, the first message's included, must share one length
            if len({len(val) for val in column}) == 1:
                results.append({"keyField": f})

        # Nothing to cluster on: skip the import and the clustering step
        if not results:
            return results

        # Compute clusters according to each key field found.
        # NOTE(review): imported locally, presumably to avoid a circular
        # import with the Format module -- confirm before moving it.
        from netzob.Inference.Vocabulary.Format import Format
        for result in results:
            tmpClusters = Format.clusterByKeyField(field, result["keyField"])
            result["nbClusters"] = len(tmpClusters)
            # Clusters distribution: number of messages in each cluster
            result["distribution"] = [len(cluster.messages)
                                      for cluster in tmpClusters.values()]

        return results
Example #2
0
    def execute(self, field):
        """Try to identify potential key fields in a symbol/field.

        A sub-field is reported as a key-field candidate when its values are
        not all identical across messages (it is dynamic) and all of its
        values, including the one of the first message, have the same length.

        >>> import binascii
        >>> from netzob.all import *
        >>> samples = [b"00ff2f000011", b"000010000000", b"00fe1f000000", b"000020000000", b"00ff1f000000", b"00ff1f000000", b"00ff2f000000", b"00fe1f000000"]
        >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> Format.splitStatic(symbol)
        >>> symbol.addEncodingFunction(TypeEncodingFunction(HexaString))
        >>> print(symbol)
        Field-0 | Field-1 | Field-2 | Field-3
        ------- | ------- | ------- | -------
        '00'    | 'ff2f'  | '0000'  | '11'   
        '00'    | '0010'  | '0000'  | '00'   
        '00'    | 'fe1f'  | '0000'  | '00'   
        '00'    | '0020'  | '0000'  | '00'   
        '00'    | 'ff1f'  | '0000'  | '00'   
        '00'    | 'ff1f'  | '0000'  | '00'   
        '00'    | 'ff2f'  | '0000'  | '00'   
        '00'    | 'fe1f'  | '0000'  | '00'   
        ------- | ------- | ------- | -------

        >>> finder = FindKeyFields()
        >>> results = finder.execute(symbol)
        >>> for result in results:
        ...     print("Field name: " + result["keyField"].name + ", number of clusters: " + str(result["nbClusters"]) + ", distribution: " + str(result["distribution"]))
        Field name: Field-1, number of clusters: 5, distribution: [2, 1, 2, 1, 2]
        Field name: Field-3, number of clusters: 2, distribution: [1, 7]

        :param field: the field in which we want to identify key fields.
        :type field: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
        :return: one dict per candidate, with the keys ``"keyField"`` (the
                 candidate sub-field), ``"nbClusters"`` (number of clusters
                 obtained when clustering on it) and ``"distribution"``
                 (number of messages in each cluster). The list is empty when
                 the field has fewer than two messages or no candidate.
        :rtype: a :class:`list` of :class:`dict`
        :raise TypeError: if ``field`` is None
        """

        # Safe checks
        if field is None:
            raise TypeError("'field' should not be None")
        if len(field.messages) < 2:
            return []

        results = []
        cells = field.getCells(encoded=False, styled=False, transposed=False)
        # Transpose the cells so that columns[i] holds every value of the
        # i-th sub-field (assumes getCells yields one row per message --
        # TODO confirm). zip() is lazy, so materialize it for indexing.
        columns = list(zip(*cells))

        # Retrieve dynamic fields with fixed size
        for (i, f) in enumerate(field.fields):
            column = columns[i]
            # A single distinct value means the field is static: not a key
            if len(set(column)) <= 1:
                continue
            # Every value, the first message's included, must share one length
            if len({len(val) for val in column}) == 1:
                results.append({"keyField": f})

        # Nothing to cluster on: skip the import and the clustering step
        if not results:
            return results

        # Compute clusters according to each key field found.
        # NOTE(review): imported locally, presumably to avoid a circular
        # import with the Format module -- confirm before moving it.
        from netzob.Inference.Vocabulary.Format import Format
        for result in results:
            tmpClusters = Format.clusterByKeyField(field, result["keyField"])
            result["nbClusters"] = len(tmpClusters)
            # Clusters distribution: number of messages in each cluster
            result["distribution"] = [len(cluster.messages)
                                      for cluster in tmpClusters.values()]

        return results