Example #1
0
    def cluster(self, messages):
        if messages is None:
            raise TypeError("Messages cannot be None")
        if len(messages) == 0:
            raise TypeError("There should be at least one message.")

        for m in messages:
            if not isinstance(m, AbstractMessage):
                raise TypeError(
                    "At least one message ({0}) is not an AbstractMessage.".
                    format(str(m)))

        self._logger.debug(
            "Identify similar messages following their alignment (min_equivalence={0})"
            .format(self.minEquivalence))

        self._logger.debug(
            "Initiating the clustering by alignment on {0} messages...".format(
                len(messages)))
        symbols = self._processUPGMA(messages, self.recomputeMatrixThreshold)
        self._logger.debug("Clustering completed, computing final alignment.")

        # Retrieve the alignment of each symbol and the build the associated regular expression
        for symbol in symbols:
            self._logger.debug("Align messages from symbol {0}".format(
                symbol.name))
            from netzob.Inference.Vocabulary.Format import Format
            Format.splitAligned(symbol, useSemantic=False)

        return symbols
Example #2
0
    def cluster(self, messages):
        if messages is None:
            raise TypeError("Messages cannot be None")
        if len(messages) == 0:
            raise TypeError("There should be at least one message.")

        for m in messages:
            if not isinstance(m, AbstractMessage):
                raise TypeError(
                    "At least one message ({0}) is not an AbstractMessage.".
                    format(str(m)))

        self._logger.debug(
            "Identify similar messages following their alignment (min_equivalence={0})".
            format(self.minEquivalence))

        self._logger.debug(
            "Initiating the clustering by alignment on {0} messages...".format(
                len(messages)))
        symbols = self._processUPGMA(messages, self.recomputeMatrixThreshold)
        self._logger.debug("Clustering completed, computing final alignment.")

        # Retrieve the alignment of each symbol and the build the associated regular expression
        for symbol in symbols:
            self._logger.debug(
                "Align messages from symbol {0}".format(symbol.name))
            from netzob.Inference.Vocabulary.Format import Format
            Format.splitAligned(symbol, useSemantic=False)

        return symbols
Example #3
0
    def execute(self, field, useSemantic=True):
        """Execute the alignement on the specified field.

        :parameter field: the field that will be aligned
        :type field: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
        """
        if field is None:
            raise TypeError("Field cannot be None")

        if useSemantic is None:
            raise TypeError("useSemantic cannot be None")

        # First step: we clean and reset the field
        from netzob.Inference.Vocabulary.Format import Format
        Format.resetFormat(field)

        # Retrieve all the segment of messages to align
        messageValues = field.getMessageValues(encoded=False, styled=False)

        # Semantic tags (a.k.a applicative data)
        semanticTags = None
        if useSemantic:
            semanticTags = [
                self.__searchApplicativeDataInMessage(message)
                for message, values in list(messageValues.items())
            ]

        if len(list(messageValues.values())) == 0:
            return

        # Execute the alignement
        (alignment, semanticTags,
         score) = self._alignData(list(messageValues.values()), semanticTags)

        # Check the results
        if alignment is None:
            raise ValueError(
                "Impossible to compute an alignment for the specifed data")

        # Build Fields based on computed alignement and semantic tags
        self._updateFieldsFromAlignment(field, alignment, semanticTags)
Example #4
0
    def execute(self, field, useSemantic=True):
        """Execute the alignement on the specified field.

        :parameter field: the field that will be aligned
        :type field: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
        """
        if field is None:
            raise TypeError("Field cannot be None")

        if useSemantic is None:
            raise TypeError("useSemantic cannot be None")

        # First step: we clean and reset the field
        from netzob.Inference.Vocabulary.Format import Format
        Format.resetFormat(field)

        # Retrieve all the segment of messages to align
        messageValues = field.getMessageValues(encoded=False, styled=False)

        # Semantic tags (a.k.a applicative data)
        semanticTags = None
        if useSemantic:
            semanticTags = [
                self.__searchApplicativeDataInMessage(message)
                for message, values in list(messageValues.items())
            ]

        if len(list(messageValues.values())) == 0:
            return

        # Execute the alignement
        (alignment, semanticTags, score) = self._alignData(
            list(messageValues.values()), semanticTags)

        # Check the results
        if alignment is None:
            raise ValueError(
                "Impossible to compute an alignment for the specifed data")

        # Build Fields based on computed alignement and semantic tags
        self._updateFieldsFromAlignment(field, alignment, semanticTags)
    def test_exportToScapy(self):
        # Test if exportToScapy function in ScapyExporter is working correctly.
        # Test if created file runs without error
        # Verify the content of the exported file is as expected, including size info.
        # check for both iterable and non-iterable symbols
        m1 = RawMessage("\x09\x70\x95\xcc\xef")
        m2 = RawMessage("\x0a\x70\x03\x8f\x23\x5f")
        m3 = RawMessage("\x09\x70\x31\xa0")
        m4 = RawMessage("\x0a\xd6\xb5\x5b")
        messages = [m1, m2, m3, m4]
        symbols = Symbol(messages=messages)
        with self.assertRaises(TypeError):
            iter(symbols)
        self.assertEqual(
            ScapyExporter().exportToScapy(symbols, 'test1.py', 'ProtocolName'),
            None)
        self.assertEqual(os.system("python test1.py"), 0)
        import test1
        self.assertEqual(test1.ProtocolName_Symbol().fields_desc[0].name,
                         'Field')
        self.assertEqual(test1.ProtocolName_Symbol().fields_desc[0].default,
                         None)
        self.assertEqual(test1.ProtocolName_Symbol().fields_desc[0].cls,
                         (0, None))  # size
        with self.assertRaises(AttributeError):
            test1.ProtocolName_Symbol1().fields_desc[0].name
        os.remove('test1.py')

        symbl = Format.clusterByAlignment(messages,
                                          minEquivalence=50,
                                          internalSlick=True)
        self.assertEqual(type(symbl), list)
        self.assertEqual(
            ScapyExporter().exportToScapy(symbl, 'test2.py', 'ProtocolName'),
            None)
        self.assertEqual(os.system("python test2.py"), 0)
        import test2
        self.assertEqual(test2.ProtocolName_Symbol0().fields_desc[0].name,
                         'Field00')
        self.assertEqual(test2.ProtocolName_Symbol0().fields_desc[0].cls,
                         (0, 32))  # size
        self.assertEqual(test2.ProtocolName_Symbol1().fields_desc[1].default,
                         'p')
        self.assertEqual(test2.ProtocolName_Symbol1().Field01, 'p')
        self.assertEqual(len(test2.ProtocolName_Symbol1().fields_desc), 3)
        os.remove('test2.py')
Example #6
0
    def execute(self, field):
        """Try to identify potential key fields in a symbol/field.

        >>> import binascii
        >>> from netzob.all import *
        >>> samples = ["00ff2f000011",	"000010000000",	"00fe1f000000",	"000020000000", "00ff1f000000",	"00ff1f000000",	"00ff2f000000",	"00fe1f000000"]
        >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> Format.splitStatic(symbol)
        >>> symbol.addEncodingFunction(TypeEncodingFunction(HexaString))
        >>> print symbol
        Field-0 | Field-1 | Field-2 | Field-3
        ------- | ------- | ------- | -------
        '00'    | 'ff2f'  | '0000'  | '11'   
        '00'    | '0010'  | '0000'  | '00'   
        '00'    | 'fe1f'  | '0000'  | '00'   
        '00'    | '0020'  | '0000'  | '00'   
        '00'    | 'ff1f'  | '0000'  | '00'   
        '00'    | 'ff1f'  | '0000'  | '00'   
        '00'    | 'ff2f'  | '0000'  | '00'   
        '00'    | 'fe1f'  | '0000'  | '00'   
        ------- | ------- | ------- | -------

        >>> finder = FindKeyFields()
        >>> results = finder.execute(symbol)
        >>> for result in results:
        ...     print "Field name: " + result["keyField"].name + ", number of clusters: " + str(result["nbClusters"]) + ", distribution: " + str(result["distribution"])
        Field name: Field-1, number of clusters: 5, distribution: [2, 1, 2, 2, 1]
        Field name: Field-3, number of clusters: 2, distribution: [1, 7]

        :param field: the field in which we want to identify key fields.
        :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
        :raise Exception if something bad happens
        """

        # Safe checks
        if field is None:
            raise TypeError("'field' should not be None")
        if len(field.messages) < 2:
            return []

        results = []
        cells = field.getCells(encoded=False, styled=False, transposed=False)
        columns = zip(*cells)

        # Retrieve dynamic fields with fixed size
        for (i, f) in enumerate(field.fields):
            isCandidate = True
            lRef = len(columns[i][1])
            if len(set(columns[i])) <= 1:
                isCandidate = False
                continue
            for val in columns[i][1:]:
                if lRef != len(val):
                    isCandidate = False
                    break
            if isCandidate:
                results.append({"keyField": f})

        # Compute clusters according to each key field found
        from netzob.Inference.Vocabulary.Format import Format
        for result in results:
            tmpClusters = Format.clusterByKeyField(field, result["keyField"])
            result["nbClusters"] = len(tmpClusters)
            distrib = []  # Compute clusters distribution
            for cluster in tmpClusters.values():
                distrib.append(len(cluster.messages))
            result["distribution"] = distrib

        return results
Example #7
0
    def split(field, delimiter):
        """Split a field (or symbol) with a specific delimiter. The
        delimiter can be passed either as an ASCII, a Raw, an
        HexaString, or any objects that inherit from AbstractType.


        >>> from netzob.all import *
        >>> samples = [b"aaaaff000000ff10", b"bbff110010ff00000011", b"ccccccccfffe1f000000ff12"]
        >>> messages = [RawMessage(data=sample) for sample in samples]
        >>> symbol = Symbol(messages=messages[:3])
        >>> Format.splitDelimiter(symbol, ASCII("ff"))
        >>> print(symbol)
        Field-0    | Field-sep-6666 | Field-2      | Field-sep-6666 | Field-4   
        ---------- | -------------- | ------------ | -------------- | ----------
        'aaaa'     | 'ff'           | '000000'     | 'ff'           | '10'      
        'bb'       | 'ff'           | '110010'     | 'ff'           | '00000011'
        'cccccccc' | 'ff'           | 'fe1f000000' | 'ff'           | '12'      
        ---------- | -------------- | ------------ | -------------- | ----------

        >>> samples = [b"434d446964656e74696679230400000066726564", b"5245536964656e74696679230000000000000000", b"434d44696e666f2300000000", b"524553696e666f230000000004000000696e666f", b"434d4473746174732300000000", b"52455373746174732300000000050000007374617473", b"434d4461757468656e7469667923090000006d7950617373776421", b"52455361757468656e74696679230000000000000000", b"434d44656e6372797074230a00000031323334353674657374", b"524553656e637279707423000000000a00000073707176777436273136", b"434d4464656372797074230a00000073707176777436273136", b"5245536465637279707423000000000a00000031323334353674657374", b"434d446279652300000000", b"524553627965230000000000000000", b"434d446964656e746966792307000000526f626572746f", b"5245536964656e74696679230000000000000000", b"434d44696e666f2300000000", b"524553696e666f230000000004000000696e666f", b"434d4473746174732300000000", b"52455373746174732300000000050000007374617473", b"434d4461757468656e74696679230a000000615374726f6e67507764", b"52455361757468656e74696679230000000000000000", b"434d44656e63727970742306000000616263646566", b"524553656e6372797074230000000006000000232021262724", b"434d44646563727970742306000000232021262724", b"52455364656372797074230000000006000000616263646566", b"434d446279652300000000", b"524553627965230000000000000000"]
        >>> messages = [RawMessage(data=TypeConverter.convert(sample, HexaString, Raw)) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> symbol.encodingFunctions.add(TypeEncodingFunction(ASCII))  # Change visualization to hexastring
        >>> Format.splitDelimiter(symbol, ASCII("#"))
        >>> print(symbol)
        Field-0         | Field-sep-23 | Field-2              | Field-sep-23 | Field-4
        --------------- | ------------ | -------------------- | ------------ | -------
        'CMDidentify'   | '#'          | '....fred'           | ''           | ''     
        'RESidentify'   | '#'          | '........'           | ''           | ''     
        'CMDinfo'       | '#'          | '....'               | ''           | ''     
        'RESinfo'       | '#'          | '........info'       | ''           | ''     
        'CMDstats'      | '#'          | '....'               | ''           | ''     
        'RESstats'      | '#'          | '........stats'      | ''           | ''     
        'CMDauthentify' | '#'          | '....myPasswd!'      | ''           | ''     
        'RESauthentify' | '#'          | '........'           | ''           | ''     
        'CMDencrypt'    | '#'          | '....123456test'     | ''           | ''     
        'RESencrypt'    | '#'          | "........spqvwt6'16" | ''           | ''     
        'CMDdecrypt'    | '#'          | "....spqvwt6'16"     | ''           | ''     
        'RESdecrypt'    | '#'          | '........123456test' | ''           | ''     
        'CMDbye'        | '#'          | '....'               | ''           | ''     
        'RESbye'        | '#'          | '........'           | ''           | ''     
        'CMDidentify'   | '#'          | '....Roberto'        | ''           | ''     
        'RESidentify'   | '#'          | '........'           | ''           | ''     
        'CMDinfo'       | '#'          | '....'               | ''           | ''     
        'RESinfo'       | '#'          | '........info'       | ''           | ''     
        'CMDstats'      | '#'          | '....'               | ''           | ''     
        'RESstats'      | '#'          | '........stats'      | ''           | ''     
        'CMDauthentify' | '#'          | '....aStrongPwd'     | ''           | ''     
        'RESauthentify' | '#'          | '........'           | ''           | ''     
        'CMDencrypt'    | '#'          | '....abcdef'         | ''           | ''     
        'RESencrypt'    | '#'          | '........'           | '#'          | " !&'$"
        'CMDdecrypt'    | '#'          | '....'               | '#'          | " !&'$"
        'RESdecrypt'    | '#'          | '........abcdef'     | ''           | ''     
        'CMDbye'        | '#'          | '....'               | ''           | ''     
        'RESbye'        | '#'          | '........'           | ''           | ''     
        --------------- | ------------ | -------------------- | ------------ | -------
        >>> print(symbol.fields[0]._str_debug())
        Field-0
        |--   Alt
              |--   Data (Raw=b'CMDidentify' ((0, 88)))
              |--   Data (Raw=b'RESidentify' ((0, 88)))
              |--   Data (Raw=b'CMDinfo' ((0, 56)))
              |--   Data (Raw=b'RESinfo' ((0, 56)))
              |--   Data (Raw=b'CMDstats' ((0, 64)))
              |--   Data (Raw=b'RESstats' ((0, 64)))
              |--   Data (Raw=b'CMDauthentify' ((0, 104)))
              |--   Data (Raw=b'RESauthentify' ((0, 104)))
              |--   Data (Raw=b'CMDencrypt' ((0, 80)))
              |--   Data (Raw=b'RESencrypt' ((0, 80)))
              |--   Data (Raw=b'CMDdecrypt' ((0, 80)))
              |--   Data (Raw=b'RESdecrypt' ((0, 80)))
              |--   Data (Raw=b'CMDbye' ((0, 48)))
              |--   Data (Raw=b'RESbye' ((0, 48)))

        Below is another example of the FieldSplitDelimiter usage: it splits fields based on a Raw string.


        >>> from netzob.all import *
        >>> samples = [b"\\x01\\x02\\x03\\xff\\x04\\x05\\xff\\x06\\x07", b"\\x01\\x02\\xff\\x03\\x04\\x05\\x06\\xff\\x07", b"\\x01\\xff\\x02\\x03\\x04\\x05\\x06"]
        >>> messages = [RawMessage(data=sample) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> Format.splitDelimiter(symbol, Raw(b"\\xff"))
        >>> print(symbol)
        Field-0        | Field-sep-ff | Field-2                | Field-sep-ff | Field-4   
        -------------- | ------------ | ---------------------- | ------------ | ----------
        '\\x01\\x02\\x03' | b'\\xff'      | '\\x04\\x05'             | b'\\xff'      | '\\x06\\x07'
        '\\x01\\x02'     | b'\\xff'      | '\\x03\\x04\\x05\\x06'     | b'\\xff'      | '\\x07'    
        '\\x01'         | b'\\xff'      | '\\x02\\x03\\x04\\x05\\x06' | ''           | ''        
        -------------- | ------------ | ---------------------- | ------------ | ----------


        :param field : the field to consider when spliting
        :type: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
        :param delimiter : the delimiter used to split messages of the field
        :type: :class:`netzob.Model.Types.AbstractType.AbstractType`
        """

        if delimiter is None:
            raise TypeError("Delimiter cannot be None.")

        if field is None:
            raise TypeError("Field cannot be None.")

        if len(field.messages) < 1:
            raise ValueError("The associated symbol does not contain any message.")

        # Find message substrings after applying delimiter
        splittedMessages = []

        for cell in field.getValues(encoded=False, styled=False):
            splittedMessage = cell.split(delimiter.value.tobytes())
            splittedMessages.append(splittedMessage)

        import itertools
        # Inverse the array, so that columns contains observed values for each field
        splittedMessages = list(itertools.zip_longest(*splittedMessages))
        
        # If the delimiter does not create splitted fields
        if len(splittedMessages) <= 1:
            return

        # Else, we add (2*len(splittedMessages)-1) fields
        newFields = []
        iField = -1
        for i in range(len(splittedMessages)):
            iField += 1
            
            fieldDomain = list()
            
            # temporary set that hosts all the observed values to prevent useless duplicate ones
            observedValues = set()
            has_inserted_empty_value = False
            
            isEmptyField = True  # To avoid adding an empty field            
            for v in splittedMessages[i]:
                if v != "" and v is not None:
                    isEmptyField = False
                
                    if v not in observedValues:                    
                        fieldDomain.append(Raw(v))
                        observedValues.add(v)
                else:
                    if not has_inserted_empty_value:
                        fieldDomain.append(Raw(nbBytes=0))
                        has_inserted_empty_value = True

            if not isEmptyField:
                newField = Field(domain=DomainFactory.normalizeDomain(fieldDomain), name="Field-"+str(iField))
                newField.encodingFunctions = list(field.encodingFunctions.values())
                newFields.append(newField)
                iField += 1

            str_delimiter = TypeConverter.convert(delimiter.value, BitArray, HexaString).decode('utf-8')
            fieldName = "Field-sep-{}".format(str_delimiter)

            newFields.append(Field(domain=Alt([delimiter, Raw(nbBytes=0)]), name=fieldName))

        newFields.pop()

        # Reset the field
        from netzob.Inference.Vocabulary.Format import Format
        Format.resetFormat(field)

        # Create a field for each entry
        field.fields = newFields
Example #8
0
    def split(field, delimiter):
        """Split a field (or symbol) with a specific delimiter. The
        delimiter can be passed either as an ASCII, a Raw, an
        HexaString, or any objects that inherit from AbstractType.


        >>> from netzob.all import *
        >>> samples = [b"aaaaff000000ff10", b"bbff110010ff00000011", b"ccccccccfffe1f000000ff12"]
        >>> messages = [RawMessage(data=sample) for sample in samples]
        >>> symbol = Symbol(messages=messages[:3])
        >>> Format.splitDelimiter(symbol, ASCII("ff"))
        >>> print(symbol)
        Field-0    | Field-sep-6666 | Field-2      | Field-sep-6666 | Field-4   
        ---------- | -------------- | ------------ | -------------- | ----------
        'aaaa'     | 'ff'           | '000000'     | 'ff'           | '10'      
        'bb'       | 'ff'           | '110010'     | 'ff'           | '00000011'
        'cccccccc' | 'ff'           | 'fe1f000000' | 'ff'           | '12'      
        ---------- | -------------- | ------------ | -------------- | ----------

        >>> samples = [b"434d446964656e74696679230400000066726564", b"5245536964656e74696679230000000000000000", b"434d44696e666f2300000000", b"524553696e666f230000000004000000696e666f", b"434d4473746174732300000000", b"52455373746174732300000000050000007374617473", b"434d4461757468656e7469667923090000006d7950617373776421", b"52455361757468656e74696679230000000000000000", b"434d44656e6372797074230a00000031323334353674657374", b"524553656e637279707423000000000a00000073707176777436273136", b"434d4464656372797074230a00000073707176777436273136", b"5245536465637279707423000000000a00000031323334353674657374", b"434d446279652300000000", b"524553627965230000000000000000", b"434d446964656e746966792307000000526f626572746f", b"5245536964656e74696679230000000000000000", b"434d44696e666f2300000000", b"524553696e666f230000000004000000696e666f", b"434d4473746174732300000000", b"52455373746174732300000000050000007374617473", b"434d4461757468656e74696679230a000000615374726f6e67507764", b"52455361757468656e74696679230000000000000000", b"434d44656e63727970742306000000616263646566", b"524553656e6372797074230000000006000000232021262724", b"434d44646563727970742306000000232021262724", b"52455364656372797074230000000006000000616263646566", b"434d446279652300000000", b"524553627965230000000000000000"]
        >>> messages = [RawMessage(data=TypeConverter.convert(sample, HexaString, Raw)) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> symbol.encodingFunctions.add(TypeEncodingFunction(ASCII))  # Change visualization to hexastring
        >>> Format.splitDelimiter(symbol, ASCII("#"))
        >>> print(symbol)
        Field-0         | Field-sep-23 | Field-2              | Field-sep-23 | Field-4
        --------------- | ------------ | -------------------- | ------------ | -------
        'CMDidentify'   | '#'          | '....fred'           | ''           | ''     
        'RESidentify'   | '#'          | '........'           | ''           | ''     
        'CMDinfo'       | '#'          | '....'               | ''           | ''     
        'RESinfo'       | '#'          | '........info'       | ''           | ''     
        'CMDstats'      | '#'          | '....'               | ''           | ''     
        'RESstats'      | '#'          | '........stats'      | ''           | ''     
        'CMDauthentify' | '#'          | '....myPasswd!'      | ''           | ''     
        'RESauthentify' | '#'          | '........'           | ''           | ''     
        'CMDencrypt'    | '#'          | '....123456test'     | ''           | ''     
        'RESencrypt'    | '#'          | "........spqvwt6'16" | ''           | ''     
        'CMDdecrypt'    | '#'          | "....spqvwt6'16"     | ''           | ''     
        'RESdecrypt'    | '#'          | '........123456test' | ''           | ''     
        'CMDbye'        | '#'          | '....'               | ''           | ''     
        'RESbye'        | '#'          | '........'           | ''           | ''     
        'CMDidentify'   | '#'          | '....Roberto'        | ''           | ''     
        'RESidentify'   | '#'          | '........'           | ''           | ''     
        'CMDinfo'       | '#'          | '....'               | ''           | ''     
        'RESinfo'       | '#'          | '........info'       | ''           | ''     
        'CMDstats'      | '#'          | '....'               | ''           | ''     
        'RESstats'      | '#'          | '........stats'      | ''           | ''     
        'CMDauthentify' | '#'          | '....aStrongPwd'     | ''           | ''     
        'RESauthentify' | '#'          | '........'           | ''           | ''     
        'CMDencrypt'    | '#'          | '....abcdef'         | ''           | ''     
        'RESencrypt'    | '#'          | '........'           | '#'          | " !&'$"
        'CMDdecrypt'    | '#'          | '....'               | '#'          | " !&'$"
        'RESdecrypt'    | '#'          | '........abcdef'     | ''           | ''     
        'CMDbye'        | '#'          | '....'               | ''           | ''     
        'RESbye'        | '#'          | '........'           | ''           | ''     
        --------------- | ------------ | -------------------- | ------------ | -------
        >>> print(symbol.fields[0]._str_debug())
        Field-0
        |--   Alt
              |--   Data (Raw=b'CMDidentify' ((0, 88)))
              |--   Data (Raw=b'RESidentify' ((0, 88)))
              |--   Data (Raw=b'CMDinfo' ((0, 56)))
              |--   Data (Raw=b'RESinfo' ((0, 56)))
              |--   Data (Raw=b'CMDstats' ((0, 64)))
              |--   Data (Raw=b'RESstats' ((0, 64)))
              |--   Data (Raw=b'CMDauthentify' ((0, 104)))
              |--   Data (Raw=b'RESauthentify' ((0, 104)))
              |--   Data (Raw=b'CMDencrypt' ((0, 80)))
              |--   Data (Raw=b'RESencrypt' ((0, 80)))
              |--   Data (Raw=b'CMDdecrypt' ((0, 80)))
              |--   Data (Raw=b'RESdecrypt' ((0, 80)))
              |--   Data (Raw=b'CMDbye' ((0, 48)))
              |--   Data (Raw=b'RESbye' ((0, 48)))

        Below is another example of the FieldSplitDelimiter usage: it splits fields based on a Raw string.


        >>> from netzob.all import *
        >>> samples = [b"\\x01\\x02\\x03\\xff\\x04\\x05\\xff\\x06\\x07", b"\\x01\\x02\\xff\\x03\\x04\\x05\\x06\\xff\\x07", b"\\x01\\xff\\x02\\x03\\x04\\x05\\x06"]
        >>> messages = [RawMessage(data=sample) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> Format.splitDelimiter(symbol, Raw(b"\\xff"))
        >>> print(symbol)
        Field-0        | Field-sep-ff | Field-2                | Field-sep-ff | Field-4   
        -------------- | ------------ | ---------------------- | ------------ | ----------
        '\\x01\\x02\\x03' | b'\\xff'      | '\\x04\\x05'             | b'\\xff'      | '\\x06\\x07'
        '\\x01\\x02'     | b'\\xff'      | '\\x03\\x04\\x05\\x06'     | b'\\xff'      | '\\x07'    
        '\\x01'         | b'\\xff'      | '\\x02\\x03\\x04\\x05\\x06' | ''           | ''        
        -------------- | ------------ | ---------------------- | ------------ | ----------


        :param field : the field to consider when spliting
        :type: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
        :param delimiter : the delimiter used to split messages of the field
        :type: :class:`netzob.Model.Vocabulary.Types.AbstractType.AbstractType`
        """

        if delimiter is None:
            raise TypeError("Delimiter cannot be None.")

        if field is None:
            raise TypeError("Field cannot be None.")

        if len(field.messages) < 1:
            raise ValueError(
                "The associated symbol does not contain any message.")

        # Find message substrings after applying delimiter
        splittedMessages = []

        for cell in field.getValues(encoded=False, styled=False):
            splittedMessage = cell.split(delimiter.value.tobytes())
            splittedMessages.append(splittedMessage)

        import itertools
        # Inverse the array, so that columns contains observed values for each field
        splittedMessages = list(itertools.zip_longest(*splittedMessages))

        # If the delimiter does not create splitted fields
        if len(splittedMessages) <= 1:
            return

        # Else, we add (2*len(splittedMessages)-1) fields
        newFields = []
        iField = -1
        for i in range(len(splittedMessages)):
            iField += 1

            fieldDomain = list()

            # temporary set that hosts all the observed values to prevent useless duplicate ones
            observedValues = set()
            has_inserted_empty_value = False

            isEmptyField = True  # To avoid adding an empty field            
            for v in splittedMessages[i]:
                if v != "" and v is not None:
                    isEmptyField = False

                    if v not in observedValues:
                        fieldDomain.append(Raw(v))
                        observedValues.add(v)
                else:
                    if not has_inserted_empty_value:
                        fieldDomain.append(Raw(nbBytes=0))
                        has_inserted_empty_value = True

            if not isEmptyField:
                newField = Field(
                    domain=DomainFactory.normalizeDomain(fieldDomain),
                    name="Field-" + str(iField))
                newField.encodingFunctions = list(
                    field.encodingFunctions.values())
                newFields.append(newField)
                iField += 1

            str_delimiter = TypeConverter.convert(delimiter.value, BitArray,
                                                  HexaString).decode('utf-8')
            fieldName = "Field-sep-{}".format(str_delimiter)

            newFields.append(
                Field(domain=Alt([delimiter, Raw(nbBytes=0)]), name=fieldName))

        newFields.pop()

        # Reset the field
        from netzob.Inference.Vocabulary.Format import Format
        Format.resetFormat(field)

        # Create a field for each entry
        field.fields = newFields
Example #9
0
    def execute(self, field):
        """Try to identify potential key fields in a symbol/field.

        >>> import binascii
        >>> from netzob.all import *
        >>> samples = [b"00ff2f000011",	b"000010000000", b"00fe1f000000", b"000020000000", b"00ff1f000000", b"00ff1f000000", b"00ff2f000000", b"00fe1f000000"]
        >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> Format.splitStatic(symbol)
        >>> symbol.addEncodingFunction(TypeEncodingFunction(HexaString))
        >>> print(symbol)
        Field-0 | Field-1 | Field-2 | Field-3
        ------- | ------- | ------- | -------
        '00'    | 'ff2f'  | '0000'  | '11'   
        '00'    | '0010'  | '0000'  | '00'   
        '00'    | 'fe1f'  | '0000'  | '00'   
        '00'    | '0020'  | '0000'  | '00'   
        '00'    | 'ff1f'  | '0000'  | '00'   
        '00'    | 'ff1f'  | '0000'  | '00'   
        '00'    | 'ff2f'  | '0000'  | '00'   
        '00'    | 'fe1f'  | '0000'  | '00'   
        ------- | ------- | ------- | -------

        >>> finder = FindKeyFields()
        >>> results = finder.execute(symbol)
        >>> for result in results:
        ...     print("Field name: " + result["keyField"].name + ", number of clusters: " + str(result["nbClusters"]) + ", distribution: " + str(result["distribution"]))
        Field name: Field-1, number of clusters: 5, distribution: [2, 1, 2, 1, 2]
        Field name: Field-3, number of clusters: 2, distribution: [1, 7]

        :param field: the field in which we want to identify key fields.
        :type field: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
        :raise Exception if something bad happens
        """

        # Safe checks
        if field is None:
            raise TypeError("'field' should not be None")
        if len(field.messages) < 2:
            return []

        results = []
        cells = field.getCells(encoded=False, styled=False, transposed=False)
        columns = list(zip(*cells))

        # Retrieve dynamic fields with fixed size
        for (i, f) in enumerate(field.fields):
            isCandidate = True
            lRef = len(columns[i][1])
            if len(set(columns[i])) <= 1:
                isCandidate = False
                continue
            for val in columns[i][1:]:
                if lRef != len(val):
                    isCandidate = False
                    break
            if isCandidate:
                results.append({"keyField": f})

        # Compute clusters according to each key field found
        from netzob.Inference.Vocabulary.Format import Format
        for result in results:
            tmpClusters = Format.clusterByKeyField(field, result["keyField"])
            result["nbClusters"] = len(tmpClusters)
            distrib = []  # Compute clusters distribution
            for cluster in list(tmpClusters.values()):
                distrib.append(len(cluster.messages))
            result["distribution"] = distrib

        return results
    def split(field, delimiter):
        """Split a field (or symbol) with a specific delimiter. The
        delimiter can be passed either as an ASCII, a Raw, an
        HexaString, or any objects that inherit from AbstractType.

        >>> from netzob.all import *
        >>> samples = ["aaaaff000000ff10",	"bbff110010ff00000011",	"ccccccccfffe1f000000ff12"]
        >>> messages = [RawMessage(data=sample) for sample in samples]
        >>> symbol = Symbol(messages=messages[:3])
        >>> Format.splitDelimiter(symbol, ASCII("ff"))
        >>> print symbol
        'aaaa'     | 'ff' | '000000'     | 'ff' | '10'      
        'bb'       | 'ff' | '110010'     | 'ff' | '00000011'
        'cccccccc' | 'ff' | 'fe1f000000' | 'ff' | '12'      

        >>> samples = ["434d446964656e74696679230400000066726564", "5245536964656e74696679230000000000000000", "434d44696e666f2300000000", "524553696e666f230000000004000000696e666f","434d4473746174732300000000","52455373746174732300000000050000007374617473","434d4461757468656e7469667923090000006d7950617373776421","52455361757468656e74696679230000000000000000","434d44656e6372797074230a00000031323334353674657374","524553656e637279707423000000000a00000073707176777436273136","434d4464656372797074230a00000073707176777436273136","5245536465637279707423000000000a00000031323334353674657374","434d446279652300000000","524553627965230000000000000000","434d446964656e746966792307000000526f626572746f","5245536964656e74696679230000000000000000","434d44696e666f2300000000","524553696e666f230000000004000000696e666f","434d4473746174732300000000","52455373746174732300000000050000007374617473","434d4461757468656e74696679230a000000615374726f6e67507764","52455361757468656e74696679230000000000000000","434d44656e63727970742306000000616263646566","524553656e6372797074230000000006000000232021262724","434d44646563727970742306000000232021262724","52455364656372797074230000000006000000616263646566","434d446279652300000000","524553627965230000000000000000"]
        >>> messages = [RawMessage(data=TypeConverter.convert(sample, HexaString, Raw)) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> symbol.encodingFunctions.add(TypeEncodingFunction(ASCII))  # Change visualization to hexastring
        >>> Format.splitDelimiter(symbol, ASCII("#"))
        >>> print symbol
        'CMDidentify'   | '#' | '....fred'           | ''  | ''     
        'RESidentify'   | '#' | '........'           | ''  | ''     
        'CMDinfo'       | '#' | '....'               | ''  | ''     
        'RESinfo'       | '#' | '........info'       | ''  | ''     
        'CMDstats'      | '#' | '....'               | ''  | ''     
        'RESstats'      | '#' | '........stats'      | ''  | ''     
        'CMDauthentify' | '#' | '....myPasswd!'      | ''  | ''     
        'RESauthentify' | '#' | '........'           | ''  | ''     
        'CMDencrypt'    | '#' | '....123456test'     | ''  | ''     
        'RESencrypt'    | '#' | "........spqvwt6'16" | ''  | ''     
        'CMDdecrypt'    | '#' | "....spqvwt6'16"     | ''  | ''     
        'RESdecrypt'    | '#' | '........123456test' | ''  | ''     
        'CMDbye'        | '#' | '....'               | ''  | ''     
        'RESbye'        | '#' | '........'           | ''  | ''     
        'CMDidentify'   | '#' | '....Roberto'        | ''  | ''     
        'RESidentify'   | '#' | '........'           | ''  | ''     
        'CMDinfo'       | '#' | '....'               | ''  | ''     
        'RESinfo'       | '#' | '........info'       | ''  | ''     
        'CMDstats'      | '#' | '....'               | ''  | ''     
        'RESstats'      | '#' | '........stats'      | ''  | ''     
        'CMDauthentify' | '#' | '....aStrongPwd'     | ''  | ''     
        'RESauthentify' | '#' | '........'           | ''  | ''     
        'CMDencrypt'    | '#' | '....abcdef'         | ''  | ''     
        'RESencrypt'    | '#' | '........'           | '#' | " !&'$"
        'CMDdecrypt'    | '#' | '....'               | '#' | " !&'$"
        'RESdecrypt'    | '#' | '........abcdef'     | ''  | ''     
        'CMDbye'        | '#' | '....'               | ''  | ''     
        'RESbye'        | '#' | '........'           | ''  | ''     


        :param field : the field to consider when spliting
        :type: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
        :param delimiter : the delimiter used to split messages of the field
        :type: :class:`netzob.Common.Models.Types.AbstractType.AbstractType`
        """

        if delimiter is None:
            raise TypeError("Delimiter cannot be None.")

        if field is None:
            raise TypeError("Field cannot be None.")

        if len(field.messages) < 1:
            raise ValueError("The associated symbol does not contain any message.")

        # Find message substrings after applying delimiter
        splittedMessages = []

        for cell in field.getValues(encoded=False, styled=False):
            splittedMessage = cell.split(delimiter.value.tobytes())
            splittedMessages.append(splittedMessage)

        import itertools
        # Inverse the array, so that columns contains observed values for each field
        splittedMessages = list(itertools.izip_longest(*splittedMessages))
        
        # If the delimiter does not create splitted fields
        if len(splittedMessages) <= 1:
            return

        # Else, we add (2*len(splittedMessages)-1) fields
        newFields = []
        iField = -1
        for i in range(len(splittedMessages)):
            iField += 1
            fieldDomain = set()
            isEmptyField = True  # To avoid adding an empty field
            emptyValueFound = False
            for v in splittedMessages[i]:
                if v != "" and v is not None:
                    isEmptyField = False
                    fieldDomain.add(Raw(v))
                else:
                    fieldDomain.add(Raw(nbBytes=0))

            if not isEmptyField:
                fieldDomain = list(fieldDomain)
                newField = Field(domain=DomainFactory.normalizeDomain(fieldDomain), name="Field-"+str(iField))
                newField.encodingFunctions = field.encodingFunctions.values()
                newFields.append(newField)
                iField += 1

            fieldName = "Field-sep-" + TypeConverter.convert(delimiter.value, BitArray, HexaString)

            newFields.append(Field(domain=Alt([delimiter, Raw(nbBytes=0)]), name=fieldName))

        newFields.pop()

        # Reset the field
        from netzob.Inference.Vocabulary.Format import Format
        Format.resetFormat(field)

        # Create a field for each entry
        field.fields = newFields