Python Format Examples

Programming Language: Python

Namespace/Package Name: netzob.Inference.Vocabulary.Format

Class/Type: Format

Examples at hotexamples.com: 10

Python Format - 10 examples found. These are the top-rated real-world Python examples of netzob.Inference.Vocabulary.Format.Format extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

resetFormat(2)

clusterByAlignment(1)

clusterByKeyField(1)

splitAligned(1)

Example #1

Show file

File: ClusterByAlignment.py Project: Flyour/netzob

    def cluster(self, messages):
        """Group messages into symbols according to their alignment.

        The messages are clustered by ``self._processUPGMA`` and each
        resulting symbol is then split into fields with
        ``Format.splitAligned`` (called with ``useSemantic=False``).

        :parameter messages: the messages to cluster
        :type messages: a sized collection of AbstractMessage
        :return: the symbols returned by the clustering step
        :raise TypeError: if messages is None, is empty, or holds an entry
                          that is not an AbstractMessage
        """
        if messages is None:
            raise TypeError("Messages cannot be None")
        if len(messages) < 1:
            raise TypeError("There should be at least one message.")

        # Reject the whole input as soon as one entry has the wrong type
        for candidate in messages:
            if isinstance(candidate, AbstractMessage):
                continue
            errorTemplate = "At least one message ({0}) is not an AbstractMessage."
            raise TypeError(errorTemplate.format(str(candidate)))

        equivalenceNotice = "Identify similar messages following their alignment (min_equivalence={0})"
        self._logger.debug(equivalenceNotice.format(self.minEquivalence))

        startNotice = "Initiating the clustering by alignment on {0} messages..."
        self._logger.debug(startNotice.format(len(messages)))
        symbols = self._processUPGMA(messages, self.recomputeMatrixThreshold)
        self._logger.debug("Clustering completed, computing final alignment.")

        # Imported locally, as in the rest of this module's operations
        from netzob.Inference.Vocabulary.Format import Format

        # Align the messages of every symbol to derive its fields
        for currentSymbol in symbols:
            alignNotice = "Align messages from symbol {0}"
            self._logger.debug(alignNotice.format(currentSymbol.name))
            Format.splitAligned(currentSymbol, useSemantic=False)

        return symbols

Example #2

Show file

File: ClusterByAlignment.py Project: gbossert/netzob

    def cluster(self, messages):
        """Cluster the given messages by alignment and return the symbols.

        Clustering is delegated to ``self._processUPGMA``; afterwards the
        messages of each symbol are aligned through ``Format.splitAligned``
        with ``useSemantic=False``.

        :parameter messages: the messages to cluster
        :type messages: a sized collection of AbstractMessage
        :return: the symbols produced by ``self._processUPGMA``
        :raise TypeError: if messages is None, empty, or contains something
                          which is not an AbstractMessage
        """
        # Input validation: every failure is reported as a TypeError
        if messages is None:
            raise TypeError("Messages cannot be None")
        if len(messages) < 1:
            raise TypeError("There should be at least one message.")
        for entry in messages:
            if not isinstance(entry, AbstractMessage):
                description = str(entry)
                raise TypeError(
                    "At least one message ({0}) is not an AbstractMessage.".format(
                        description))

        debug = self._logger.debug
        debug(
            "Identify similar messages following their alignment (min_equivalence={0})".format(
                self.minEquivalence))
        debug("Initiating the clustering by alignment on {0} messages...".format(
            len(messages)))

        clusteredSymbols = self._processUPGMA(messages,
                                              self.recomputeMatrixThreshold)
        debug("Clustering completed, computing final alignment.")

        from netzob.Inference.Vocabulary.Format import Format

        # Compute the alignment (and thus the fields) of each symbol
        for clusteredSymbol in clusteredSymbols:
            debug("Align messages from symbol {0}".format(clusteredSymbol.name))
            Format.splitAligned(clusteredSymbol, useSemantic=False)

        return clusteredSymbols

Example #3

Show file

    def execute(self, field, useSemantic=True):
        """Align the message values of a field and rebuild its sub-fields.

        The field is first reset with ``Format.resetFormat``. Its message
        values are then aligned by ``self._alignData`` and the result is
        applied with ``self._updateFieldsFromAlignment``. When the field
        exposes no message value the method stops after the reset.

        :parameter field: the field that will be aligned
        :type field: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
        :parameter useSemantic: when true, the applicative data found in each
                                message is passed to the alignment as semantic tags
        :raise TypeError: if field or useSemantic is None
        :raise ValueError: if no alignment could be computed
        """
        mandatoryArguments = (
            (field, "Field cannot be None"),
            (useSemantic, "useSemantic cannot be None"),
        )
        for argument, complaint in mandatoryArguments:
            if argument is None:
                raise TypeError(complaint)

        # Start from a clean field before computing anything
        from netzob.Inference.Vocabulary.Format import Format
        Format.resetFormat(field)

        # Segments of messages to align, keyed by message
        valuesPerMessage = field.getMessageValues(encoded=False, styled=False)

        # Semantic tags (a.k.a. applicative data), one entry per message
        if useSemantic:
            semanticTags = []
            for message, _unusedValue in list(valuesPerMessage.items()):
                semanticTags.append(
                    self.__searchApplicativeDataInMessage(message))
        else:
            semanticTags = None

        dataToAlign = list(valuesPerMessage.values())
        if not dataToAlign:
            return

        # Run the alignment itself; the score is not used here
        alignmentResult = self._alignData(dataToAlign, semanticTags)
        (alignment, semanticTags, score) = alignmentResult

        if alignment is None:
            raise ValueError(
                "Impossible to compute an alignment for the specifed data")

        # Create the fields described by the alignment and the semantic tags
        self._updateFieldsFromAlignment(field, alignment, semanticTags)

Example #4

Show file

File: FieldSplitAligned.py Project: netzob/netzob

    def execute(self, field, useSemantic=True):
        """Run the alignment on the specified field.

        Steps: reset the field (``Format.resetFormat``), collect its message
        values, optionally collect semantic tags, align the values with
        ``self._alignData`` and finally create the fields through
        ``self._updateFieldsFromAlignment``. Returns early, after the reset,
        if the field has no message value.

        :parameter field: the field that will be aligned
        :type field: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
        :parameter useSemantic: if true, applicative data searched in every
                                message is supplied to the alignment
        :raise TypeError: if field or useSemantic is None
        :raise ValueError: if the alignment step yields no alignment
        """
        if field is None:
            raise TypeError("Field cannot be None")
        elif useSemantic is None:
            raise TypeError("useSemantic cannot be None")

        # Step 1: clean and reset the field
        from netzob.Inference.Vocabulary.Format import Format
        Format.resetFormat(field)

        # Step 2: gather the message segments that must be aligned
        segments = field.getMessageValues(encoded=False, styled=False)

        # Step 3: semantic tags (a.k.a. applicative data), only on request
        semanticTags = None
        if useSemantic:
            search = self.__searchApplicativeDataInMessage
            semanticTags = [
                search(pair[0]) for pair in list(segments.items())
            ]

        segmentValues = list(segments.values())
        if len(segmentValues) == 0:
            return

        # Step 4: compute the alignment
        alignment, semanticTags, score = self._alignData(
            segmentValues, semanticTags)
        if alignment is None:
            raise ValueError(
                "Impossible to compute an alignment for the specifed data")

        # Step 5: build the fields from the alignment and the semantic tags
        self._updateFieldsFromAlignment(field, alignment, semanticTags)

Example #5

Show file

File: test_ScapyExporter.py Project: youdinforsec/netzob

    def test_exportToScapy(self):
        # Test if exportToScapy function in ScapyExporter is working correctly.
        # Test if created file runs without error
        # Verify the content of the exported file is as expected, including size info.
        # check for both iterable and non-iterable symbols
        m1 = RawMessage("\x09\x70\x95\xcc\xef")
        m2 = RawMessage("\x0a\x70\x03\x8f\x23\x5f")
        m3 = RawMessage("\x09\x70\x31\xa0")
        m4 = RawMessage("\x0a\xd6\xb5\x5b")
        messages = [m1, m2, m3, m4]

        # Files written to the current directory by the exporter. They are
        # removed in the ``finally`` clause so that a failing assertion does
        # not leave them behind.
        generatedFiles = ('test1.py', 'test2.py')
        try:
            # Case 1: a single (non-iterable) symbol
            symbols = Symbol(messages=messages)
            with self.assertRaises(TypeError):
                iter(symbols)
            self.assertIsNone(
                ScapyExporter().exportToScapy(symbols, 'test1.py',
                                              'ProtocolName'))
            # The exported module must at least run on its own
            self.assertEqual(os.system("python test1.py"), 0)
            import test1
            self.assertEqual(test1.ProtocolName_Symbol().fields_desc[0].name,
                             'Field')
            self.assertEqual(
                test1.ProtocolName_Symbol().fields_desc[0].default, None)
            self.assertEqual(test1.ProtocolName_Symbol().fields_desc[0].cls,
                             (0, None))  # size
            with self.assertRaises(AttributeError):
                test1.ProtocolName_Symbol1().fields_desc[0].name

            # Case 2: a list of symbols obtained by clustering
            symbl = Format.clusterByAlignment(messages,
                                              minEquivalence=50,
                                              internalSlick=True)
            self.assertIsInstance(symbl, list)
            self.assertIsNone(
                ScapyExporter().exportToScapy(symbl, 'test2.py',
                                              'ProtocolName'))
            self.assertEqual(os.system("python test2.py"), 0)
            import test2
            self.assertEqual(test2.ProtocolName_Symbol0().fields_desc[0].name,
                             'Field00')
            self.assertEqual(test2.ProtocolName_Symbol0().fields_desc[0].cls,
                             (0, 32))  # size
            self.assertEqual(
                test2.ProtocolName_Symbol1().fields_desc[1].default, 'p')
            self.assertEqual(test2.ProtocolName_Symbol1().Field01, 'p')
            self.assertEqual(len(test2.ProtocolName_Symbol1().fields_desc), 3)
        finally:
            for generatedFile in generatedFiles:
                # A file may be missing if the export itself failed
                if os.path.exists(generatedFile):
                    os.remove(generatedFile)

Example #6

Show file

File: FindKeyFields.py Project: chubbymaggie/netzob

    def execute(self, field):
        """Try to identify potential key fields in a symbol/field.

        A sub-field is reported as a potential key field when its values are
        not all identical and all have the same length. For each of them the
        messages are clustered with ``Format.clusterByKeyField`` and the
        number of clusters and their sizes are attached to the result.

        >>> import binascii
        >>> from netzob.all import *
        >>> samples = ["00ff2f000011", "000010000000", "00fe1f000000", "000020000000", "00ff1f000000", "00ff1f000000", "00ff2f000000", "00fe1f000000"]
        >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> Format.splitStatic(symbol)
        >>> symbol.addEncodingFunction(TypeEncodingFunction(HexaString))
        >>> print(symbol)
        Field-0 | Field-1 | Field-2 | Field-3
        ------- | ------- | ------- | -------
        '00'    | 'ff2f'  | '0000'  | '11'   
        '00'    | '0010'  | '0000'  | '00'   
        '00'    | 'fe1f'  | '0000'  | '00'   
        '00'    | '0020'  | '0000'  | '00'   
        '00'    | 'ff1f'  | '0000'  | '00'   
        '00'    | 'ff1f'  | '0000'  | '00'   
        '00'    | 'ff2f'  | '0000'  | '00'   
        '00'    | 'fe1f'  | '0000'  | '00'   
        ------- | ------- | ------- | -------

        >>> finder = FindKeyFields()
        >>> results = finder.execute(symbol)
        >>> for result in results:
        ...     print("Field name: " + result["keyField"].name + ", number of clusters: " + str(result["nbClusters"]) + ", distribution: " + str(result["distribution"]))
        Field name: Field-1, number of clusters: 5, distribution: [2, 1, 2, 2, 1]
        Field name: Field-3, number of clusters: 2, distribution: [1, 7]

        :param field: the field in which we want to identify key fields.
        :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
        :return: a list of dicts with the keys ``"keyField"``, ``"nbClusters"``
                 and ``"distribution"``; empty when the field has fewer than
                 two messages or when no candidate is found
        :raise TypeError: if field is None
        """

        # Safe checks
        if field is None:
            raise TypeError("'field' should not be None")
        if len(field.messages) < 2:
            return []

        cells = field.getCells(encoded=False, styled=False, transposed=False)
        # Materialize the transposition: zip() is a one-shot, non-indexable
        # iterator on Python 3, while the columns are accessed by index below.
        columns = list(zip(*cells))

        # Retrieve dynamic fields with fixed size
        results = []
        for (i, f) in enumerate(field.fields):
            column = columns[i]
            # A column holding a single distinct value is static, not a key
            if len(set(column)) <= 1:
                continue
            # Every value, the first one included, must have the same length
            if len({len(val) for val in column}) == 1:
                results.append({"keyField": f})

        # Nothing to cluster when no candidate was found
        if not results:
            return results

        # Compute clusters according to each key field found
        from netzob.Inference.Vocabulary.Format import Format
        for result in results:
            tmpClusters = Format.clusterByKeyField(field, result["keyField"])
            result["nbClusters"] = len(tmpClusters)
            # Clusters distribution: number of messages in each cluster
            result["distribution"] = [
                len(cluster.messages) for cluster in tmpClusters.values()
            ]

        return results

Example #7

Show file

    def split(field, delimiter):
        """Split a field (or symbol) with a specific delimiter. The
        delimiter can be passed either as an ASCII, a Raw, an
        HexaString, or any objects that inherit from AbstractType.

        Nothing is changed (and None is returned) when the delimiter does
        not split any message.


        >>> from netzob.all import *
        >>> samples = [b"aaaaff000000ff10", b"bbff110010ff00000011", b"ccccccccfffe1f000000ff12"]
        >>> messages = [RawMessage(data=sample) for sample in samples]
        >>> symbol = Symbol(messages=messages[:3])
        >>> Format.splitDelimiter(symbol, ASCII("ff"))
        >>> print(symbol)
        Field-0    | Field-sep-6666 | Field-2      | Field-sep-6666 | Field-4   
        ---------- | -------------- | ------------ | -------------- | ----------
        'aaaa'     | 'ff'           | '000000'     | 'ff'           | '10'      
        'bb'       | 'ff'           | '110010'     | 'ff'           | '00000011'
        'cccccccc' | 'ff'           | 'fe1f000000' | 'ff'           | '12'      
        ---------- | -------------- | ------------ | -------------- | ----------

        >>> samples = [b"434d446964656e74696679230400000066726564", b"5245536964656e74696679230000000000000000", b"434d44696e666f2300000000", b"524553696e666f230000000004000000696e666f", b"434d4473746174732300000000", b"52455373746174732300000000050000007374617473", b"434d4461757468656e7469667923090000006d7950617373776421", b"52455361757468656e74696679230000000000000000", b"434d44656e6372797074230a00000031323334353674657374", b"524553656e637279707423000000000a00000073707176777436273136", b"434d4464656372797074230a00000073707176777436273136", b"5245536465637279707423000000000a00000031323334353674657374", b"434d446279652300000000", b"524553627965230000000000000000", b"434d446964656e746966792307000000526f626572746f", b"5245536964656e74696679230000000000000000", b"434d44696e666f2300000000", b"524553696e666f230000000004000000696e666f", b"434d4473746174732300000000", b"52455373746174732300000000050000007374617473", b"434d4461757468656e74696679230a000000615374726f6e67507764", b"52455361757468656e74696679230000000000000000", b"434d44656e63727970742306000000616263646566", b"524553656e6372797074230000000006000000232021262724", b"434d44646563727970742306000000232021262724", b"52455364656372797074230000000006000000616263646566", b"434d446279652300000000", b"524553627965230000000000000000"]
        >>> messages = [RawMessage(data=TypeConverter.convert(sample, HexaString, Raw)) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> symbol.encodingFunctions.add(TypeEncodingFunction(ASCII))  # Change visualization to ASCII
        >>> Format.splitDelimiter(symbol, ASCII("#"))
        >>> print(symbol)
        Field-0         | Field-sep-23 | Field-2              | Field-sep-23 | Field-4
        --------------- | ------------ | -------------------- | ------------ | -------
        'CMDidentify'   | '#'          | '....fred'           | ''           | ''     
        'RESidentify'   | '#'          | '........'           | ''           | ''     
        'CMDinfo'       | '#'          | '....'               | ''           | ''     
        'RESinfo'       | '#'          | '........info'       | ''           | ''     
        'CMDstats'      | '#'          | '....'               | ''           | ''     
        'RESstats'      | '#'          | '........stats'      | ''           | ''     
        'CMDauthentify' | '#'          | '....myPasswd!'      | ''           | ''     
        'RESauthentify' | '#'          | '........'           | ''           | ''     
        'CMDencrypt'    | '#'          | '....123456test'     | ''           | ''     
        'RESencrypt'    | '#'          | "........spqvwt6'16" | ''           | ''     
        'CMDdecrypt'    | '#'          | "....spqvwt6'16"     | ''           | ''     
        'RESdecrypt'    | '#'          | '........123456test' | ''           | ''     
        'CMDbye'        | '#'          | '....'               | ''           | ''     
        'RESbye'        | '#'          | '........'           | ''           | ''     
        'CMDidentify'   | '#'          | '....Roberto'        | ''           | ''     
        'RESidentify'   | '#'          | '........'           | ''           | ''     
        'CMDinfo'       | '#'          | '....'               | ''           | ''     
        'RESinfo'       | '#'          | '........info'       | ''           | ''     
        'CMDstats'      | '#'          | '....'               | ''           | ''     
        'RESstats'      | '#'          | '........stats'      | ''           | ''     
        'CMDauthentify' | '#'          | '....aStrongPwd'     | ''           | ''     
        'RESauthentify' | '#'          | '........'           | ''           | ''     
        'CMDencrypt'    | '#'          | '....abcdef'         | ''           | ''     
        'RESencrypt'    | '#'          | '........'           | '#'          | " !&'$"
        'CMDdecrypt'    | '#'          | '....'               | '#'          | " !&'$"
        'RESdecrypt'    | '#'          | '........abcdef'     | ''           | ''     
        'CMDbye'        | '#'          | '....'               | ''           | ''     
        'RESbye'        | '#'          | '........'           | ''           | ''     
        --------------- | ------------ | -------------------- | ------------ | -------
        >>> print(symbol.fields[0]._str_debug())
        Field-0
        |--   Alt
              |--   Data (Raw=b'CMDidentify' ((0, 88)))
              |--   Data (Raw=b'RESidentify' ((0, 88)))
              |--   Data (Raw=b'CMDinfo' ((0, 56)))
              |--   Data (Raw=b'RESinfo' ((0, 56)))
              |--   Data (Raw=b'CMDstats' ((0, 64)))
              |--   Data (Raw=b'RESstats' ((0, 64)))
              |--   Data (Raw=b'CMDauthentify' ((0, 104)))
              |--   Data (Raw=b'RESauthentify' ((0, 104)))
              |--   Data (Raw=b'CMDencrypt' ((0, 80)))
              |--   Data (Raw=b'RESencrypt' ((0, 80)))
              |--   Data (Raw=b'CMDdecrypt' ((0, 80)))
              |--   Data (Raw=b'RESdecrypt' ((0, 80)))
              |--   Data (Raw=b'CMDbye' ((0, 48)))
              |--   Data (Raw=b'RESbye' ((0, 48)))

        Below is another example of the FieldSplitDelimiter usage: it splits fields based on a Raw string.


        >>> from netzob.all import *
        >>> samples = [b"\\x01\\x02\\x03\\xff\\x04\\x05\\xff\\x06\\x07", b"\\x01\\x02\\xff\\x03\\x04\\x05\\x06\\xff\\x07", b"\\x01\\xff\\x02\\x03\\x04\\x05\\x06"]
        >>> messages = [RawMessage(data=sample) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> Format.splitDelimiter(symbol, Raw(b"\\xff"))
        >>> print(symbol)
        Field-0        | Field-sep-ff | Field-2                | Field-sep-ff | Field-4   
        -------------- | ------------ | ---------------------- | ------------ | ----------
        '\\x01\\x02\\x03' | b'\\xff'      | '\\x04\\x05'             | b'\\xff'      | '\\x06\\x07'
        '\\x01\\x02'     | b'\\xff'      | '\\x03\\x04\\x05\\x06'     | b'\\xff'      | '\\x07'    
        '\\x01'         | b'\\xff'      | '\\x02\\x03\\x04\\x05\\x06' | ''           | ''        
        -------------- | ------------ | ---------------------- | ------------ | ----------


        :param field: the field to consider when spliting
        :type field: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
        :param delimiter: the delimiter used to split messages of the field
        :type delimiter: :class:`netzob.Model.Types.AbstractType.AbstractType`
        :raise TypeError: if field or delimiter is None
        :raise ValueError: if the field does not contain any message
        """

        if delimiter is None:
            raise TypeError("Delimiter cannot be None.")

        if field is None:
            raise TypeError("Field cannot be None.")

        if len(field.messages) < 1:
            raise ValueError("The associated symbol does not contain any message.")

        # Find message substrings after applying delimiter
        rawDelimiter = delimiter.value.tobytes()
        splittedMessages = [cell.split(rawDelimiter) for cell in field.getValues(encoded=False, styled=False)]

        import itertools
        # Inverse the array, so that columns contains observed values for each field.
        # Messages with fewer segments than the longest one are padded with None.
        columns = list(itertools.zip_longest(*splittedMessages))

        # If the delimiter does not create splitted fields
        if len(columns) <= 1:
            return

        # The name of the separator fields only depends on the delimiter
        str_delimiter = TypeConverter.convert(delimiter.value, BitArray, HexaString).decode('utf-8')
        fieldName = "Field-sep-{}".format(str_delimiter)

        # Else, we add (2*len(columns)-1) fields
        newFields = []
        iField = -1
        for column in columns:
            iField += 1

            fieldDomain = []

            # temporary set that hosts all the observed values to prevent useless duplicate ones
            observedValues = set()
            has_inserted_empty_value = False

            isEmptyField = True  # To avoid adding an empty field
            for v in column:
                # Truthiness covers both the None padding and empty segments,
                # whatever the string type (a bytes value never equals "").
                if v:
                    isEmptyField = False

                    if v not in observedValues:
                        fieldDomain.append(Raw(v))
                        observedValues.add(v)
                elif not has_inserted_empty_value:
                    fieldDomain.append(Raw(nbBytes=0))
                    has_inserted_empty_value = True

            if not isEmptyField:
                newField = Field(domain=DomainFactory.normalizeDomain(fieldDomain), name="Field-" + str(iField))
                newField.encodingFunctions = list(field.encodingFunctions.values())
                newFields.append(newField)
                # The separator that follows takes the next index
                iField += 1

            newFields.append(Field(domain=Alt([delimiter, Raw(nbBytes=0)]), name=fieldName))

        # A separator was appended after every column: drop the trailing one
        newFields.pop()

        # Reset the field
        from netzob.Inference.Vocabulary.Format import Format
        Format.resetFormat(field)

        # Create a field for each entry
        field.fields = newFields

Example #8

Show file

File: FieldSplitDelimiter.py Project: gbossert/netzob

    def split(field, delimiter):
        """Split a field (or symbol) with a specific delimiter. The
        delimiter can be passed either as an ASCII, a Raw, an
        HexaString, or any objects that inherit from AbstractType.

        The field is left untouched (and None is returned) when the
        delimiter does not split any message.


        >>> from netzob.all import *
        >>> samples = [b"aaaaff000000ff10", b"bbff110010ff00000011", b"ccccccccfffe1f000000ff12"]
        >>> messages = [RawMessage(data=sample) for sample in samples]
        >>> symbol = Symbol(messages=messages[:3])
        >>> Format.splitDelimiter(symbol, ASCII("ff"))
        >>> print(symbol)
        Field-0    | Field-sep-6666 | Field-2      | Field-sep-6666 | Field-4   
        ---------- | -------------- | ------------ | -------------- | ----------
        'aaaa'     | 'ff'           | '000000'     | 'ff'           | '10'      
        'bb'       | 'ff'           | '110010'     | 'ff'           | '00000011'
        'cccccccc' | 'ff'           | 'fe1f000000' | 'ff'           | '12'      
        ---------- | -------------- | ------------ | -------------- | ----------

        >>> samples = [b"434d446964656e74696679230400000066726564", b"5245536964656e74696679230000000000000000", b"434d44696e666f2300000000", b"524553696e666f230000000004000000696e666f", b"434d4473746174732300000000", b"52455373746174732300000000050000007374617473", b"434d4461757468656e7469667923090000006d7950617373776421", b"52455361757468656e74696679230000000000000000", b"434d44656e6372797074230a00000031323334353674657374", b"524553656e637279707423000000000a00000073707176777436273136", b"434d4464656372797074230a00000073707176777436273136", b"5245536465637279707423000000000a00000031323334353674657374", b"434d446279652300000000", b"524553627965230000000000000000", b"434d446964656e746966792307000000526f626572746f", b"5245536964656e74696679230000000000000000", b"434d44696e666f2300000000", b"524553696e666f230000000004000000696e666f", b"434d4473746174732300000000", b"52455373746174732300000000050000007374617473", b"434d4461757468656e74696679230a000000615374726f6e67507764", b"52455361757468656e74696679230000000000000000", b"434d44656e63727970742306000000616263646566", b"524553656e6372797074230000000006000000232021262724", b"434d44646563727970742306000000232021262724", b"52455364656372797074230000000006000000616263646566", b"434d446279652300000000", b"524553627965230000000000000000"]
        >>> messages = [RawMessage(data=TypeConverter.convert(sample, HexaString, Raw)) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> symbol.encodingFunctions.add(TypeEncodingFunction(ASCII))  # Change visualization to ASCII
        >>> Format.splitDelimiter(symbol, ASCII("#"))
        >>> print(symbol)
        Field-0         | Field-sep-23 | Field-2              | Field-sep-23 | Field-4
        --------------- | ------------ | -------------------- | ------------ | -------
        'CMDidentify'   | '#'          | '....fred'           | ''           | ''     
        'RESidentify'   | '#'          | '........'           | ''           | ''     
        'CMDinfo'       | '#'          | '....'               | ''           | ''     
        'RESinfo'       | '#'          | '........info'       | ''           | ''     
        'CMDstats'      | '#'          | '....'               | ''           | ''     
        'RESstats'      | '#'          | '........stats'      | ''           | ''     
        'CMDauthentify' | '#'          | '....myPasswd!'      | ''           | ''     
        'RESauthentify' | '#'          | '........'           | ''           | ''     
        'CMDencrypt'    | '#'          | '....123456test'     | ''           | ''     
        'RESencrypt'    | '#'          | "........spqvwt6'16" | ''           | ''     
        'CMDdecrypt'    | '#'          | "....spqvwt6'16"     | ''           | ''     
        'RESdecrypt'    | '#'          | '........123456test' | ''           | ''     
        'CMDbye'        | '#'          | '....'               | ''           | ''     
        'RESbye'        | '#'          | '........'           | ''           | ''     
        'CMDidentify'   | '#'          | '....Roberto'        | ''           | ''     
        'RESidentify'   | '#'          | '........'           | ''           | ''     
        'CMDinfo'       | '#'          | '....'               | ''           | ''     
        'RESinfo'       | '#'          | '........info'       | ''           | ''     
        'CMDstats'      | '#'          | '....'               | ''           | ''     
        'RESstats'      | '#'          | '........stats'      | ''           | ''     
        'CMDauthentify' | '#'          | '....aStrongPwd'     | ''           | ''     
        'RESauthentify' | '#'          | '........'           | ''           | ''     
        'CMDencrypt'    | '#'          | '....abcdef'         | ''           | ''     
        'RESencrypt'    | '#'          | '........'           | '#'          | " !&'$"
        'CMDdecrypt'    | '#'          | '....'               | '#'          | " !&'$"
        'RESdecrypt'    | '#'          | '........abcdef'     | ''           | ''     
        'CMDbye'        | '#'          | '....'               | ''           | ''     
        'RESbye'        | '#'          | '........'           | ''           | ''     
        --------------- | ------------ | -------------------- | ------------ | -------
        >>> print(symbol.fields[0]._str_debug())
        Field-0
        |--   Alt
              |--   Data (Raw=b'CMDidentify' ((0, 88)))
              |--   Data (Raw=b'RESidentify' ((0, 88)))
              |--   Data (Raw=b'CMDinfo' ((0, 56)))
              |--   Data (Raw=b'RESinfo' ((0, 56)))
              |--   Data (Raw=b'CMDstats' ((0, 64)))
              |--   Data (Raw=b'RESstats' ((0, 64)))
              |--   Data (Raw=b'CMDauthentify' ((0, 104)))
              |--   Data (Raw=b'RESauthentify' ((0, 104)))
              |--   Data (Raw=b'CMDencrypt' ((0, 80)))
              |--   Data (Raw=b'RESencrypt' ((0, 80)))
              |--   Data (Raw=b'CMDdecrypt' ((0, 80)))
              |--   Data (Raw=b'RESdecrypt' ((0, 80)))
              |--   Data (Raw=b'CMDbye' ((0, 48)))
              |--   Data (Raw=b'RESbye' ((0, 48)))

        Below is another example of the FieldSplitDelimiter usage: it splits fields based on a Raw string.


        >>> from netzob.all import *
        >>> samples = [b"\\x01\\x02\\x03\\xff\\x04\\x05\\xff\\x06\\x07", b"\\x01\\x02\\xff\\x03\\x04\\x05\\x06\\xff\\x07", b"\\x01\\xff\\x02\\x03\\x04\\x05\\x06"]
        >>> messages = [RawMessage(data=sample) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> Format.splitDelimiter(symbol, Raw(b"\\xff"))
        >>> print(symbol)
        Field-0        | Field-sep-ff | Field-2                | Field-sep-ff | Field-4   
        -------------- | ------------ | ---------------------- | ------------ | ----------
        '\\x01\\x02\\x03' | b'\\xff'      | '\\x04\\x05'             | b'\\xff'      | '\\x06\\x07'
        '\\x01\\x02'     | b'\\xff'      | '\\x03\\x04\\x05\\x06'     | b'\\xff'      | '\\x07'    
        '\\x01'         | b'\\xff'      | '\\x02\\x03\\x04\\x05\\x06' | ''           | ''        
        -------------- | ------------ | ---------------------- | ------------ | ----------


        :param field: the field to consider when spliting
        :type field: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
        :param delimiter: the delimiter used to split messages of the field
        :type delimiter: :class:`netzob.Model.Vocabulary.Types.AbstractType.AbstractType`
        :raise TypeError: if field or delimiter is None
        :raise ValueError: if the field does not contain any message
        """

        if delimiter is None:
            raise TypeError("Delimiter cannot be None.")

        if field is None:
            raise TypeError("Field cannot be None.")

        if len(field.messages) < 1:
            raise ValueError(
                "The associated symbol does not contain any message.")

        # Find message substrings after applying delimiter
        rawDelimiter = delimiter.value.tobytes()
        splittedMessages = [
            cell.split(rawDelimiter)
            for cell in field.getValues(encoded=False, styled=False)
        ]

        import itertools
        # Inverse the array, so that columns contains observed values for
        # each field. Messages with fewer segments are padded with None.
        columns = list(itertools.zip_longest(*splittedMessages))

        # If the delimiter does not create splitted fields
        if len(columns) <= 1:
            return

        # The name of the separator fields only depends on the delimiter
        str_delimiter = TypeConverter.convert(delimiter.value, BitArray,
                                              HexaString).decode('utf-8')
        fieldName = "Field-sep-{}".format(str_delimiter)

        # Else, we add (2*len(columns)-1) fields
        newFields = []
        iField = -1
        for column in columns:
            iField += 1

            fieldDomain = []

            # temporary set that hosts all the observed values to prevent useless duplicate ones
            observedValues = set()
            has_inserted_empty_value = False

            isEmptyField = True  # To avoid adding an empty field
            for v in column:
                # Truthiness covers both the None padding and empty
                # segments, whatever the string type (a bytes value never
                # equals "").
                if v:
                    isEmptyField = False

                    if v not in observedValues:
                        fieldDomain.append(Raw(v))
                        observedValues.add(v)
                elif not has_inserted_empty_value:
                    fieldDomain.append(Raw(nbBytes=0))
                    has_inserted_empty_value = True

            if not isEmptyField:
                newField = Field(
                    domain=DomainFactory.normalizeDomain(fieldDomain),
                    name="Field-" + str(iField))
                newField.encodingFunctions = list(
                    field.encodingFunctions.values())
                newFields.append(newField)
                # The separator that follows takes the next index
                iField += 1

            newFields.append(
                Field(domain=Alt([delimiter, Raw(nbBytes=0)]), name=fieldName))

        # A separator was appended after every column: drop the trailing one
        newFields.pop()

        # Reset the field
        from netzob.Inference.Vocabulary.Format import Format
        Format.resetFormat(field)

        # Create a field for each entry
        field.fields = newFields

Example #9

Show file

    def execute(self, field):
        """Try to identify potential key fields in a symbol/field.

        A sub-field is reported as a candidate key field when its observed
        values are not all identical and every one of them has the same
        length. For each candidate, the messages are then clustered
        according to the value of that sub-field.

        >>> import binascii
        >>> from netzob.all import *
        >>> samples = [b"00ff2f000011", b"000010000000", b"00fe1f000000", b"000020000000", b"00ff1f000000", b"00ff1f000000", b"00ff2f000000", b"00fe1f000000"]
        >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> Format.splitStatic(symbol)
        >>> symbol.addEncodingFunction(TypeEncodingFunction(HexaString))
        >>> print(symbol)
        Field-0 | Field-1 | Field-2 | Field-3
        ------- | ------- | ------- | -------
        '00'    | 'ff2f'  | '0000'  | '11'   
        '00'    | '0010'  | '0000'  | '00'   
        '00'    | 'fe1f'  | '0000'  | '00'   
        '00'    | '0020'  | '0000'  | '00'   
        '00'    | 'ff1f'  | '0000'  | '00'   
        '00'    | 'ff1f'  | '0000'  | '00'   
        '00'    | 'ff2f'  | '0000'  | '00'   
        '00'    | 'fe1f'  | '0000'  | '00'   
        ------- | ------- | ------- | -------

        >>> finder = FindKeyFields()
        >>> results = finder.execute(symbol)
        >>> for result in results:
        ...     print("Field name: " + result["keyField"].name + ", number of clusters: " + str(result["nbClusters"]) + ", distribution: " + str(result["distribution"]))
        Field name: Field-1, number of clusters: 5, distribution: [2, 1, 2, 1, 2]
        Field name: Field-3, number of clusters: 2, distribution: [1, 7]

        :param field: the field in which we want to identify key fields.
        :type field: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
        :return: one dict per candidate key field, holding the keys
                 ``"keyField"`` (the sub-field), ``"nbClusters"`` (number of
                 clusters obtained with that key) and ``"distribution"``
                 (number of messages in each cluster). An empty list is
                 returned when the field has fewer than two messages or when
                 no candidate is found.
        :rtype: a :class:`list` of :class:`dict`
        :raise TypeError: if ``field`` is None
        """

        # Safe checks
        if field is None:
            raise TypeError("'field' should not be None")
        if len(field.messages) < 2:
            return []

        cells = field.getCells(encoded=False, styled=False, transposed=False)
        # Transpose the cells so that columns[i] gathers every value observed
        # for the i-th sub-field (assumes getCells yields one row per message)
        columns = list(zip(*cells))

        # Retrieve dynamic fields with fixed size
        results = []
        for (i, f) in enumerate(field.fields):
            column = columns[i]

            # A column holding a single distinct value is static, not a key
            if len(set(column)) <= 1:
                continue

            # The first value is the reference: every other value must have
            # the same length for the field to be considered fixed-size
            refLength = len(column[0])
            if all(len(val) == refLength for val in column[1:]):
                results.append({"keyField": f})

        # Nothing to cluster on
        if not results:
            return results

        # Compute clusters according to each key field found
        from netzob.Inference.Vocabulary.Format import Format
        for result in results:
            clusters = Format.clusterByKeyField(field, result["keyField"])
            result["nbClusters"] = len(clusters)
            # Clusters distribution: number of messages in each cluster
            result["distribution"] = [
                len(cluster.messages) for cluster in clusters.values()
            ]

        return results

Example #10

Show file

File: FieldSplitDelimiter.py Project: RepublicMaster/netzob

    def split(field, delimiter):
        """Split a field (or symbol) with a specific delimiter. The
        delimiter can be passed either as an ASCII, a Raw, an
        HexaString, or any objects that inherit from AbstractType.

        Every message value is cut on the delimiter. The field is then
        reset and given one sub-field per resulting column, with a
        separator sub-field (the delimiter or nothing) between two
        consecutive columns. Columns in which no message has a value are
        not turned into a sub-field. Nothing is changed if the delimiter
        does not split any message.

        >>> from netzob.all import *
        >>> samples = ["aaaaff000000ff10", "bbff110010ff00000011", "ccccccccfffe1f000000ff12"]
        >>> messages = [RawMessage(data=sample) for sample in samples]
        >>> symbol = Symbol(messages=messages[:3])
        >>> Format.splitDelimiter(symbol, ASCII("ff"))
        >>> print(symbol)
        'aaaa'     | 'ff' | '000000'     | 'ff' | '10'      
        'bb'       | 'ff' | '110010'     | 'ff' | '00000011'
        'cccccccc' | 'ff' | 'fe1f000000' | 'ff' | '12'      

        >>> samples = ["434d446964656e74696679230400000066726564", "5245536964656e74696679230000000000000000", "434d44696e666f2300000000", "524553696e666f230000000004000000696e666f","434d4473746174732300000000","52455373746174732300000000050000007374617473","434d4461757468656e7469667923090000006d7950617373776421","52455361757468656e74696679230000000000000000","434d44656e6372797074230a00000031323334353674657374","524553656e637279707423000000000a00000073707176777436273136","434d4464656372797074230a00000073707176777436273136","5245536465637279707423000000000a00000031323334353674657374","434d446279652300000000","524553627965230000000000000000","434d446964656e746966792307000000526f626572746f","5245536964656e74696679230000000000000000","434d44696e666f2300000000","524553696e666f230000000004000000696e666f","434d4473746174732300000000","52455373746174732300000000050000007374617473","434d4461757468656e74696679230a000000615374726f6e67507764","52455361757468656e74696679230000000000000000","434d44656e63727970742306000000616263646566","524553656e6372797074230000000006000000232021262724","434d44646563727970742306000000232021262724","52455364656372797074230000000006000000616263646566","434d446279652300000000","524553627965230000000000000000"]
        >>> messages = [RawMessage(data=TypeConverter.convert(sample, HexaString, Raw)) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> symbol.encodingFunctions.add(TypeEncodingFunction(ASCII))  # Change visualization to ASCII
        >>> Format.splitDelimiter(symbol, ASCII("#"))
        >>> print(symbol)
        'CMDidentify'   | '#' | '....fred'           | ''  | ''     
        'RESidentify'   | '#' | '........'           | ''  | ''     
        'CMDinfo'       | '#' | '....'               | ''  | ''     
        'RESinfo'       | '#' | '........info'       | ''  | ''     
        'CMDstats'      | '#' | '....'               | ''  | ''     
        'RESstats'      | '#' | '........stats'      | ''  | ''     
        'CMDauthentify' | '#' | '....myPasswd!'      | ''  | ''     
        'RESauthentify' | '#' | '........'           | ''  | ''     
        'CMDencrypt'    | '#' | '....123456test'     | ''  | ''     
        'RESencrypt'    | '#' | "........spqvwt6'16" | ''  | ''     
        'CMDdecrypt'    | '#' | "....spqvwt6'16"     | ''  | ''     
        'RESdecrypt'    | '#' | '........123456test' | ''  | ''     
        'CMDbye'        | '#' | '....'               | ''  | ''     
        'RESbye'        | '#' | '........'           | ''  | ''     
        'CMDidentify'   | '#' | '....Roberto'        | ''  | ''     
        'RESidentify'   | '#' | '........'           | ''  | ''     
        'CMDinfo'       | '#' | '....'               | ''  | ''     
        'RESinfo'       | '#' | '........info'       | ''  | ''     
        'CMDstats'      | '#' | '....'               | ''  | ''     
        'RESstats'      | '#' | '........stats'      | ''  | ''     
        'CMDauthentify' | '#' | '....aStrongPwd'     | ''  | ''     
        'RESauthentify' | '#' | '........'           | ''  | ''     
        'CMDencrypt'    | '#' | '....abcdef'         | ''  | ''     
        'RESencrypt'    | '#' | '........'           | '#' | " !&'$"
        'CMDdecrypt'    | '#' | '....'               | '#' | " !&'$"
        'RESdecrypt'    | '#' | '........abcdef'     | ''  | ''     
        'CMDbye'        | '#' | '....'               | ''  | ''     
        'RESbye'        | '#' | '........'           | ''  | ''     


        :param field: the field to consider when splitting
        :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
        :param delimiter: the delimiter used to split messages of the field
        :type delimiter: :class:`netzob.Common.Models.Types.AbstractType.AbstractType`
        :raise TypeError: if ``field`` or ``delimiter`` is None
        :raise ValueError: if the field does not contain any message
        """

        if delimiter is None:
            raise TypeError("Delimiter cannot be None.")

        if field is None:
            raise TypeError("Field cannot be None.")

        if len(field.messages) < 1:
            raise ValueError("The associated symbol does not contain any message.")

        # Find message substrings after applying delimiter
        rawDelimiter = delimiter.value.tobytes()
        splittedMessages = [
            cell.split(rawDelimiter)
            for cell in field.getValues(encoded=False, styled=False)
        ]

        import itertools
        # izip_longest was renamed zip_longest in Python 3: support both
        zipLongest = getattr(itertools, "zip_longest", None)
        if zipLongest is None:
            zipLongest = itertools.izip_longest

        # Inverse the array, so that each column contains the values observed
        # for one field (None where a message has fewer chunks than others)
        columns = list(zipLongest(*splittedMessages))

        # If the delimiter does not create splitted fields
        if len(columns) <= 1:
            return

        # Name shared by every separator field; the conversion result is
        # decoded when it is not already a native string
        hexDelimiter = TypeConverter.convert(delimiter.value, BitArray, HexaString)
        if not isinstance(hexDelimiter, str):
            hexDelimiter = hexDelimiter.decode('utf-8')
        separatorName = "Field-sep-" + hexDelimiter

        # Else, we add at most (2*len(columns)-1) fields
        newFields = []
        iField = -1
        for column in columns:
            iField += 1

            # Ordered domain; observedValues prevents duplicated entries
            fieldDomain = []
            observedValues = set()
            hasEmptyValue = False

            for v in column:
                if v:
                    if v not in observedValues:
                        observedValues.add(v)
                        fieldDomain.append(Raw(v))
                elif not hasEmptyValue:
                    # Missing (None) or empty chunk: a single zero-length
                    # alternative is enough
                    hasEmptyValue = True
                    fieldDomain.append(Raw(nbBytes=0))

            # To avoid adding an empty field
            if observedValues:
                newField = Field(domain=DomainFactory.normalizeDomain(fieldDomain), name="Field-" + str(iField))
                newField.encodingFunctions = list(field.encodingFunctions.values())
                newFields.append(newField)
                iField += 1

            newFields.append(Field(domain=Alt([delimiter, Raw(nbBytes=0)]), name=separatorName))

        # Drop the separator that was appended after the last column
        newFields.pop()

        # Reset the field
        from netzob.Inference.Vocabulary.Format import Format
        Format.resetFormat(field)

        # Create a field for each entry
        field.fields = newFields