def cluster(self, messages):
    """Cluster the provided messages by alignment and align every resulting symbol.

    The messages are handed to ``self._processUPGMA`` together with
    ``self.recomputeMatrixThreshold``; each symbol it returns is then split
    with ``Format.splitAligned(symbol, useSemantic=False)``.

    :param messages: the messages to cluster
    :type messages: a sized iterable of :class:`AbstractMessage`
    :return: the symbols returned by ``self._processUPGMA``
    :raises TypeError: if ``messages`` is None, is empty, or holds an
                       element that is not an :class:`AbstractMessage`
    """
    if messages is None:
        raise TypeError("Messages cannot be None")
    if len(messages) == 0:
        raise TypeError("There should be at least one message.")

    for candidate in messages:
        if isinstance(candidate, AbstractMessage):
            continue
        raise TypeError(
            "At least one message ({0}) is not an AbstractMessage.".format(
                str(candidate)))

    log = self._logger
    log.debug(
        "Identify similar messages following their alignment (min_equivalence={0})"
        .format(self.minEquivalence))
    log.debug(
        "Initiating the clustering by alignment on {0} messages...".format(
            len(messages)))

    clusters = self._processUPGMA(messages, self.recomputeMatrixThreshold)
    log.debug("Clustering completed, computing final alignment.")

    # Retrieve the alignment of each symbol and build the associated
    # regular expression
    for current in clusters:
        log.debug("Align messages from symbol {0}".format(current.name))
        # Local import performed at the point of use
        # (presumably to avoid a circular import -- TODO confirm)
        from netzob.Inference.Vocabulary.Format import Format
        Format.splitAligned(current, useSemantic=False)

    return clusters
def cluster(self, messages):
    """Group similar messages into symbols and compute their final alignment.

    Validation comes first, then ``self._processUPGMA`` is called with the
    messages and ``self.recomputeMatrixThreshold``. Every symbol it yields
    is finally passed to ``Format.splitAligned`` with ``useSemantic=False``.

    :param messages: the messages to cluster
    :type messages: a sized iterable of :class:`AbstractMessage`
    :return: the symbols returned by ``self._processUPGMA``
    :raises TypeError: if ``messages`` is None, is empty, or one of its
                       elements is not an :class:`AbstractMessage`
    """
    if messages is None:
        raise TypeError("Messages cannot be None")
    if len(messages) == 0:
        raise TypeError("There should be at least one message.")
    for item in messages:
        if not isinstance(item, AbstractMessage):
            description = str(item)
            raise TypeError(
                "At least one message ({0}) is not an AbstractMessage.".
                format(description))

    similarityNotice = "Identify similar messages following their alignment (min_equivalence={0})".format(
        self.minEquivalence)
    self._logger.debug(similarityNotice)

    startNotice = "Initiating the clustering by alignment on {0} messages...".format(
        len(messages))
    self._logger.debug(startNotice)

    result = self._processUPGMA(messages, self.recomputeMatrixThreshold)
    self._logger.debug("Clustering completed, computing final alignment.")

    # Retrieve the alignment of each symbol and build the associated
    # regular expression
    for sym in result:
        alignNotice = "Align messages from symbol {0}".format(sym.name)
        self._logger.debug(alignNotice)
        # Local import performed at the point of use
        # (presumably to avoid a circular import -- TODO confirm)
        from netzob.Inference.Vocabulary.Format import Format
        Format.splitAligned(sym, useSemantic=False)

    return result
def execute(self, field, useSemantic=True):
    """Align the message values of the specified field and rebuild its sub-fields.

    The field format is reset first, its message values are then aligned
    through ``self._alignData`` and the outcome is applied with
    ``self._updateFieldsFromAlignment``. Nothing more is done (and None is
    returned) when the field has no message value.

    :parameter field: the field that will be aligned
    :type field: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
    :parameter useSemantic: when true, applicative data is searched in each
                            message and given to the alignment as semantic tags
    :raises TypeError: if ``field`` or ``useSemantic`` is None
    :raises ValueError: if no alignment could be computed
    """
    if field is None:
        raise TypeError("Field cannot be None")
    if useSemantic is None:
        raise TypeError("useSemantic cannot be None")

    # First step: clean and reset the field
    from netzob.Inference.Vocabulary.Format import Format
    Format.resetFormat(field)

    # Retrieve all the message segments to align
    valuesByMessage = field.getMessageValues(encoded=False, styled=False)

    # Semantic tags (a.k.a. applicative data)
    if useSemantic:
        semanticTags = [
            self.__searchApplicativeDataInMessage(msg)
            for msg, _ in list(valuesByMessage.items())
        ]
    else:
        semanticTags = None

    segments = list(valuesByMessage.values())
    if len(segments) == 0:
        return

    # Execute the alignment
    alignment, semanticTags, _score = self._alignData(segments, semanticTags)

    # Check the results
    if alignment is None:
        raise ValueError(
            "Impossible to compute an alignment for the specifed data")

    # Build fields based on the computed alignment and semantic tags
    self._updateFieldsFromAlignment(field, alignment, semanticTags)
def execute(self, field, useSemantic=True):
    """Run the alignment on the specified field.

    Steps: reset the field format, collect the message values, optionally
    search applicative data in every message, align the values with
    ``self._alignData`` and finally update the field through
    ``self._updateFieldsFromAlignment``. Returns None early when there is
    no message value to align.

    :parameter field: the field that will be aligned
    :type field: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
    :parameter useSemantic: whether semantic tags are computed and passed
                            to the alignment
    :raises TypeError: if ``field`` or ``useSemantic`` is None
    :raises ValueError: if the alignment result is None
    """
    for argument, complaint in ((field, "Field cannot be None"),
                                (useSemantic, "useSemantic cannot be None")):
        if argument is None:
            raise TypeError(complaint)

    # First step: clean and reset the field
    from netzob.Inference.Vocabulary.Format import Format
    Format.resetFormat(field)

    # Retrieve all the message segments to align
    messageValues = field.getMessageValues(encoded=False, styled=False)

    # Semantic tags (a.k.a. applicative data)
    tags = None
    if useSemantic:
        tags = []
        for message, _ in list(messageValues.items()):
            tags.append(self.__searchApplicativeDataInMessage(message))

    dataToAlign = list(messageValues.values())
    if len(dataToAlign) == 0:
        return

    # Execute the alignment
    outcome = self._alignData(dataToAlign, tags)
    (alignment, tags, _score) = outcome

    # Check the results
    if alignment is None:
        raise ValueError(
            "Impossible to compute an alignment for the specifed data")

    # Build fields based on the computed alignment and semantic tags
    self._updateFieldsFromAlignment(field, alignment, tags)
def test_exportToScapy(self):
    """Check ScapyExporter.exportToScapy for a single symbol and for a list of symbols.

    For each case the exported file must run without error, be importable,
    and expose the expected field names, defaults and size information.
    """
    payloads = [
        "\x09\x70\x95\xcc\xef",
        "\x0a\x70\x03\x8f\x23\x5f",
        "\x09\x70\x31\xa0",
        "\x0a\xd6\xb5\x5b",
    ]
    messages = [RawMessage(payload) for payload in payloads]

    # --- Non-iterable case: a single Symbol -----------------------------
    symbols = Symbol(messages=messages)
    with self.assertRaises(TypeError):
        iter(symbols)

    exported = ScapyExporter().exportToScapy(symbols, 'test1.py',
                                             'ProtocolName')
    self.assertEqual(exported, None)
    exitCode = os.system("python test1.py")
    self.assertEqual(exitCode, 0)

    import test1
    fieldName = test1.ProtocolName_Symbol().fields_desc[0].name
    self.assertEqual(fieldName, 'Field')
    fieldDefault = test1.ProtocolName_Symbol().fields_desc[0].default
    self.assertEqual(fieldDefault, None)
    fieldSize = test1.ProtocolName_Symbol().fields_desc[0].cls
    self.assertEqual(fieldSize, (0, None))  # size
    with self.assertRaises(AttributeError):
        test1.ProtocolName_Symbol1().fields_desc[0].name
    os.remove('test1.py')

    # --- Iterable case: a list of symbols produced by clustering --------
    symbl = Format.clusterByAlignment(messages,
                                      minEquivalence=50,
                                      internalSlick=True)
    self.assertEqual(type(symbl), list)

    exported = ScapyExporter().exportToScapy(symbl, 'test2.py',
                                             'ProtocolName')
    self.assertEqual(exported, None)
    exitCode = os.system("python test2.py")
    self.assertEqual(exitCode, 0)

    import test2
    fieldName = test2.ProtocolName_Symbol0().fields_desc[0].name
    self.assertEqual(fieldName, 'Field00')
    fieldSize = test2.ProtocolName_Symbol0().fields_desc[0].cls
    self.assertEqual(fieldSize, (0, 32))  # size
    fieldDefault = test2.ProtocolName_Symbol1().fields_desc[1].default
    self.assertEqual(fieldDefault, 'p')
    self.assertEqual(test2.ProtocolName_Symbol1().Field01, 'p')
    fieldCount = len(test2.ProtocolName_Symbol1().fields_desc)
    self.assertEqual(fieldCount, 3)
    os.remove('test2.py')
def execute(self, field):
    """Try to identify potential key fields in a symbol/field.

    A sub-field is a key field candidate when its values are not all
    identical and all of them (the first one included) have the same length.
    For each candidate the messages are clustered with
    ``Format.clusterByKeyField`` to report the number of clusters and the
    number of messages per cluster.

    >>> import binascii
    >>> from netzob.all import *
    >>> samples = ["00ff2f000011", "000010000000", "00fe1f000000", "000020000000", "00ff1f000000", "00ff1f000000", "00ff2f000000", "00fe1f000000"]
    >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples]
    >>> symbol = Symbol(messages=messages)
    >>> Format.splitStatic(symbol)
    >>> symbol.addEncodingFunction(TypeEncodingFunction(HexaString))
    >>> print symbol
    Field-0 | Field-1 | Field-2 | Field-3
    ------- | ------- | ------- | -------
    '00'    | 'ff2f'  | '0000'  | '11'   
    '00'    | '0010'  | '0000'  | '00'   
    '00'    | 'fe1f'  | '0000'  | '00'   
    '00'    | '0020'  | '0000'  | '00'   
    '00'    | 'ff1f'  | '0000'  | '00'   
    '00'    | 'ff1f'  | '0000'  | '00'   
    '00'    | 'ff2f'  | '0000'  | '00'   
    '00'    | 'fe1f'  | '0000'  | '00'   
    ------- | ------- | ------- | -------

    >>> finder = FindKeyFields()
    >>> results = finder.execute(symbol)
    >>> for result in results:
    ...     print "Field name: " + result["keyField"].name + ", number of clusters: " + str(result["nbClusters"]) + ", distribution: " + str(result["distribution"])
    Field name: Field-1, number of clusters: 5, distribution: [2, 1, 2, 2, 1]
    Field name: Field-3, number of clusters: 2, distribution: [1, 7]

    :param field: the field in which we want to identify key fields.
    :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
    :return: one dict per key field, with the keys ``"keyField"``,
             ``"nbClusters"`` and ``"distribution"``; an empty list when the
             field has fewer than two messages or no candidate is found
    :raise TypeError: if ``field`` is None
    """
    # Safe checks
    if field is None:
        raise TypeError("'field' should not be None")
    if len(field.messages) < 2:
        return []

    cells = field.getCells(encoded=False, styled=False, transposed=False)
    # Materialise the transposed cells: on Python 3 zip() returns an
    # iterator, which cannot be indexed.
    columns = list(zip(*cells))

    # Retrieve dynamic fields with fixed size
    results = []
    for (i, f) in enumerate(field.fields):
        column = columns[i]
        # A column with a single distinct value is static: not a key field
        if len(set(column)) <= 1:
            continue
        # The reference length comes from the FIRST value so that every
        # value of the column takes part in the fixed-size check.
        lRef = len(column[0])
        if all(len(val) == lRef for val in column[1:]):
            results.append({"keyField": f})

    # No candidate: nothing to cluster
    if not results:
        return results

    # Compute clusters according to each key field found
    from netzob.Inference.Vocabulary.Format import Format
    for result in results:
        tmpClusters = Format.clusterByKeyField(field, result["keyField"])
        result["nbClusters"] = len(tmpClusters)
        # Clusters distribution: number of messages in each cluster
        result["distribution"] = [
            len(cluster.messages) for cluster in tmpClusters.values()
        ]

    return results
def split(field, delimiter):
    """Split a field (or symbol) with a specific delimiter.

    The delimiter can be passed either as an ASCII, a Raw, an HexaString,
    or any objects that inherit from AbstractType.

    Every message value is cut on the delimiter; the i-th chunks of all
    messages form the domain of one new field, and a separator field
    (the delimiter or nothing) is inserted between two consecutive chunks.
    If the delimiter is found in no message, the field is left untouched.

    >>> from netzob.all import *
    >>> samples = [b"aaaaff000000ff10", b"bbff110010ff00000011", b"ccccccccfffe1f000000ff12"]
    >>> messages = [RawMessage(data=sample) for sample in samples]
    >>> symbol = Symbol(messages=messages[:3])
    >>> Format.splitDelimiter(symbol, ASCII("ff"))
    >>> print(symbol)
    Field-0    | Field-sep-6666 | Field-2      | Field-sep-6666 | Field-4   
    ---------- | -------------- | ------------ | -------------- | ----------
    'aaaa'     | 'ff'           | '000000'     | 'ff'           | '10'      
    'bb'       | 'ff'           | '110010'     | 'ff'           | '00000011'
    'cccccccc' | 'ff'           | 'fe1f000000' | 'ff'           | '12'      
    ---------- | -------------- | ------------ | -------------- | ----------

    >>> samples = [b"434d446964656e74696679230400000066726564", b"5245536964656e74696679230000000000000000", b"434d44696e666f2300000000", b"524553696e666f230000000004000000696e666f", b"434d4473746174732300000000", b"52455373746174732300000000050000007374617473", b"434d4461757468656e7469667923090000006d7950617373776421", b"52455361757468656e74696679230000000000000000", b"434d44656e6372797074230a00000031323334353674657374", b"524553656e637279707423000000000a00000073707176777436273136", b"434d4464656372797074230a00000073707176777436273136", b"5245536465637279707423000000000a00000031323334353674657374", b"434d446279652300000000", b"524553627965230000000000000000", b"434d446964656e746966792307000000526f626572746f", b"5245536964656e74696679230000000000000000", b"434d44696e666f2300000000", b"524553696e666f230000000004000000696e666f", b"434d4473746174732300000000", b"52455373746174732300000000050000007374617473", b"434d4461757468656e74696679230a000000615374726f6e67507764", b"52455361757468656e74696679230000000000000000", b"434d44656e63727970742306000000616263646566", b"524553656e6372797074230000000006000000232021262724", b"434d44646563727970742306000000232021262724", b"52455364656372797074230000000006000000616263646566", b"434d446279652300000000", b"524553627965230000000000000000"]
    >>> messages = [RawMessage(data=TypeConverter.convert(sample, HexaString, Raw)) for sample in samples]
    >>> symbol = Symbol(messages=messages)
    >>> symbol.encodingFunctions.add(TypeEncodingFunction(ASCII))  # Change visualization to ASCII
    >>> Format.splitDelimiter(symbol, ASCII("#"))
    >>> print(symbol)
    Field-0         | Field-sep-23 | Field-2              | Field-sep-23 | Field-4
    --------------- | ------------ | -------------------- | ------------ | -------
    'CMDidentify'   | '#'          | '....fred'           | ''           | ''     
    'RESidentify'   | '#'          | '........'           | ''           | ''     
    'CMDinfo'       | '#'          | '....'               | ''           | ''     
    'RESinfo'       | '#'          | '........info'       | ''           | ''     
    'CMDstats'      | '#'          | '....'               | ''           | ''     
    'RESstats'      | '#'          | '........stats'      | ''           | ''     
    'CMDauthentify' | '#'          | '....myPasswd!'      | ''           | ''     
    'RESauthentify' | '#'          | '........'           | ''           | ''     
    'CMDencrypt'    | '#'          | '....123456test'     | ''           | ''     
    'RESencrypt'    | '#'          | "........spqvwt6'16" | ''           | ''     
    'CMDdecrypt'    | '#'          | "....spqvwt6'16"     | ''           | ''     
    'RESdecrypt'    | '#'          | '........123456test' | ''           | ''     
    'CMDbye'        | '#'          | '....'               | ''           | ''     
    'RESbye'        | '#'          | '........'           | ''           | ''     
    'CMDidentify'   | '#'          | '....Roberto'        | ''           | ''     
    'RESidentify'   | '#'          | '........'           | ''           | ''     
    'CMDinfo'       | '#'          | '....'               | ''           | ''     
    'RESinfo'       | '#'          | '........info'       | ''           | ''     
    'CMDstats'      | '#'          | '....'               | ''           | ''     
    'RESstats'      | '#'          | '........stats'      | ''           | ''     
    'CMDauthentify' | '#'          | '....aStrongPwd'     | ''           | ''     
    'RESauthentify' | '#'          | '........'           | ''           | ''     
    'CMDencrypt'    | '#'          | '....abcdef'         | ''           | ''     
    'RESencrypt'    | '#'          | '........'           | '#'          | " !&'$"
    'CMDdecrypt'    | '#'          | '....'               | '#'          | " !&'$"
    'RESdecrypt'    | '#'          | '........abcdef'     | ''           | ''     
    'CMDbye'        | '#'          | '....'               | ''           | ''     
    'RESbye'        | '#'          | '........'           | ''           | ''     
    --------------- | ------------ | -------------------- | ------------ | -------
    >>> print(symbol.fields[0]._str_debug())
    Field-0
    |--   Alt
          |--   Data (Raw=b'CMDidentify' ((0, 88)))
          |--   Data (Raw=b'RESidentify' ((0, 88)))
          |--   Data (Raw=b'CMDinfo' ((0, 56)))
          |--   Data (Raw=b'RESinfo' ((0, 56)))
          |--   Data (Raw=b'CMDstats' ((0, 64)))
          |--   Data (Raw=b'RESstats' ((0, 64)))
          |--   Data (Raw=b'CMDauthentify' ((0, 104)))
          |--   Data (Raw=b'RESauthentify' ((0, 104)))
          |--   Data (Raw=b'CMDencrypt' ((0, 80)))
          |--   Data (Raw=b'RESencrypt' ((0, 80)))
          |--   Data (Raw=b'CMDdecrypt' ((0, 80)))
          |--   Data (Raw=b'RESdecrypt' ((0, 80)))
          |--   Data (Raw=b'CMDbye' ((0, 48)))
          |--   Data (Raw=b'RESbye' ((0, 48)))

    Below is another example of the FieldSplitDelimiter usage: it splits fields based on a Raw string.

    >>> from netzob.all import *
    >>> samples = [b"\\x01\\x02\\x03\\xff\\x04\\x05\\xff\\x06\\x07", b"\\x01\\x02\\xff\\x03\\x04\\x05\\x06\\xff\\x07", b"\\x01\\xff\\x02\\x03\\x04\\x05\\x06"]
    >>> messages = [RawMessage(data=sample) for sample in samples]
    >>> symbol = Symbol(messages=messages)
    >>> Format.splitDelimiter(symbol, Raw(b"\\xff"))
    >>> print(symbol)
    Field-0        | Field-sep-ff | Field-2                | Field-sep-ff | Field-4   
    -------------- | ------------ | ---------------------- | ------------ | ----------
    '\\x01\\x02\\x03' | b'\\xff'      | '\\x04\\x05'             | b'\\xff'      | '\\x06\\x07'
    '\\x01\\x02'     | b'\\xff'      | '\\x03\\x04\\x05\\x06'     | b'\\xff'      | '\\x07'    
    '\\x01'         | b'\\xff'      | '\\x02\\x03\\x04\\x05\\x06' | ''           | ''        
    -------------- | ------------ | ---------------------- | ------------ | ----------

    :param field: the field to consider when splitting
    :type field: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
    :param delimiter: the delimiter used to split messages of the field
    :type delimiter: :class:`netzob.Model.Types.AbstractType.AbstractType`
    :raises TypeError: if ``delimiter`` or ``field`` is None
    :raises ValueError: if the field has no message
    """
    if delimiter is None:
        raise TypeError("Delimiter cannot be None.")
    if field is None:
        raise TypeError("Field cannot be None.")
    if len(field.messages) < 1:
        raise ValueError("The associated symbol does not contain any message.")

    import itertools

    # Find message substrings after applying delimiter
    delimiterBytes = delimiter.value.tobytes()
    splittedMessages = [
        cell.split(delimiterBytes)
        for cell in field.getValues(encoded=False, styled=False)
    ]

    # Inverse the array, so that each column contains the observed values
    # of one future field (None where a message has fewer chunks)
    columns = list(itertools.zip_longest(*splittedMessages))

    # If the delimiter does not create splitted fields
    if len(columns) <= 1:
        return

    # The separator name only depends on the delimiter: compute it once
    str_delimiter = TypeConverter.convert(delimiter.value, BitArray,
                                          HexaString).decode('utf-8')
    separatorName = "Field-sep-{}".format(str_delimiter)

    # Else, we add up to (2*len(columns)-1) fields: values at even
    # positions, separators at odd positions
    newFields = []
    lastColumn = len(columns) - 1
    for iColumn, column in enumerate(columns):
        fieldDomain = []
        # Observed values, to prevent useless duplicates in the domain
        observedValues = set()
        hasEmptyValue = False

        for v in column:
            # Truthiness covers both None (missing chunk) and an empty
            # chunk; comparing bytes with "" would never match.
            if v:
                if v not in observedValues:
                    observedValues.add(v)
                    fieldDomain.append(Raw(v))
            elif not hasEmptyValue:
                fieldDomain.append(Raw(nbBytes=0))
                hasEmptyValue = True

        # To avoid adding an empty field: at least one real value is needed
        if observedValues:
            newField = Field(
                domain=DomainFactory.normalizeDomain(fieldDomain),
                name="Field-" + str(2 * iColumn))
            newField.encodingFunctions = list(
                field.encodingFunctions.values())
            newFields.append(newField)

        # A separator sits between two columns, never after the last one
        if iColumn != lastColumn:
            newFields.append(
                Field(domain=Alt([delimiter, Raw(nbBytes=0)]),
                      name=separatorName))

    # Reset the field
    from netzob.Inference.Vocabulary.Format import Format
    Format.resetFormat(field)

    # Create a field for each entry
    field.fields = newFields
def split(field, delimiter):
    """Split a field (or symbol) with a specific delimiter.

    The delimiter can be passed either as an ASCII, a Raw, an HexaString,
    or any objects that inherit from AbstractType.

    Every message value is cut on the delimiter; the i-th chunks of all
    messages form the domain of one new field, and a separator field
    (the delimiter or nothing) is inserted between two consecutive chunks.
    If the delimiter is found in no message, the field is left untouched.

    >>> from netzob.all import *
    >>> samples = [b"aaaaff000000ff10", b"bbff110010ff00000011", b"ccccccccfffe1f000000ff12"]
    >>> messages = [RawMessage(data=sample) for sample in samples]
    >>> symbol = Symbol(messages=messages[:3])
    >>> Format.splitDelimiter(symbol, ASCII("ff"))
    >>> print(symbol)
    Field-0    | Field-sep-6666 | Field-2      | Field-sep-6666 | Field-4   
    ---------- | -------------- | ------------ | -------------- | ----------
    'aaaa'     | 'ff'           | '000000'     | 'ff'           | '10'      
    'bb'       | 'ff'           | '110010'     | 'ff'           | '00000011'
    'cccccccc' | 'ff'           | 'fe1f000000' | 'ff'           | '12'      
    ---------- | -------------- | ------------ | -------------- | ----------

    >>> samples = [b"434d446964656e74696679230400000066726564", b"5245536964656e74696679230000000000000000", b"434d44696e666f2300000000", b"524553696e666f230000000004000000696e666f", b"434d4473746174732300000000", b"52455373746174732300000000050000007374617473", b"434d4461757468656e7469667923090000006d7950617373776421", b"52455361757468656e74696679230000000000000000", b"434d44656e6372797074230a00000031323334353674657374", b"524553656e637279707423000000000a00000073707176777436273136", b"434d4464656372797074230a00000073707176777436273136", b"5245536465637279707423000000000a00000031323334353674657374", b"434d446279652300000000", b"524553627965230000000000000000", b"434d446964656e746966792307000000526f626572746f", b"5245536964656e74696679230000000000000000", b"434d44696e666f2300000000", b"524553696e666f230000000004000000696e666f", b"434d4473746174732300000000", b"52455373746174732300000000050000007374617473", b"434d4461757468656e74696679230a000000615374726f6e67507764", b"52455361757468656e74696679230000000000000000", b"434d44656e63727970742306000000616263646566", b"524553656e6372797074230000000006000000232021262724", b"434d44646563727970742306000000232021262724", b"52455364656372797074230000000006000000616263646566", b"434d446279652300000000", b"524553627965230000000000000000"]
    >>> messages = [RawMessage(data=TypeConverter.convert(sample, HexaString, Raw)) for sample in samples]
    >>> symbol = Symbol(messages=messages)
    >>> symbol.encodingFunctions.add(TypeEncodingFunction(ASCII))  # Change visualization to ASCII
    >>> Format.splitDelimiter(symbol, ASCII("#"))
    >>> print(symbol)
    Field-0         | Field-sep-23 | Field-2              | Field-sep-23 | Field-4
    --------------- | ------------ | -------------------- | ------------ | -------
    'CMDidentify'   | '#'          | '....fred'           | ''           | ''     
    'RESidentify'   | '#'          | '........'           | ''           | ''     
    'CMDinfo'       | '#'          | '....'               | ''           | ''     
    'RESinfo'       | '#'          | '........info'       | ''           | ''     
    'CMDstats'      | '#'          | '....'               | ''           | ''     
    'RESstats'      | '#'          | '........stats'      | ''           | ''     
    'CMDauthentify' | '#'          | '....myPasswd!'      | ''           | ''     
    'RESauthentify' | '#'          | '........'           | ''           | ''     
    'CMDencrypt'    | '#'          | '....123456test'     | ''           | ''     
    'RESencrypt'    | '#'          | "........spqvwt6'16" | ''           | ''     
    'CMDdecrypt'    | '#'          | "....spqvwt6'16"     | ''           | ''     
    'RESdecrypt'    | '#'          | '........123456test' | ''           | ''     
    'CMDbye'        | '#'          | '....'               | ''           | ''     
    'RESbye'        | '#'          | '........'           | ''           | ''     
    'CMDidentify'   | '#'          | '....Roberto'        | ''           | ''     
    'RESidentify'   | '#'          | '........'           | ''           | ''     
    'CMDinfo'       | '#'          | '....'               | ''           | ''     
    'RESinfo'       | '#'          | '........info'       | ''           | ''     
    'CMDstats'      | '#'          | '....'               | ''           | ''     
    'RESstats'      | '#'          | '........stats'      | ''           | ''     
    'CMDauthentify' | '#'          | '....aStrongPwd'     | ''           | ''     
    'RESauthentify' | '#'          | '........'           | ''           | ''     
    'CMDencrypt'    | '#'          | '....abcdef'         | ''           | ''     
    'RESencrypt'    | '#'          | '........'           | '#'          | " !&'$"
    'CMDdecrypt'    | '#'          | '....'               | '#'          | " !&'$"
    'RESdecrypt'    | '#'          | '........abcdef'     | ''           | ''     
    'CMDbye'        | '#'          | '....'               | ''           | ''     
    'RESbye'        | '#'          | '........'           | ''           | ''     
    --------------- | ------------ | -------------------- | ------------ | -------
    >>> print(symbol.fields[0]._str_debug())
    Field-0
    |--   Alt
          |--   Data (Raw=b'CMDidentify' ((0, 88)))
          |--   Data (Raw=b'RESidentify' ((0, 88)))
          |--   Data (Raw=b'CMDinfo' ((0, 56)))
          |--   Data (Raw=b'RESinfo' ((0, 56)))
          |--   Data (Raw=b'CMDstats' ((0, 64)))
          |--   Data (Raw=b'RESstats' ((0, 64)))
          |--   Data (Raw=b'CMDauthentify' ((0, 104)))
          |--   Data (Raw=b'RESauthentify' ((0, 104)))
          |--   Data (Raw=b'CMDencrypt' ((0, 80)))
          |--   Data (Raw=b'RESencrypt' ((0, 80)))
          |--   Data (Raw=b'CMDdecrypt' ((0, 80)))
          |--   Data (Raw=b'RESdecrypt' ((0, 80)))
          |--   Data (Raw=b'CMDbye' ((0, 48)))
          |--   Data (Raw=b'RESbye' ((0, 48)))

    Below is another example of the FieldSplitDelimiter usage: it splits fields based on a Raw string.

    >>> from netzob.all import *
    >>> samples = [b"\\x01\\x02\\x03\\xff\\x04\\x05\\xff\\x06\\x07", b"\\x01\\x02\\xff\\x03\\x04\\x05\\x06\\xff\\x07", b"\\x01\\xff\\x02\\x03\\x04\\x05\\x06"]
    >>> messages = [RawMessage(data=sample) for sample in samples]
    >>> symbol = Symbol(messages=messages)
    >>> Format.splitDelimiter(symbol, Raw(b"\\xff"))
    >>> print(symbol)
    Field-0        | Field-sep-ff | Field-2                | Field-sep-ff | Field-4   
    -------------- | ------------ | ---------------------- | ------------ | ----------
    '\\x01\\x02\\x03' | b'\\xff'      | '\\x04\\x05'             | b'\\xff'      | '\\x06\\x07'
    '\\x01\\x02'     | b'\\xff'      | '\\x03\\x04\\x05\\x06'     | b'\\xff'      | '\\x07'    
    '\\x01'         | b'\\xff'      | '\\x02\\x03\\x04\\x05\\x06' | ''           | ''        
    -------------- | ------------ | ---------------------- | ------------ | ----------

    :param field: the field to consider when splitting
    :type field: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
    :param delimiter: the delimiter used to split messages of the field
    :type delimiter: :class:`netzob.Model.Vocabulary.Types.AbstractType.AbstractType`
    :raises TypeError: if ``delimiter`` or ``field`` is None
    :raises ValueError: if the field has no message
    """
    if delimiter is None:
        raise TypeError("Delimiter cannot be None.")
    if field is None:
        raise TypeError("Field cannot be None.")
    if len(field.messages) < 1:
        raise ValueError(
            "The associated symbol does not contain any message.")

    import itertools

    # Find message substrings after applying delimiter
    rawDelimiter = delimiter.value.tobytes()
    chunksPerMessage = [
        cell.split(rawDelimiter)
        for cell in field.getValues(encoded=False, styled=False)
    ]

    # Inverse the array, so that each entry holds the values observed at
    # one position (None where a message has fewer chunks)
    chunksPerPosition = list(itertools.zip_longest(*chunksPerMessage))

    # If the delimiter does not create splitted fields
    if len(chunksPerPosition) <= 1:
        return

    # The separator name only depends on the delimiter: compute it once
    str_delimiter = TypeConverter.convert(delimiter.value, BitArray,
                                          HexaString).decode('utf-8')
    fieldName = "Field-sep-{}".format(str_delimiter)

    # Else, we add up to (2*len(chunksPerPosition)-1) fields: values at
    # even positions, separators at odd positions
    newFields = []
    for position, observed in enumerate(chunksPerPosition):
        fieldDomain = []
        # Values already in the domain, to prevent useless duplicates
        observedValues = set()
        has_inserted_empty_value = False

        for v in observed:
            # Truthiness covers both None (missing chunk) and an empty
            # chunk; comparing bytes with "" would never match.
            if v:
                if v not in observedValues:
                    fieldDomain.append(Raw(v))
                    observedValues.add(v)
            elif not has_inserted_empty_value:
                fieldDomain.append(Raw(nbBytes=0))
                has_inserted_empty_value = True

        # To avoid adding an empty field: at least one real value is needed
        if observedValues:
            newField = Field(
                domain=DomainFactory.normalizeDomain(fieldDomain),
                name="Field-" + str(2 * position))
            newField.encodingFunctions = list(
                field.encodingFunctions.values())
            newFields.append(newField)

        newFields.append(
            Field(domain=Alt([delimiter, Raw(nbBytes=0)]), name=fieldName))

    # One separator was appended after the last position: drop it
    newFields.pop()

    # Reset the field
    from netzob.Inference.Vocabulary.Format import Format
    Format.resetFormat(field)

    # Create a field for each entry
    field.fields = newFields
def execute(self, field):
    """Try to identify potential key fields in a symbol/field.

    A sub-field is a key field candidate when its values are not all
    identical and all of them (the first one included) have the same length.
    For each candidate the messages are clustered with
    ``Format.clusterByKeyField`` to report the number of clusters and the
    number of messages per cluster.

    >>> import binascii
    >>> from netzob.all import *
    >>> samples = [b"00ff2f000011", b"000010000000", b"00fe1f000000", b"000020000000", b"00ff1f000000", b"00ff1f000000", b"00ff2f000000", b"00fe1f000000"]
    >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples]
    >>> symbol = Symbol(messages=messages)
    >>> Format.splitStatic(symbol)
    >>> symbol.addEncodingFunction(TypeEncodingFunction(HexaString))
    >>> print(symbol)
    Field-0 | Field-1 | Field-2 | Field-3
    ------- | ------- | ------- | -------
    '00'    | 'ff2f'  | '0000'  | '11'   
    '00'    | '0010'  | '0000'  | '00'   
    '00'    | 'fe1f'  | '0000'  | '00'   
    '00'    | '0020'  | '0000'  | '00'   
    '00'    | 'ff1f'  | '0000'  | '00'   
    '00'    | 'ff1f'  | '0000'  | '00'   
    '00'    | 'ff2f'  | '0000'  | '00'   
    '00'    | 'fe1f'  | '0000'  | '00'   
    ------- | ------- | ------- | -------

    >>> finder = FindKeyFields()
    >>> results = finder.execute(symbol)
    >>> for result in results:
    ...     print("Field name: " + result["keyField"].name + ", number of clusters: " + str(result["nbClusters"]) + ", distribution: " + str(result["distribution"]))
    Field name: Field-1, number of clusters: 5, distribution: [2, 1, 2, 1, 2]
    Field name: Field-3, number of clusters: 2, distribution: [1, 7]

    :param field: the field in which we want to identify key fields.
    :type field: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
    :return: one dict per key field, with the keys ``"keyField"``,
             ``"nbClusters"`` and ``"distribution"``; an empty list when the
             field has fewer than two messages or no candidate is found
    :raise TypeError: if ``field`` is None
    """
    # Safe checks
    if field is None:
        raise TypeError("'field' should not be None")
    if len(field.messages) < 2:
        return []

    cells = field.getCells(encoded=False, styled=False, transposed=False)
    columns = list(zip(*cells))

    # Retrieve dynamic fields with fixed size
    results = []
    for (i, f) in enumerate(field.fields):
        values = columns[i]
        # A column with a single distinct value is static: not a key field
        if len(set(values)) <= 1:
            continue
        # The reference length comes from the FIRST value so that every
        # value of the column takes part in the fixed-size check.
        lRef = len(values[0])
        if any(len(val) != lRef for val in values[1:]):
            continue
        results.append({"keyField": f})

    # No candidate: nothing to cluster
    if not results:
        return results

    # Compute clusters according to each key field found
    from netzob.Inference.Vocabulary.Format import Format
    for result in results:
        tmpClusters = Format.clusterByKeyField(field, result["keyField"])
        result["nbClusters"] = len(tmpClusters)
        # Clusters distribution: number of messages in each cluster
        result["distribution"] = [
            len(cluster.messages) for cluster in tmpClusters.values()
        ]

    return results
def split(field, delimiter):
    """Split a field (or symbol) with a specific delimiter.

    The delimiter can be passed either as an ASCII, a Raw, an HexaString,
    or any objects that inherit from AbstractType.

    Every message value is cut on the delimiter; the i-th chunks of all
    messages form the domain of one new field, and a separator field
    (the delimiter or nothing) is inserted between two consecutive chunks.
    If the delimiter is found in no message, the field is left untouched.

    >>> from netzob.all import *
    >>> samples = ["aaaaff000000ff10", "bbff110010ff00000011", "ccccccccfffe1f000000ff12"]
    >>> messages = [RawMessage(data=sample) for sample in samples]
    >>> symbol = Symbol(messages=messages[:3])
    >>> Format.splitDelimiter(symbol, ASCII("ff"))
    >>> print symbol
    'aaaa'     | 'ff' | '000000'     | 'ff' | '10'      
    'bb'       | 'ff' | '110010'     | 'ff' | '00000011'
    'cccccccc' | 'ff' | 'fe1f000000' | 'ff' | '12'      

    >>> samples = ["434d446964656e74696679230400000066726564", "5245536964656e74696679230000000000000000", "434d44696e666f2300000000", "524553696e666f230000000004000000696e666f","434d4473746174732300000000","52455373746174732300000000050000007374617473","434d4461757468656e7469667923090000006d7950617373776421","52455361757468656e74696679230000000000000000","434d44656e6372797074230a00000031323334353674657374","524553656e637279707423000000000a00000073707176777436273136","434d4464656372797074230a00000073707176777436273136","5245536465637279707423000000000a00000031323334353674657374","434d446279652300000000","524553627965230000000000000000","434d446964656e746966792307000000526f626572746f","5245536964656e74696679230000000000000000","434d44696e666f2300000000","524553696e666f230000000004000000696e666f","434d4473746174732300000000","52455373746174732300000000050000007374617473","434d4461757468656e74696679230a000000615374726f6e67507764","52455361757468656e74696679230000000000000000","434d44656e63727970742306000000616263646566","524553656e6372797074230000000006000000232021262724","434d44646563727970742306000000232021262724","52455364656372797074230000000006000000616263646566","434d446279652300000000","524553627965230000000000000000"]
    >>> messages = [RawMessage(data=TypeConverter.convert(sample, HexaString, Raw)) for sample in samples]
    >>> symbol = Symbol(messages=messages)
    >>> symbol.encodingFunctions.add(TypeEncodingFunction(ASCII))  # Change visualization to ASCII
    >>> Format.splitDelimiter(symbol, ASCII("#"))
    >>> print symbol
    'CMDidentify'   | '#' | '....fred'           | ''  | ''     
    'RESidentify'   | '#' | '........'           | ''  | ''     
    'CMDinfo'       | '#' | '....'               | ''  | ''     
    'RESinfo'       | '#' | '........info'       | ''  | ''     
    'CMDstats'      | '#' | '....'               | ''  | ''     
    'RESstats'      | '#' | '........stats'      | ''  | ''     
    'CMDauthentify' | '#' | '....myPasswd!'      | ''  | ''     
    'RESauthentify' | '#' | '........'           | ''  | ''     
    'CMDencrypt'    | '#' | '....123456test'     | ''  | ''     
    'RESencrypt'    | '#' | "........spqvwt6'16" | ''  | ''     
    'CMDdecrypt'    | '#' | "....spqvwt6'16"     | ''  | ''     
    'RESdecrypt'    | '#' | '........123456test' | ''  | ''     
    'CMDbye'        | '#' | '....'               | ''  | ''     
    'RESbye'        | '#' | '........'           | ''  | ''     
    'CMDidentify'   | '#' | '....Roberto'        | ''  | ''     
    'RESidentify'   | '#' | '........'           | ''  | ''     
    'CMDinfo'       | '#' | '....'               | ''  | ''     
    'RESinfo'       | '#' | '........info'       | ''  | ''     
    'CMDstats'      | '#' | '....'               | ''  | ''     
    'RESstats'      | '#' | '........stats'      | ''  | ''     
    'CMDauthentify' | '#' | '....aStrongPwd'     | ''  | ''     
    'RESauthentify' | '#' | '........'           | ''  | ''     
    'CMDencrypt'    | '#' | '....abcdef'         | ''  | ''     
    'RESencrypt'    | '#' | '........'           | '#' | " !&'$"
    'CMDdecrypt'    | '#' | '....'               | '#' | " !&'$"
    'RESdecrypt'    | '#' | '........abcdef'     | ''  | ''     
    'CMDbye'        | '#' | '....'               | ''  | ''     
    'RESbye'        | '#' | '........'           | ''  | ''     

    :param field: the field to consider when splitting
    :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
    :param delimiter: the delimiter used to split messages of the field
    :type delimiter: :class:`netzob.Common.Models.Types.AbstractType.AbstractType`
    :raises TypeError: if ``delimiter`` or ``field`` is None
    :raises ValueError: if the field has no message
    """
    if delimiter is None:
        raise TypeError("Delimiter cannot be None.")
    if field is None:
        raise TypeError("Field cannot be None.")
    if len(field.messages) < 1:
        raise ValueError("The associated symbol does not contain any message.")

    import itertools
    # izip_longest (Python 2) is named zip_longest on Python 3
    zip_longest = getattr(itertools, "zip_longest",
                          None) or itertools.izip_longest

    # Find message substrings after applying delimiter
    delimiterValue = delimiter.value.tobytes()
    splittedMessages = [
        cell.split(delimiterValue)
        for cell in field.getValues(encoded=False, styled=False)
    ]

    # Inverse the array, so that each column contains the observed values
    # of one future field (None where a message has fewer chunks)
    columns = list(zip_longest(*splittedMessages))

    # If the delimiter does not create splitted fields
    if len(columns) <= 1:
        return

    # The separator name only depends on the delimiter: compute it once.
    # The conversion result is decoded when it is not already a native
    # string, so that the concatenation below works on Python 3 as well.
    hexDelimiter = TypeConverter.convert(delimiter.value, BitArray,
                                         HexaString)
    if not isinstance(hexDelimiter, str):
        hexDelimiter = hexDelimiter.decode('utf-8')
    fieldName = "Field-sep-" + hexDelimiter

    # Else, we add up to (2*len(columns)-1) fields: values at even
    # positions, separators at odd positions
    newFields = []
    for iColumn, column in enumerate(columns):
        # The domain is an ordered list de-duplicated on the raw values:
        # a set of Raw objects has no stable iteration order.
        fieldDomain = []
        observedValues = set()
        hasEmptyValue = False

        for v in column:
            if v:  # neither None (missing chunk) nor an empty chunk
                if v not in observedValues:
                    observedValues.add(v)
                    fieldDomain.append(Raw(v))
            elif not hasEmptyValue:
                fieldDomain.append(Raw(nbBytes=0))
                hasEmptyValue = True

        # To avoid adding an empty field: at least one real value is needed
        if observedValues:
            newField = Field(
                domain=DomainFactory.normalizeDomain(fieldDomain),
                name="Field-" + str(2 * iColumn))
            # Snapshot the encoding functions rather than sharing a view
            newField.encodingFunctions = list(
                field.encodingFunctions.values())
            newFields.append(newField)

        newFields.append(
            Field(domain=Alt([delimiter, Raw(nbBytes=0)]), name=fieldName))

    # One separator was appended after the last column: drop it
    newFields.pop()

    # Reset the field
    from netzob.Inference.Vocabulary.Format import Format
    Format.resetFormat(field)

    # Create a field for each entry
    field.fields = newFields