def cluster(self, field, keyField): """Create and return new symbols according to a specific key field. >>> import binascii >>> from netzob.all import * >>> samples = ["00ff2f000000", "000020000000", "00ff2f000000"] >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples] >>> f1 = Field(Raw(nbBytes=1)) >>> f2 = Field(Raw(nbBytes=2)) >>> f3 = Field(Raw(nbBytes=3)) >>> symbol = Symbol([f1, f2, f3], messages=messages) >>> symbol.addEncodingFunction(TypeEncodingFunction(HexaString)) >>> newSymbols = Format.clusterByKeyField(symbol, f2) >>> for sym in newSymbols.values(): ... sym.addEncodingFunction(TypeEncodingFunction(HexaString)) ... print sym.name + ":" ... print sym Symbol_ff2f: Field | Field | Field ----- | ------ | -------- '00' | 'ff2f' | '000000' '00' | 'ff2f' | '000000' ----- | ------ | -------- Symbol_0020: Field | Field | Field ----- | ------ | -------- '00' | '0020' | '000000' ----- | ------ | -------- :param field: the field we want to split in new symbols :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField` :param keyField: the field used as a key during the splitting operation :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField` :raise Exception if something bad happens """ # Safe checks if field is None: raise TypeError("'field' should not be None") if keyField is None: raise TypeError("'keyField' should not be None") if keyField not in field.fields: raise TypeError("'keyField' is not a child of 'field'") newSymbols = {} keyFieldMessageValues = keyField.getMessageValues(encoded=False, styled=False) newSymbolsSplittedMessages = {} # we identify what would be the best type of the key field keyFieldType = ASCII for message, keyFieldValue in keyFieldMessageValues.iteritems(): # If the value cannot be parsed as ASCII, we convert it to HexaString if not ASCII().canParse(TypeConverter.convert(keyFieldValue, Raw, BitArray)): keyFieldType = HexaString break # Even if the value is theoritically parsable as ASCII, some caracters cannot be encoded, so we double check tmp_value = TypeConverter.convert(keyFieldValue, Raw, ASCII) tmp2_value = TypeConverter.convert(tmp_value, ASCII, Raw) if keyFieldValue != tmp2_value: # This means we cannot retrieve the original value by encoding and then decoding in ASCII keyFieldType = HexaString break # we create a symbol for each of these uniq values for message, keyFieldValue in keyFieldMessageValues.iteritems(): keyFieldValue = TypeConverter.convert(keyFieldValue, Raw, keyFieldType) if keyFieldValue not in newSymbols.keys(): symbolName = "Symbol_{0}".format(keyFieldValue) newSymbols[keyFieldValue] = Symbol(name=symbolName, messages=[message]) splittedMessages = DataAlignment.align([message.data], field, encoded=False) newSymbolsSplittedMessages[keyFieldValue] = [splittedMessages[0]] else: newSymbols[keyFieldValue].messages.append(message) splittedMessages = DataAlignment.align([message.data], field, encoded=False) newSymbolsSplittedMessages[keyFieldValue].append(splittedMessages[0]) for newSymbolKeyValue, newSymbol in newSymbols.iteritems(): # we recreate the same fields in this new symbol as the fields that exist in the original symbol newSymbol.clearFields() for i, f in enumerate(field.fields): if f == keyField: newFieldDomain = TypeConverter.convert(newSymbolKeyValue, keyFieldType, Raw) else: newFieldDomain = set() for j in range(len(newSymbolsSplittedMessages[newSymbolKeyValue])): newFieldDomain.add(newSymbolsSplittedMessages[newSymbolKeyValue][j][i]) newFieldDomain = list(newFieldDomain) newF = Field(name=f.name, domain=newFieldDomain) newF.parent = newSymbol newSymbol.fields.append(newF) # we remove endless fields that accepts no values cells = newSymbol.getCells(encoded=False, styled=False, transposed=False) max_i_cell_with_value = 0 for line in cells: for i_cell, cell in enumerate(line): if cell != '' and max_i_cell_with_value < i_cell: max_i_cell_with_value = i_cell newSymbol.clearFields() for i, f in enumerate(field.fields[:max_i_cell_with_value + 1]): if f == keyField: newFieldDomain = TypeConverter.convert(newSymbolKeyValue, keyFieldType, Raw) else: newFieldDomain = set() for j in range(len(newSymbolsSplittedMessages[newSymbolKeyValue])): newFieldDomain.add(newSymbolsSplittedMessages[newSymbolKeyValue][j][i]) newFieldDomain = list(newFieldDomain) newF = Field(name=f.name, domain=newFieldDomain) newF.parent = newSymbol newSymbol.fields.append(newF) return newSymbols
def cluster(self, field, keyField): """Create and return new symbols according to a specific key field. >>> import binascii >>> from netzob.all import * >>> samples = ["00ff2f000000", "000020000000", "00ff2f000000"] >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples] >>> f1 = Field(Raw(nbBytes=1)) >>> f2 = Field(Raw(nbBytes=2)) >>> f3 = Field(Raw(nbBytes=3)) >>> symbol = Symbol([f1, f2, f3], messages=messages) >>> symbol.addEncodingFunction(TypeEncodingFunction(HexaString)) >>> newSymbols = Format.clusterByKeyField(symbol, f2) >>> for sym in newSymbols.values(): ... sym.addEncodingFunction(TypeEncodingFunction(HexaString)) ... print sym.name + ":" ... print sym Symbol_ff2f: Field | Field | Field ----- | ------ | -------- '00' | 'ff2f' | '000000' '00' | 'ff2f' | '000000' ----- | ------ | -------- Symbol_0020: Field | Field | Field ----- | ------ | -------- '00' | '0020' | '000000' ----- | ------ | -------- :param field: the field we want to split in new symbols :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField` :param keyField: the field used as a key during the splitting operation :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField` :raise Exception if something bad happens """ # Safe checks if field is None: raise TypeError("'field' should not be None") if keyField is None: raise TypeError("'keyField' should not be None") if keyField not in field.fields: raise TypeError("'keyField' is not a child of 'field'") newSymbols = {} keyFieldMessageValues = keyField.getMessageValues(encoded=False, styled=False) newSymbolsSplittedMessages = {} # we identify what would be the best type of the key field keyFieldType = ASCII for message, keyFieldValue in keyFieldMessageValues.iteritems(): # If the value cannot be parsed as ASCII, we convert it to HexaString if not ASCII().canParse( TypeConverter.convert(keyFieldValue, Raw, BitArray)): keyFieldType = HexaString break # Even if the value is theoritically parsable as ASCII, some caracters cannot be encoded, so we double check tmp_value = TypeConverter.convert(keyFieldValue, Raw, ASCII) tmp2_value = TypeConverter.convert(tmp_value, ASCII, Raw) if keyFieldValue != tmp2_value: # This means we cannot retrieve the original value by encoding and then decoding in ASCII keyFieldType = HexaString break # we create a symbol for each of these uniq values for message, keyFieldValue in keyFieldMessageValues.iteritems(): keyFieldValue = TypeConverter.convert(keyFieldValue, Raw, keyFieldType) if keyFieldValue not in newSymbols.keys(): symbolName = "Symbol_{0}".format(keyFieldValue) newSymbols[keyFieldValue] = Symbol(name=symbolName, messages=[message]) splittedMessages = DataAlignment.align([message.data], field, encoded=False) newSymbolsSplittedMessages[keyFieldValue] = [ splittedMessages[0] ] else: newSymbols[keyFieldValue].messages.append(message) splittedMessages = DataAlignment.align([message.data], field, encoded=False) newSymbolsSplittedMessages[keyFieldValue].append( splittedMessages[0]) for newSymbolKeyValue, newSymbol in newSymbols.iteritems(): # we recreate the same fields in this new symbol as the fields that exist in the original symbol newSymbol.clearFields() for i, f in enumerate(field.fields): if f == keyField: newFieldDomain = TypeConverter.convert( newSymbolKeyValue, keyFieldType, Raw) else: newFieldDomain = set() for j in range( len(newSymbolsSplittedMessages[newSymbolKeyValue]) ): newFieldDomain.add( newSymbolsSplittedMessages[newSymbolKeyValue][j] [i]) newFieldDomain = list(newFieldDomain) newF = Field(name=f.name, domain=newFieldDomain) newF.parent = newSymbol newSymbol.fields.append(newF) # we remove endless fields that accepts no values cells = newSymbol.getCells(encoded=False, styled=False, transposed=False) max_i_cell_with_value = 0 for line in cells: for i_cell, cell in enumerate(line): if cell != '' and max_i_cell_with_value < i_cell: max_i_cell_with_value = i_cell newSymbol.clearFields() for i, f in enumerate(field.fields[:max_i_cell_with_value + 1]): if f == keyField: newFieldDomain = TypeConverter.convert( newSymbolKeyValue, keyFieldType, Raw) else: newFieldDomain = set() for j in range( len(newSymbolsSplittedMessages[newSymbolKeyValue]) ): newFieldDomain.add( newSymbolsSplittedMessages[newSymbolKeyValue][j] [i]) newFieldDomain = list(newFieldDomain) newF = Field(name=f.name, domain=newFieldDomain) newF.parent = newSymbol newSymbol.fields.append(newF) return newSymbols