def _createSubFieldsForADynamicField(self, field, align, semanticTags):
    """Create local sub-fields of a dynamic field from its semantic tags.

    Consecutive entries of ``semanticTags`` that carry the same tag form one
    run. For every run, the values of ``field`` associated with that tag are
    fetched through ``self._getFieldValuesWithTag`` and, when at least one
    value exists, a new :class:`Field` is attached to ``field`` with
    ``addLocalField``:

    * runs tagged with the literal string ``"None"`` get a length-based
      regex ``(.{min,max})`` computed from the observed value lengths;
    * any other run gets an alternation of its distinct observed values.

    :param field: the dynamic field to analyze
    :param align: the alignment of the field (only checked against None)
    :param semanticTags: mapping exposing ``iteritems()`` whose values are
        the semantic tags
    :raise TypeError: if one of the parameters is None
    """
    for argument, errorMessage in ((field, "Field cannot be None"),
                                   (align, "Align cannot be None"),
                                   (semanticTags, "SemanticTags cannot be None")):
        if argument is None:
            raise TypeError(errorMessage)

    self._logger.debug("Create subfields for dynamic field {0} : {1}".format(field.name, field.regex))

    tagsByMessage = field.getSemanticTagsByMessage()
    taggedRuns = []

    def closeRun(finishedTag):
        # Record a finished run together with the field values sharing its tag.
        taggedRuns.append((finishedTag, self._getFieldValuesWithTag(field, tagsByMessage, finishedTag)))

    runTag = None
    runLength = 0
    for _, tag in semanticTags.iteritems():
        if tag != runTag:
            # The tag changes: the previous run (if any) is complete.
            if runLength > 0:
                closeRun(runTag)
                runLength = 0
            runTag = tag
        runLength += 1

    # Do not forget the run that was still open when the tags ended.
    if runLength > 0:
        closeRun(runTag)

    self._logger.debug("Identified subFields : {0}".format(taggedRuns))

    for position, (runTag, observed) in enumerate(taggedRuns):
        if len(observed) == 0:
            continue

        if runTag == "None":
            # Untagged data: only constrain the size of the sub-field.
            lengths = [len(value) for value in observed]
            pattern = "(.{" + str(min(lengths)) + "," + str(max(lengths)) + "})"
        else:
            # Tagged data: accept exactly the distinct observed values.
            pattern = "({0})".format('|'.join(list(set(observed))))

        field.addLocalField(Field("{0}_{1}".format(field.getName(), position), pattern, field.getSymbol()))
def createSubFieldsForAStaticField(self, field, align, semanticTags):
    """Create local sub-fields of a static field from its semantic tags.

    ``align`` is cut every time the semantic tag changes while iterating
    ``semanticTags.iteritems()``; each non-empty piece becomes the regex of
    a new :class:`Field` attached to ``field`` with ``addLocalField``.
    Nothing is created when the field already owns local fields, or when
    the cutting yields fewer than two pieces.

    :param field: the static field to analyze
    :param align: the aligned value of the field (must support slicing)
    :param semanticTags: mapping exposing ``iteritems()``; its keys are used
        as positions in ``align`` and its values are the semantic tags
    """
    self._logger.debug("Create subfields for static field {0} : {1}".format(field.getName(), align))

    if len(field.getLocalFields()) > 0:
        self._logger.warning("Impossible to create sub fields for this field since its not cleaned")
        return

    pieces = []
    runTag = None
    runLength = 0
    for position, tag in semanticTags.iteritems():
        if tag == runTag:
            # Still inside the same run of identical tags.
            runLength += 1
            continue

        # The tag changes: extract the portion covered by the finished run.
        piece = align[position - runLength:position]
        if len(piece) > 0:
            pieces.append(piece)
        runTag = tag
        runLength = 1

    # The last run extends up to the end of the alignment.
    if runLength > 0:
        piece = align[-runLength:]
        if len(piece) > 0:
            pieces.append(piece)

    # A single piece would just duplicate the field itself.
    if len(pieces) <= 1:
        return

    for number, piece in enumerate(pieces):
        field.addLocalField(Field("{0}_{1}".format(field.getName(), number), "({0})".format(piece), field.getSymbol()))
def _splitFieldFollowingAlignment(self, field, align):
    """Replace the children of ``field`` by fields derived from ``align``.

    The alignment is split with ``self._splitAlignment`` and merged back
    with ``self._mergeAlign`` into ``(value, isDynamic)`` entries. Each
    entry becomes one new :class:`Field`; the new fields inherit the
    encoding functions of ``field`` and are finally assigned to
    ``field.fields``.

    :param field: the field whose definition is updated
    :param align: the alignment driving the static/dynamic separation
    """
    leftAlign, rightAlign = self._splitAlignment(align)
    segments = self._mergeAlign(leftAlign, rightAlign)

    def toField(segmentValue, isDynamic):
        # Dynamic segments only bound their size; static ones keep their value.
        # NOTE(review): the division by 2 presumably maps hexadecimal
        # characters to bytes -- confirm against _mergeAlign.
        if isDynamic:
            return Field(Raw(nbBytes=(0, len(segmentValue) / 2)))
        return Field(Raw(TypeConverter.convert(segmentValue, HexaString, Raw)))

    alignedFields = [toField(segmentValue, isDynamic) for (segmentValue, isDynamic) in segments]

    for alignedField in alignedFields:
        alignedField.encodingFunctions = field.encodingFunctions.values()

    field.fields = alignedFields
def __init__(self, fields=None, messages=None, name="Symbol"):
    """Build a symbol from its fields, its messages and its name.

    :keyword fields: the fields which participate in symbol definition;
                     a single default :class:`Field` is used when omitted
    :type fields: a :class:`list` of :class:`netzob.Common.Models.Vocabulary.Field`
    :keyword messages: the messages that represent the symbol;
                       an empty list is used when omitted
    :type messages: a :class:`list` of :class:`netzob.Common.Models.Vocabulary.Messages.AbstractMessage.AbstractMessage`
    :keyword name: the name of the symbol
    :type name: :class:`str`
    """
    super(Symbol, self).__init__(name, True)

    # Typed storage for the messages, then the messages themselves.
    self.__messages = TypedList(AbstractMessage)
    self.messages = [] if messages is None else messages

    # Fall back on a single default field when none is provided.
    self.fields = [Field()] if fields is None else fields
def reset(self, field):
    """Reset the format (field hierarchy and definition domain) of a field.

    The children of ``field`` are cleared. A :class:`Symbol` then receives
    a single default :class:`Field` as child, and a :class:`Field` gets its
    domain set to ``Raw(None)`` and its regex set to the default regex.

    :param field: the field we want to reset
    :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
    :raise TypeError: if ``field`` is None
    """
    if field is None:
        raise TypeError("The field to reset must be specified and cannot be None")

    logMessage = "Reset the definition of field {0} ({1})".format(field.name, field.id)
    self._logger.debug(logMessage)

    # Drop the whole hierarchy below the field.
    field.clearFields()

    # The two checks are deliberately independent (no elif).
    if isinstance(field, Symbol):
        field.fields = [Field()]

    if isinstance(field, Field):
        field.domain = Raw(None)
        field.regex = NetzobRegex.buildDefaultRegex()
def mergeFields(self, field1, field2):
    """Merge two adjacent sibling fields into a single field named "Merge".

    >>> import binascii
    >>> from netzob.all import *
    >>> samples = ["00ff2f000000", "000010000000", "00fe1f000000"]
    >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples]
    >>> f1 = Field(Raw(nbBytes=1), name="f1")
    >>> f2 = Field(Raw(nbBytes=2), name="f2")
    >>> f3 = Field(Raw(nbBytes=2), name="f3")
    >>> f4 = Field(Raw(nbBytes=1), name="f4")
    >>> symbol = Symbol([f1, f2, f3, f4], messages=messages)
    >>> symbol.addEncodingFunction(TypeEncodingFunction(HexaString))
    >>> print symbol
    f1   | f2     | f3     | f4
    ---- | ------ | ------ | ----
    '00' | 'ff2f' | '0000' | '00'
    '00' | '0010' | '0000' | '00'
    '00' | 'fe1f' | '0000' | '00'
    ---- | ------ | ------ | ----
    >>> fo = FieldOperations()
    >>> fo.mergeFields(f2, f3)
    >>> print symbol
    f1   | Merge      | f4
    ---- | ---------- | ----
    '00' | 'ff2f0000' | '00'
    '00' | '00100000' | '00'
    '00' | 'fe1f0000' | '00'
    ---- | ---------- | ----
    >>> fo.mergeFields(symbol.fields[0], symbol.fields[1])
    >>> print symbol
    Merge        | f4
    ------------ | ----
    '00ff2f0000' | '00'
    '0000100000' | '00'
    '00fe1f0000' | '00'
    ------------ | ----
    >>> fo.mergeFields(symbol.fields[0], symbol.fields[1])
    >>> print symbol
    Merge
    --------------
    '00ff2f000000'
    '000010000000'
    '00fe1f000000'
    --------------

    The merged field has an ``Agg`` domain made of both original domains
    and takes the encoding functions of ``field1``. It replaces the two
    fields in the list of children of their common parent.

    :param field1: the left field to merge
    :type field1: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
    :param field2: the right field to merge
    :type field2: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
    :raise TypeError: if one of the fields is None
    :raise ValueError: if both fields are equal, do not share the same
        parent, cannot be found in their parent, or are not adjacent
        (``field1`` directly on the left of ``field2``)
    """
    if field1 is None or field2 is None:
        raise TypeError("Fields cannot be None")

    if field1 == field2:
        raise ValueError("Cannot merge a unique field (field1 == field2)")

    self._logger.debug("Merging field {0} with field {1}".format(field1.name, field2.name))

    if field1.parent is not field2.parent:
        raise ValueError("Specified fields don't have the same parent, only fields with same parents can be merged.")

    # Locate both fields among the children of their parent.
    leftIndex = None
    rightIndex = None
    for position, candidate in enumerate(field1.parent.fields):
        if candidate == field1:
            leftIndex = position
        elif candidate == field2:
            rightIndex = position

    for foundIndex, label in ((leftIndex, "field1"), (rightIndex, "field2")):
        if foundIndex is None:
            raise ValueError("Cannot retrieve position of {0} in its parent fields".format(label))

    if rightIndex != leftIndex + 1:
        raise ValueError("Field1 must be directly on the left of field2 (iField1={0}, iField2={1})".format(leftIndex, rightIndex))

    # The merged field aggregates both domains.
    merged = Field(domain=Agg([field1.domain, field2.domain]), name="Merge")
    merged.encodingFunctions = field1.encodingFunctions.values()

    # Swap the two original fields for the merged one in the parent.
    parent = field1.parent
    parent.fields = parent.fields[:leftIndex] + [merged] + parent.fields[rightIndex + 1:]
def split(field, delimiter):
    """Split a field (or symbol) with a specific delimiter.

    The delimiter can be passed either as an ASCII, a Raw, an HexaString,
    or any objects that inherit from AbstractType.

    >>> from netzob.all import *
    >>> samples = ["aaaaff000000ff10", "bbff110010ff00000011", "ccccccccfffe1f000000ff12"]
    >>> messages = [RawMessage(data=sample) for sample in samples]
    >>> symbol = Symbol(messages=messages[:3])
    >>> Format.splitDelimiter(symbol, ASCII("ff"))
    >>> print symbol
    Field-0    | Field-sep-6666 | Field-2      | Field-sep-6666 | Field-4
    ---------- | -------------- | ------------ | -------------- | ----------
    'aaaa'     | 'ff'           | '000000'     | 'ff'           | '10'
    'bb'       | 'ff'           | '110010'     | 'ff'           | '00000011'
    'cccccccc' | 'ff'           | 'fe1f000000' | 'ff'           | '12'
    ---------- | -------------- | ------------ | -------------- | ----------

    >>> samples = ["434d446964656e74696679230400000066726564", "5245536964656e74696679230000000000000000", "434d44696e666f2300000000", "524553696e666f230000000004000000696e666f","434d4473746174732300000000","52455373746174732300000000050000007374617473","434d4461757468656e7469667923090000006d7950617373776421","52455361757468656e74696679230000000000000000","434d44656e6372797074230a00000031323334353674657374","524553656e637279707423000000000a00000073707176777436273136","434d4464656372797074230a00000073707176777436273136","5245536465637279707423000000000a00000031323334353674657374","434d446279652300000000","524553627965230000000000000000","434d446964656e746966792307000000526f626572746f","5245536964656e74696679230000000000000000","434d44696e666f2300000000","524553696e666f230000000004000000696e666f","434d4473746174732300000000","52455373746174732300000000050000007374617473","434d4461757468656e74696679230a000000615374726f6e67507764","52455361757468656e74696679230000000000000000","434d44656e63727970742306000000616263646566","524553656e6372797074230000000006000000232021262724","434d44646563727970742306000000232021262724","52455364656372797074230000000006000000616263646566","434d446279652300000000","524553627965230000000000000000"]
    >>> messages = [RawMessage(data=TypeConverter.convert(sample, HexaString, Raw)) for sample in samples]
    >>> symbol = Symbol(messages=messages)
    >>> symbol.encodingFunctions.add(TypeEncodingFunction(ASCII))  # Change visualization to ASCII
    >>> Format.splitDelimiter(symbol, ASCII("#"))
    >>> print symbol
    Field-0         | Field-sep-23 | Field-2              | Field-sep-23 | Field-4
    --------------- | ------------ | -------------------- | ------------ | -------
    'CMDidentify'   | '#'          | '....fred'           | ''           | ''
    'RESidentify'   | '#'          | '........'           | ''           | ''
    'CMDinfo'       | '#'          | '....'               | ''           | ''
    'RESinfo'       | '#'          | '........info'       | ''           | ''
    'CMDstats'      | '#'          | '....'               | ''           | ''
    'RESstats'      | '#'          | '........stats'      | ''           | ''
    'CMDauthentify' | '#'          | '....myPasswd!'      | ''           | ''
    'RESauthentify' | '#'          | '........'           | ''           | ''
    'CMDencrypt'    | '#'          | '....123456test'     | ''           | ''
    'RESencrypt'    | '#'          | "........spqvwt6'16" | ''           | ''
    'CMDdecrypt'    | '#'          | "....spqvwt6'16"     | ''           | ''
    'RESdecrypt'    | '#'          | '........123456test' | ''           | ''
    'CMDbye'        | '#'          | '....'               | ''           | ''
    'RESbye'        | '#'          | '........'           | ''           | ''
    'CMDidentify'   | '#'          | '....Roberto'        | ''           | ''
    'RESidentify'   | '#'          | '........'           | ''           | ''
    'CMDinfo'       | '#'          | '....'               | ''           | ''
    'RESinfo'       | '#'          | '........info'       | ''           | ''
    'CMDstats'      | '#'          | '....'               | ''           | ''
    'RESstats'      | '#'          | '........stats'      | ''           | ''
    'CMDauthentify' | '#'          | '....aStrongPwd'     | ''           | ''
    'RESauthentify' | '#'          | '........'           | ''           | ''
    'CMDencrypt'    | '#'          | '....abcdef'         | ''           | ''
    'RESencrypt'    | '#'          | '........'           | '#'          | " !&'$"
    'CMDdecrypt'    | '#'          | '....'               | '#'          | " !&'$"
    'RESdecrypt'    | '#'          | '........abcdef'     | ''           | ''
    'CMDbye'        | '#'          | '....'               | ''           | ''
    'RESbye'        | '#'          | '........'           | ''           | ''
    --------------- | ------------ | -------------------- | ------------ | -------

    >>> print symbol.fields[0]._str_debug()
    Field-0
    |--   Alt
          |--   Data (Raw='CMDidentify' ((0, 88)))
          |--   Data (Raw='RESidentify' ((0, 88)))
          |--   Data (Raw='CMDinfo' ((0, 56)))
          |--   Data (Raw='RESinfo' ((0, 56)))
          |--   Data (Raw='CMDstats' ((0, 64)))
          |--   Data (Raw='RESstats' ((0, 64)))
          |--   Data (Raw='CMDauthentify' ((0, 104)))
          |--   Data (Raw='RESauthentify' ((0, 104)))
          |--   Data (Raw='CMDencrypt' ((0, 80)))
          |--   Data (Raw='RESencrypt' ((0, 80)))
          |--   Data (Raw='CMDdecrypt' ((0, 80)))
          |--   Data (Raw='RESdecrypt' ((0, 80)))
          |--   Data (Raw='CMDbye' ((0, 48)))
          |--   Data (Raw='RESbye' ((0, 48)))

    Nothing is changed when splitting produces at most one column.

    :param field : the field to consider when splitting
    :type: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
    :param delimiter : the delimiter used to split messages of the field
    :type: :class:`netzob.Common.Models.Types.AbstractType.AbstractType`
    :raise TypeError: if the delimiter or the field is None
    :raise ValueError: if the field has no message
    """
    if delimiter is None:
        raise TypeError("Delimiter cannot be None.")

    if field is None:
        raise TypeError("Field cannot be None.")

    if len(field.messages) < 1:
        raise ValueError(
            "The associated symbol does not contain any message.")

    # One row per message: the chunks found between two delimiters.
    rows = [
        value.split(delimiter.value.tobytes())
        for value in field.getValues(encoded=False, styled=False)
    ]

    import itertools

    # Transpose the rows: each column gathers the values observed for one
    # future field (shorter rows are padded with None).
    columns = list(itertools.izip_longest(*rows))

    # The delimiter was not found in any message.
    if len(columns) <= 1:
        return

    # One data field per non-empty column, and one separator per column.
    splitFields = []
    fieldNumber = -1
    for column in columns:
        fieldNumber += 1

        alternatives = []
        seenValues = set()  # prevents useless duplicated alternatives
        emptyAlternativeAdded = False
        columnHasData = False

        for chunk in column:
            if chunk == "" or chunk is None:
                # A missing value is represented once by an empty Raw.
                if not emptyAlternativeAdded:
                    alternatives.append(Raw(nbBytes=0))
                    emptyAlternativeAdded = True
                continue

            columnHasData = True
            if chunk not in seenValues:
                alternatives.append(Raw(chunk))
                seenValues.add(chunk)

        # Columns that only hold empty values do not produce a data field.
        if columnHasData:
            dataField = Field(
                domain=DomainFactory.normalizeDomain(alternatives),
                name="Field-" + str(fieldNumber))
            dataField.encodingFunctions = field.encodingFunctions.values()
            splitFields.append(dataField)
            fieldNumber += 1

        separatorName = "Field-sep-" + TypeConverter.convert(
            delimiter.value, BitArray, HexaString)
        splitFields.append(
            Field(domain=Alt([delimiter, Raw(nbBytes=0)]), name=separatorName))

    # No separator is expected after the last column.
    splitFields.pop()

    # Reset the field before installing its new children.
    from netzob.Inference.Vocabulary.Format import Format
    Format.resetFormat(field)

    field.fields = splitFields
def cluster(self, field, keyField):
    """Create and return new symbols according to a specific key field.

    >>> import binascii
    >>> from netzob.all import *
    >>> samples = ["00ff2f000000", "000020000000", "00ff2f000000"]
    >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples]
    >>> f1 = Field(Raw(nbBytes=1))
    >>> f2 = Field(Raw(nbBytes=2))
    >>> f3 = Field(Raw(nbBytes=3))
    >>> symbol = Symbol([f1, f2, f3], messages=messages)
    >>> symbol.addEncodingFunction(TypeEncodingFunction(HexaString))
    >>> newSymbols = Format.clusterByKeyField(symbol, f2)
    >>> for sym in newSymbols.values():
    ...     sym.addEncodingFunction(TypeEncodingFunction(HexaString))
    ...     print sym.name + ":"
    ...     print sym
    Symbol_ff2f:
    Field | Field  | Field
    ----- | ------ | --------
    '00'  | 'ff2f' | '000000'
    '00'  | 'ff2f' | '000000'
    ----- | ------ | --------
    Symbol_0020:
    Field | Field  | Field
    ----- | ------ | --------
    '00'  | '0020' | '000000'
    ----- | ------ | --------

    :param field: the field we want to split in new symbols
    :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
    :param keyField: the field used as a key during the splitting operation
    :type keyField: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
    :return: the new symbols, indexed by the (converted) value of the key field
    :rtype: :class:`dict`
    :raise TypeError: if a parameter is None or if ``keyField`` is not a
        direct child of ``field``
    """
    # Safe checks
    if field is None:
        raise TypeError("'field' should not be None")
    if keyField is None:
        raise TypeError("'keyField' should not be None")
    if keyField not in field.fields:
        raise TypeError("'keyField' is not a child of 'field'")

    newSymbols = {}
    newSymbolsSplittedMessages = {}
    keyFieldMessageValues = keyField.getMessageValues(encoded=False, styled=False)

    # we identify what would be the best type of the key field
    keyFieldType = ASCII
    for message, keyFieldValue in keyFieldMessageValues.iteritems():
        # If the value cannot be parsed as ASCII, we convert it to HexaString
        if not ASCII().canParse(TypeConverter.convert(keyFieldValue, Raw, BitArray)):
            keyFieldType = HexaString
            break

        # Even if the value is theoretically parsable as ASCII, some characters
        # cannot be encoded, so we double check with a round trip
        tmp_value = TypeConverter.convert(keyFieldValue, Raw, ASCII)
        tmp2_value = TypeConverter.convert(tmp_value, ASCII, Raw)
        if keyFieldValue != tmp2_value:
            # We cannot retrieve the original value by encoding and then
            # decoding in ASCII
            keyFieldType = HexaString
            break

    def rebuildFields(symbol, keyValue, templates):
        """Replace the fields of `symbol` by copies of the `templates` fields."""
        alignedMessages = newSymbolsSplittedMessages[keyValue]
        symbol.clearFields()
        for i, template in enumerate(templates):
            if template == keyField:
                # The key field only accepts the value shared by the symbol
                newFieldDomain = TypeConverter.convert(keyValue, keyFieldType, Raw)
            else:
                # Other fields accept every value observed at this position
                newFieldDomain = list(set(aligned[i] for aligned in alignedMessages))
            newF = Field(name=template.name, domain=newFieldDomain)
            newF.parent = symbol
            symbol.fields.append(newF)

    # we create a symbol for each of these unique values
    for message, keyFieldValue in keyFieldMessageValues.iteritems():
        keyFieldValue = TypeConverter.convert(keyFieldValue, Raw, keyFieldType)
        # Direct dict membership: no temporary list of keys per message
        if keyFieldValue not in newSymbols:
            symbolName = "Symbol_{0}".format(keyFieldValue)
            newSymbols[keyFieldValue] = Symbol(name=symbolName, messages=[message])
            newSymbolsSplittedMessages[keyFieldValue] = []
        else:
            newSymbols[keyFieldValue].messages.append(message)
        splittedMessages = DataAlignment.align([message.data], field, encoded=False)
        newSymbolsSplittedMessages[keyFieldValue].append(splittedMessages[0])

    for newSymbolKeyValue, newSymbol in newSymbols.iteritems():
        # we recreate the same fields in this new symbol as the fields that
        # exist in the original symbol
        rebuildFields(newSymbol, newSymbolKeyValue, field.fields)

        # we remove endless fields that accept no values: find the index of
        # the last column holding at least one non-empty cell
        cells = newSymbol.getCells(encoded=False, styled=False, transposed=False)
        max_i_cell_with_value = 0
        for line in cells:
            for i_cell, cell in enumerate(line):
                if cell != '' and max_i_cell_with_value < i_cell:
                    max_i_cell_with_value = i_cell

        # ... and rebuild the symbol with the useful leading fields only
        rebuildFields(newSymbol, newSymbolKeyValue, field.fields[:max_i_cell_with_value + 1])

    return newSymbols
def execute(self, field):
    """Executes the field edition following the specified messages.

    The values of the field are converted to hexadecimal strings and cut
    in slices of ``stepUnitsize`` characters. A slice position is static
    when every message holds the same value at that position, dynamic
    otherwise. Depending on ``self.mergeAdjacentStaticFields`` and
    ``self.mergeAdjacentDynamicFields``, adjacent positions of the same
    kind are merged. Children of the specified field are finally replaced
    with one new field per remaining position.

    :param field: the format definition that will be used
    :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
    :raise TypeError: if the field is None
    :raise Exception: if the field has no value
    """
    if field is None:
        raise TypeError("The field cannot be None")

    fieldValues = [
        TypeConverter.convert(data, Raw, HexaString)
        for data in field.getValues(encoded=False)
    ]

    if len(fieldValues) == 0:
        raise Exception("No value found in the field.")

    # Length of the longest field value
    maxLengthFieldValue = max(len(fieldValue) for fieldValue in fieldValues)

    # defines the step following the specified unitsize
    stepUnitsize = self.__computeStepForUnitsize()

    # Vertical identification of variation: one entry per slice position,
    # holding the slice of every message ('' when the message is too short)
    indexedValues = []
    for i in range(0, maxLengthFieldValue, stepUnitsize):
        indexedValues.append([
            fieldValue[i:i + stepUnitsize] if i < len(fieldValue) else ''
            for fieldValue in fieldValues
        ])

    # If requested, merges the adjacent static fields
    if self.mergeAdjacentStaticFields:
        result = []
        staticSequences = []
        for values in indexedValues:
            if len(set(values)) == 1:
                # static: remember its unique value
                staticSequences.append(values[0])
                continue
            # dynamic: flush the pending static values as a single entry
            if len(staticSequences) > 0:
                result.append([''.join(staticSequences)])
                staticSequences = []
            result.append(values)

        if len(staticSequences) > 0:
            result.append([''.join(staticSequences)])

        indexedValues = result

    # If requested, merges the adjacent dynamic fields
    if self.mergeAdjacentDynamicFields:

        def joinPerMessage(sequences):
            # Concatenate, message by message, the slices of adjacent dynamic
            # positions. Every dynamic position holds one slice per message,
            # so all sequences have the same length and zip() loses nothing.
            return [''.join(parts) for parts in zip(*sequences)]

        result = []
        dynamicSequences = []
        for values in indexedValues:
            if len(set(values)) > 1:
                # dynamic: keep it until a static position is met
                dynamicSequences.append(values)
                continue
            # static: flush the pending dynamic positions as a single entry
            if len(dynamicSequences) > 0:
                result.append(joinPerMessage(dynamicSequences))
                dynamicSequences = []
            result.append(values)

        if len(dynamicSequences) > 0:
            result.append(joinPerMessage(dynamicSequences))

        indexedValues = result

    # Create a field for each entry
    newFields = []
    for (i, val) in enumerate(indexedValues):
        fName = "Field-{0}".format(i)
        fDomain = DomainFactory.normalizeDomain([
            Raw(TypeConverter.convert(v, HexaString, BitArray))
            for v in set(val)
        ])
        newFields.append(Field(domain=fDomain, name=fName))

    # attach encoding functions
    for newField in newFields:
        newField.encodingFunctions = field.encodingFunctions.values()

    field.fields = newFields
def split(field, delimiter):
    """Split a field (or symbol) with a specific delimiter.

    The delimiter can be passed either as an ASCII, a Raw, an HexaString,
    or any objects that inherit from AbstractType.

    >>> from netzob.all import *
    >>> samples = ["aaaaff000000ff10", "bbff110010ff00000011", "ccccccccfffe1f000000ff12"]
    >>> messages = [RawMessage(data=sample) for sample in samples]
    >>> symbol = Symbol(messages=messages[:3])
    >>> Format.splitDelimiter(symbol, ASCII("ff"))
    >>> print symbol
    'aaaa' | 'ff' | '000000' | 'ff' | '10'
    'bb' | 'ff' | '110010' | 'ff' | '00000011'
    'cccccccc' | 'ff' | 'fe1f000000' | 'ff' | '12'

    >>> samples = ["434d446964656e74696679230400000066726564", "5245536964656e74696679230000000000000000", "434d44696e666f2300000000", "524553696e666f230000000004000000696e666f","434d4473746174732300000000","52455373746174732300000000050000007374617473","434d4461757468656e7469667923090000006d7950617373776421","52455361757468656e74696679230000000000000000","434d44656e6372797074230a00000031323334353674657374","524553656e637279707423000000000a00000073707176777436273136","434d4464656372797074230a00000073707176777436273136","5245536465637279707423000000000a00000031323334353674657374","434d446279652300000000","524553627965230000000000000000","434d446964656e746966792307000000526f626572746f","5245536964656e74696679230000000000000000","434d44696e666f2300000000","524553696e666f230000000004000000696e666f","434d4473746174732300000000","52455373746174732300000000050000007374617473","434d4461757468656e74696679230a000000615374726f6e67507764","52455361757468656e74696679230000000000000000","434d44656e63727970742306000000616263646566","524553656e6372797074230000000006000000232021262724","434d44646563727970742306000000232021262724","52455364656372797074230000000006000000616263646566","434d446279652300000000","524553627965230000000000000000"]
    >>> messages = [RawMessage(data=TypeConverter.convert(sample, HexaString, Raw)) for sample in samples]
    >>> symbol = Symbol(messages=messages)
    >>> symbol.encodingFunctions.add(TypeEncodingFunction(ASCII))  # Change visualization to ASCII
    >>> Format.splitDelimiter(symbol, ASCII("#"))
    >>> print symbol
    'CMDidentify' | '#' | '....fred' | '' | ''
    'RESidentify' | '#' | '........' | '' | ''
    'CMDinfo' | '#' | '....' | '' | ''
    'RESinfo' | '#' | '........info' | '' | ''
    'CMDstats' | '#' | '....' | '' | ''
    'RESstats' | '#' | '........stats' | '' | ''
    'CMDauthentify' | '#' | '....myPasswd!' | '' | ''
    'RESauthentify' | '#' | '........' | '' | ''
    'CMDencrypt' | '#' | '....123456test' | '' | ''
    'RESencrypt' | '#' | "........spqvwt6'16" | '' | ''
    'CMDdecrypt' | '#' | "....spqvwt6'16" | '' | ''
    'RESdecrypt' | '#' | '........123456test' | '' | ''
    'CMDbye' | '#' | '....' | '' | ''
    'RESbye' | '#' | '........' | '' | ''
    'CMDidentify' | '#' | '....Roberto' | '' | ''
    'RESidentify' | '#' | '........' | '' | ''
    'CMDinfo' | '#' | '....' | '' | ''
    'RESinfo' | '#' | '........info' | '' | ''
    'CMDstats' | '#' | '....' | '' | ''
    'RESstats' | '#' | '........stats' | '' | ''
    'CMDauthentify' | '#' | '....aStrongPwd' | '' | ''
    'RESauthentify' | '#' | '........' | '' | ''
    'CMDencrypt' | '#' | '....abcdef' | '' | ''
    'RESencrypt' | '#' | '........' | '#' | " !&'$"
    'CMDdecrypt' | '#' | '....' | '#' | " !&'$"
    'RESdecrypt' | '#' | '........abcdef' | '' | ''
    'CMDbye' | '#' | '....' | '' | ''
    'RESbye' | '#' | '........' | '' | ''

    The alternatives of each new field are de-duplicated on their raw
    value and kept in the order they were first observed; missing or empty
    values contribute a single empty alternative. Nothing is changed when
    splitting produces at most one column.

    :param field : the field to consider when splitting
    :type: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
    :param delimiter : the delimiter used to split messages of the field
    :type: :class:`netzob.Common.Models.Types.AbstractType.AbstractType`
    :raise TypeError: if the delimiter or the field is None
    :raise ValueError: if the field has no message
    """
    if delimiter is None:
        raise TypeError("Delimiter cannot be None.")

    if field is None:
        raise TypeError("Field cannot be None.")

    if len(field.messages) < 1:
        raise ValueError("The associated symbol does not contain any message.")

    # Find message substrings after applying delimiter
    splittedMessages = []
    for cell in field.getValues(encoded=False, styled=False):
        splittedMessages.append(cell.split(delimiter.value.tobytes()))

    import itertools
    # Inverse the array, so that columns contain the observed values of each
    # field (shorter messages are padded with None)
    splittedMessages = list(itertools.izip_longest(*splittedMessages))

    # If the delimiter does not create splitted fields
    if len(splittedMessages) <= 1:
        return

    # Else, we add (2*len(splittedMessages)-1) fields
    newFields = []
    iField = -1
    for observedColumn in splittedMessages:
        iField += 1

        # De-duplicate on the raw value itself rather than on Raw objects:
        # the result does not depend on how Raw instances hash, and the
        # order of the alternatives is the order of first observation.
        fieldDomain = []
        observedValues = set()
        emptyValueInserted = False
        isEmptyField = True  # To avoid adding an empty field

        for v in observedColumn:
            if v != "" and v is not None:
                isEmptyField = False
                if v not in observedValues:
                    fieldDomain.append(Raw(v))
                    observedValues.add(v)
            elif not emptyValueInserted:
                fieldDomain.append(Raw(nbBytes=0))
                emptyValueInserted = True

        if not isEmptyField:
            newField = Field(domain=DomainFactory.normalizeDomain(fieldDomain), name="Field-" + str(iField))
            newField.encodingFunctions = field.encodingFunctions.values()
            newFields.append(newField)
            iField += 1

        # A separator field follows every column ...
        fieldName = "Field-sep-" + TypeConverter.convert(delimiter.value, BitArray, HexaString)
        newFields.append(Field(domain=Alt([delimiter, Raw(nbBytes=0)]), name=fieldName))

    # ... except the last one
    newFields.pop()

    # Reset the field
    from netzob.Inference.Vocabulary.Format import Format
    Format.resetFormat(field)

    # Create a field for each entry
    field.fields = newFields
def cluster(self, field, keyField):
    """Create and return new symbols according to a specific key field.

    >>> import binascii
    >>> from netzob.all import *
    >>> samples = ["00ff2f000000", "000020000000", "00ff2f000000"]
    >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples]
    >>> f1 = Field(Raw(nbBytes=1))
    >>> f2 = Field(Raw(nbBytes=2))
    >>> f3 = Field(Raw(nbBytes=3))
    >>> symbol = Symbol([f1, f2, f3], messages=messages)
    >>> symbol.addEncodingFunction(TypeEncodingFunction(HexaString))
    >>> newSymbols = Format.clusterByKeyField(symbol, f2)
    >>> for sym in newSymbols.values():
    ...     sym.addEncodingFunction(TypeEncodingFunction(HexaString))
    ...     print sym.name + ":"
    ...     print sym
    Symbol_ff2f:
    Field | Field  | Field
    ----- | ------ | --------
    '00'  | 'ff2f' | '000000'
    '00'  | 'ff2f' | '000000'
    ----- | ------ | --------
    Symbol_0020:
    Field | Field  | Field
    ----- | ------ | --------
    '00'  | '0020' | '000000'
    ----- | ------ | --------

    :param field: the field we want to split in new symbols
    :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
    :param keyField: the field used as a key during the splitting operation
    :type keyField: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
    :return: the new symbols, indexed by the (converted) value of the key field
    :rtype: :class:`dict`
    :raise TypeError: if a parameter is None or if ``keyField`` is not a
        direct child of ``field``
    """
    # Safe checks
    if field is None:
        raise TypeError("'field' should not be None")
    if keyField is None:
        raise TypeError("'keyField' should not be None")
    if keyField not in field.fields:
        raise TypeError("'keyField' is not a child of 'field'")

    newSymbols = {}
    newSymbolsSplittedMessages = {}
    keyFieldMessageValues = keyField.getMessageValues(encoded=False, styled=False)

    # we identify what would be the best type of the key field
    keyFieldType = ASCII
    for message, keyFieldValue in keyFieldMessageValues.iteritems():
        # If the value cannot be parsed as ASCII, we convert it to HexaString
        if not ASCII().canParse(
                TypeConverter.convert(keyFieldValue, Raw, BitArray)):
            keyFieldType = HexaString
            break

        # Even if the value is theoretically parsable as ASCII, some characters
        # cannot be encoded, so we double check with a round trip
        tmp_value = TypeConverter.convert(keyFieldValue, Raw, ASCII)
        tmp2_value = TypeConverter.convert(tmp_value, ASCII, Raw)
        if keyFieldValue != tmp2_value:
            # We cannot retrieve the original value by encoding and then
            # decoding in ASCII
            keyFieldType = HexaString
            break

    def populateFields(symbol, keyValue, sourceFields):
        """Replace the fields of `symbol` by copies of `sourceFields`."""
        alignedMessages = newSymbolsSplittedMessages[keyValue]
        symbol.clearFields()
        for i, sourceField in enumerate(sourceFields):
            if sourceField == keyField:
                # The key field only accepts the value shared by the symbol
                newFieldDomain = TypeConverter.convert(
                    keyValue, keyFieldType, Raw)
            else:
                # Other fields accept every value observed at this position
                newFieldDomain = list(
                    set(aligned[i] for aligned in alignedMessages))
            newF = Field(name=sourceField.name, domain=newFieldDomain)
            newF.parent = symbol
            symbol.fields.append(newF)

    # we create a symbol for each of these unique values
    for message, keyFieldValue in keyFieldMessageValues.iteritems():
        keyFieldValue = TypeConverter.convert(keyFieldValue, Raw, keyFieldType)
        # Direct dict membership: no temporary list of keys per message
        if keyFieldValue not in newSymbols:
            symbolName = "Symbol_{0}".format(keyFieldValue)
            newSymbols[keyFieldValue] = Symbol(
                name=symbolName, messages=[message])
            newSymbolsSplittedMessages[keyFieldValue] = []
        else:
            newSymbols[keyFieldValue].messages.append(message)
        splittedMessages = DataAlignment.align(
            [message.data], field, encoded=False)
        newSymbolsSplittedMessages[keyFieldValue].append(splittedMessages[0])

    for newSymbolKeyValue, newSymbol in newSymbols.iteritems():
        # we recreate the same fields in this new symbol as the fields that
        # exist in the original symbol
        populateFields(newSymbol, newSymbolKeyValue, field.fields)

        # we remove endless fields that accept no values: find the index of
        # the last column holding at least one non-empty cell
        cells = newSymbol.getCells(
            encoded=False, styled=False, transposed=False)
        max_i_cell_with_value = 0
        for line in cells:
            for i_cell, cell in enumerate(line):
                if cell != '' and max_i_cell_with_value < i_cell:
                    max_i_cell_with_value = i_cell

        # ... and rebuild the symbol with the useful leading fields only
        populateFields(newSymbol, newSymbolKeyValue,
                       field.fields[:max_i_cell_with_value + 1])

    return newSymbols