Esempio n. 1
0
    def _createSubFieldsForADynamicField(self, field, align, semanticTags):
        """Create local sub fields of a dynamic field, one per run of
        consecutive identical semantic tags.

        For every run of identical tags, the values returned by
        ``_getFieldValuesWithTag`` are collected. Runs without any value are
        ignored. A run tagged with the string "None" produces a sub field
        whose regex accepts any content whose length lies between the
        shortest and the longest collected value; any other run produces a
        sub field whose regex is the alternation of its distinct values.

        :param field: the dynamic field to analyze, sub fields are attached to it
        :param align: the alignment of the field (only checked against None here)
        :param semanticTags: a mapping exposing ``iteritems()`` whose values are the tags
        :raise TypeError: if one of the parameters is None
        """

        if field is None:
            raise TypeError("Field cannot be None")
        if align is None:
            raise TypeError("Align cannot be None")
        if semanticTags is None:
            raise TypeError("SemanticTags cannot be None")

        self._logger.debug("Create subfields for dynamic field {0} : {1}".format(field.name, field.regex))

        taggedRuns = []
        tagsByMessage = field.getSemanticTagsByMessage()

        def closeRun(finishedTag):
            # Store the tag of a finished run together with the values attached to it
            taggedRuns.append((finishedTag, self._getFieldValuesWithTag(field, tagsByMessage, finishedTag)))

        previousTag = None
        runLength = 0
        for _, tag in semanticTags.iteritems():
            if tag != previousTag:
                # The tag changed: the previous run (if any) is complete
                if runLength > 0:
                    closeRun(previousTag)
                runLength = 0
            previousTag = tag
            runLength += 1
        # Do not forget the run that reaches the end of the tags
        if runLength > 0:
            closeRun(previousTag)

        self._logger.debug("Identified subFields : {0}".format(taggedRuns))

        for position, (tag, values) in enumerate(taggedRuns):
            if len(values) == 0:
                continue

            if tag == "None":
                # Untagged content: accept anything between the observed lengths
                lengths = [len(value) for value in values]
                subFieldRegex = "(.{" + str(min(lengths)) + "," + str(max(lengths)) + "})"
            else:
                # Tagged content: accept exactly the distinct observed values
                subFieldRegex = "({0})".format('|'.join(list(set(values))))

            field.addLocalField(Field("{0}_{1}".format(field.getName(), position), subFieldRegex, field.getSymbol()))
Esempio n. 2
0
    def createSubFieldsForAStaticField(self, field, align, semanticTags):
        """Create local sub fields of a static field, one per run of
        consecutive identical semantic tags.

        The alignment is cut at every position where the tag changes. When
        more than one non-empty piece is obtained, each piece becomes a local
        sub field whose regex is the piece itself. Nothing is done (besides a
        warning) when the field already owns local fields.

        :param field: the static field to analyze, sub fields are attached to it
        :param align: the sliceable alignment of the field
        :param semanticTags: a mapping exposing ``iteritems()`` yielding (index, tag) pairs
        """
        self._logger.debug("Create subfields for static field {0} : {1}".format(field.getName(), align))

        # A field that already owns local fields is left untouched
        if len(field.getLocalFields()) > 0:
            self._logger.warning("Impossible to create sub fields for this field since its not cleaned")
            return

        # Collect the slice of the alignment covered by each run of identical tags
        slices = []
        previousTag = None
        runLength = 0
        for position, tag in semanticTags.iteritems():
            if tag != previousTag:
                slices.append(align[position - runLength:position])
                runLength = 0
            previousTag = tag
            runLength += 1
        if runLength > 0:
            # The last run is taken from the end of the alignment
            slices.append(align[-runLength:])

        # Zero-length slices (such as the one taken when the first run starts) are discarded
        subFieldValues = [chunk for chunk in slices if len(chunk) > 0]

        # A single piece means there is nothing to split
        if len(subFieldValues) <= 1:
            return

        for iSubField, subFieldValue in enumerate(subFieldValues):
            field.addLocalField(Field("{0}_{1}".format(field.getName(), iSubField), "({0})".format(subFieldValue), field.getSymbol()))
Esempio n. 3
0
    def _splitFieldFollowingAlignment(self, field, align):
        """Replace the children of the field with new fields following the
        specified align.

        The alignment is first split (``_splitAlignment``) and merged back
        (``_mergeAlign``) into a sequence of ``(value, isDynamic)`` entries.
        Each dynamic entry becomes a variable-size Raw field, each static
        entry becomes a Raw field holding the converted value. The encoding
        functions of the original field are copied on every new field.

        :param field: the field whose children are replaced
        :param align: the alignment used to compute the new fields
        """

        # STEP 1 : Create a field separation based on static and dynamic fields
        leftAlign, rightAlign = self._splitAlignment(align)
        splited = self._mergeAlign(leftAlign, rightAlign)
        step1Fields = []

        for (entryVal, entryDyn) in splited:
            if entryDyn:
                # NOTE(review): entryVal is presumably an hexadecimal string (two
                # characters per byte), as the static branch converts it from HexaString.
                # Floor division keeps the upper bound an integer even when true
                # division is active (Python 3 or "from __future__ import division").
                newField = Field(Raw(nbBytes=(0, len(entryVal) // 2)))
            else:
                newField = Field(Raw(TypeConverter.convert(entryVal, HexaString, Raw)))
            step1Fields.append(newField)

        # The new fields are displayed with the encoding functions of the original field
        for f in step1Fields:
            f.encodingFunctions = field.encodingFunctions.values()

        field.fields = step1Fields
Esempio n. 4
0
 def __init__(self, fields=None, messages=None, name="Symbol"):
     """Build a symbol out of its fields and messages.

     :keyword fields: the fields which participate in symbol definition,
         a single default :class:`Field` is used when not specified
     :type fields: a :class:`list` of :class:`netzob.Common.Models.Vocabulary.Field`
     :keyword messages: the messages that represent the symbol,
         an empty list is used when not specified
     :type messages: a :class:`list` of :class:`netzob.Common.Models.Vocabulary.Messages.AbstractMessage.AbstractMessage`
     :keyword name: the name of the symbol
     :type name: :class:`str`
     """
     super(Symbol, self).__init__(name, True)
     self.__messages = TypedList(AbstractMessage)
     # Messages are attached before the fields, as in the declaration order below
     self.messages = [] if messages is None else messages
     # Without explicit fields, fall back on one default empty field
     self.fields = [Field()] if fields is None else fields
Esempio n. 5
0
    def reset(self, field):
        """Reset the format (field hierarchy and definition domain) of the
        specified field.

        The children of the field are cleared. A :class:`Symbol` then
        receives a single default field, while a :class:`Field` gets its
        domain set to ``Raw(None)`` and its regex set to the default one.

        :param field: the field we want to reset
        :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
        :raise TypeError: if the field is None
        """

        if field is None:
            raise TypeError("The field to reset must be specified and cannot be None")

        logMessage = "Reset the definition of field {0} ({1})".format(field.name, field.id)
        self._logger.debug(logMessage)

        # Drop the whole hierarchy below the field
        field.clearFields()

        # Both checks are independent: each one only applies to its own type
        if isinstance(field, Symbol):
            defaultFields = [Field()]
            field.fields = defaultFields

        if isinstance(field, Field):
            field.domain = Raw(None)
            field.regex = NetzobRegex.buildDefaultRegex()
Esempio n. 6
0
    def mergeFields(self, field1, field2):
        """Merge two adjacent sibling fields into a single field named "Merge".

        The new field aggregates the domains of both fields, inherits the
        encoding functions of the left one and replaces both fields in the
        list of fields of their parent.

        >>> import binascii
        >>> from netzob.all import *
        >>> samples = ["00ff2f000000", "000010000000", "00fe1f000000"]
        >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples]
        >>> f1 = Field(Raw(nbBytes=1), name="f1")
        >>> f2 = Field(Raw(nbBytes=2), name="f2")
        >>> f3 = Field(Raw(nbBytes=2), name="f3")
        >>> f4 = Field(Raw(nbBytes=1), name="f4")
        >>> symbol = Symbol([f1, f2, f3, f4], messages=messages)
        >>> symbol.addEncodingFunction(TypeEncodingFunction(HexaString))

        >>> print symbol
        f1   | f2     | f3     | f4  
        ---- | ------ | ------ | ----
        '00' | 'ff2f' | '0000' | '00'
        '00' | '0010' | '0000' | '00'
        '00' | 'fe1f' | '0000' | '00'
        ---- | ------ | ------ | ----

        >>> fo = FieldOperations()
        >>> fo.mergeFields(f2, f3)
        >>> print symbol
        f1   | Merge      | f4  
        ---- | ---------- | ----
        '00' | 'ff2f0000' | '00'
        '00' | '00100000' | '00'
        '00' | 'fe1f0000' | '00'
        ---- | ---------- | ----

        >>> fo.mergeFields(symbol.fields[0], symbol.fields[1])
        >>> print symbol
        Merge        | f4  
        ------------ | ----
        '00ff2f0000' | '00'
        '0000100000' | '00'
        '00fe1f0000' | '00'
        ------------ | ----

        >>> fo.mergeFields(symbol.fields[0], symbol.fields[1])
        >>> print symbol
        Merge         
        --------------
        '00ff2f000000'
        '000010000000'
        '00fe1f000000'
        --------------

        :param field1: the left field to merge
        :type field1: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
        :param field2: the right field to merge
        :type field2: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`

        :raise TypeError: if one of the fields is None
        :raise ValueError: if both fields are equal, if they do not share
            the same parent, if one of them cannot be found in the fields of
            the parent or if field1 is not directly on the left of field2
        """

        if field1 is None or field2 is None:
            raise TypeError("Fields cannot be None")

        if field1 == field2:
            raise ValueError("Cannot merge a unique field (field1 == field2)")

        self._logger.debug("Merging field {0} with field {1}".format(field1.name, field2.name))

        if field1.parent is not field2.parent:
            raise ValueError("Specified fields don't have the same parent, only fields with same parents can be merged.")

        # Locate both fields among the children of their common parent
        leftPosition = None
        rightPosition = None
        for position, sibling in enumerate(field1.parent.fields):
            if sibling == field1:
                leftPosition = position
            elif sibling == field2:
                rightPosition = position

        if leftPosition is None:
            raise ValueError("Cannot retrieve position of field1 in its parent fields")
        if rightPosition is None:
            raise ValueError("Cannot retrieve position of field2 in its parent fields")
        if rightPosition != leftPosition + 1:
            raise ValueError("Field1 must be directly on the left of field2 (iField1={0}, iField2={1})".format(leftPosition, rightPosition))

        # The merged field accepts the domain of field1 followed by the domain of field2
        mergedField = Field(domain=Agg([field1.domain, field2.domain]), name="Merge")
        mergedField.encodingFunctions = field1.encodingFunctions.values()

        # Rebuild the children of the parent with the merged field in place of both fields
        parent = field1.parent
        leftSiblings = parent.fields[:leftPosition]
        rightSiblings = parent.fields[rightPosition + 1:]
        parent.fields = leftSiblings + [mergedField] + rightSiblings
    def split(field, delimiter):
        """Split a field (or symbol) with a specific delimiter. The
        delimiter can be passed either as an ASCII, a Raw, an
        HexaString, or any objects that inherit from AbstractType.

        Every value of the field is split on the delimiter. Each resulting
        column that holds at least one value becomes a new field accepting
        the observed values, and a separator field accepting either the
        delimiter or nothing is inserted between the columns. Nothing is
        changed when the delimiter does not produce more than one column.

        >>> from netzob.all import *

        >>> samples = ["aaaaff000000ff10", "bbff110010ff00000011", "ccccccccfffe1f000000ff12"]
        >>> messages = [RawMessage(data=sample) for sample in samples]
        >>> symbol = Symbol(messages=messages[:3])
        >>> Format.splitDelimiter(symbol, ASCII("ff"))
        >>> print symbol
        Field-0    | Field-sep-6666 | Field-2      | Field-sep-6666 | Field-4   
        ---------- | -------------- | ------------ | -------------- | ----------
        'aaaa'     | 'ff'           | '000000'     | 'ff'           | '10'      
        'bb'       | 'ff'           | '110010'     | 'ff'           | '00000011'
        'cccccccc' | 'ff'           | 'fe1f000000' | 'ff'           | '12'      
        ---------- | -------------- | ------------ | -------------- | ----------

        >>> samples = ["434d446964656e74696679230400000066726564", "5245536964656e74696679230000000000000000", "434d44696e666f2300000000", "524553696e666f230000000004000000696e666f","434d4473746174732300000000","52455373746174732300000000050000007374617473","434d4461757468656e7469667923090000006d7950617373776421","52455361757468656e74696679230000000000000000","434d44656e6372797074230a00000031323334353674657374","524553656e637279707423000000000a00000073707176777436273136","434d4464656372797074230a00000073707176777436273136","5245536465637279707423000000000a00000031323334353674657374","434d446279652300000000","524553627965230000000000000000","434d446964656e746966792307000000526f626572746f","5245536964656e74696679230000000000000000","434d44696e666f2300000000","524553696e666f230000000004000000696e666f","434d4473746174732300000000","52455373746174732300000000050000007374617473","434d4461757468656e74696679230a000000615374726f6e67507764","52455361757468656e74696679230000000000000000","434d44656e63727970742306000000616263646566","524553656e6372797074230000000006000000232021262724","434d44646563727970742306000000232021262724","52455364656372797074230000000006000000616263646566","434d446279652300000000","524553627965230000000000000000"]
        >>> messages = [RawMessage(data=TypeConverter.convert(sample, HexaString, Raw)) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> symbol.encodingFunctions.add(TypeEncodingFunction(ASCII))  # Change visualization to ASCII
        >>> Format.splitDelimiter(symbol, ASCII("#"))
        >>> print symbol
        Field-0         | Field-sep-23 | Field-2              | Field-sep-23 | Field-4
        --------------- | ------------ | -------------------- | ------------ | -------
        'CMDidentify'   | '#'          | '....fred'           | ''           | ''     
        'RESidentify'   | '#'          | '........'           | ''           | ''     
        'CMDinfo'       | '#'          | '....'               | ''           | ''     
        'RESinfo'       | '#'          | '........info'       | ''           | ''     
        'CMDstats'      | '#'          | '....'               | ''           | ''     
        'RESstats'      | '#'          | '........stats'      | ''           | ''     
        'CMDauthentify' | '#'          | '....myPasswd!'      | ''           | ''     
        'RESauthentify' | '#'          | '........'           | ''           | ''     
        'CMDencrypt'    | '#'          | '....123456test'     | ''           | ''     
        'RESencrypt'    | '#'          | "........spqvwt6'16" | ''           | ''     
        'CMDdecrypt'    | '#'          | "....spqvwt6'16"     | ''           | ''     
        'RESdecrypt'    | '#'          | '........123456test' | ''           | ''     
        'CMDbye'        | '#'          | '....'               | ''           | ''     
        'RESbye'        | '#'          | '........'           | ''           | ''     
        'CMDidentify'   | '#'          | '....Roberto'        | ''           | ''     
        'RESidentify'   | '#'          | '........'           | ''           | ''     
        'CMDinfo'       | '#'          | '....'               | ''           | ''     
        'RESinfo'       | '#'          | '........info'       | ''           | ''     
        'CMDstats'      | '#'          | '....'               | ''           | ''     
        'RESstats'      | '#'          | '........stats'      | ''           | ''     
        'CMDauthentify' | '#'          | '....aStrongPwd'     | ''           | ''     
        'RESauthentify' | '#'          | '........'           | ''           | ''     
        'CMDencrypt'    | '#'          | '....abcdef'         | ''           | ''     
        'RESencrypt'    | '#'          | '........'           | '#'          | " !&'$"
        'CMDdecrypt'    | '#'          | '....'               | '#'          | " !&'$"
        'RESdecrypt'    | '#'          | '........abcdef'     | ''           | ''     
        'CMDbye'        | '#'          | '....'               | ''           | ''     
        'RESbye'        | '#'          | '........'           | ''           | ''     
        --------------- | ------------ | -------------------- | ------------ | -------
        >>> print symbol.fields[0]._str_debug()
        Field-0
        |--   Alt
              |--   Data (Raw='CMDidentify' ((0, 88)))
              |--   Data (Raw='RESidentify' ((0, 88)))
              |--   Data (Raw='CMDinfo' ((0, 56)))
              |--   Data (Raw='RESinfo' ((0, 56)))
              |--   Data (Raw='CMDstats' ((0, 64)))
              |--   Data (Raw='RESstats' ((0, 64)))
              |--   Data (Raw='CMDauthentify' ((0, 104)))
              |--   Data (Raw='RESauthentify' ((0, 104)))
              |--   Data (Raw='CMDencrypt' ((0, 80)))
              |--   Data (Raw='RESencrypt' ((0, 80)))
              |--   Data (Raw='CMDdecrypt' ((0, 80)))
              |--   Data (Raw='RESdecrypt' ((0, 80)))
              |--   Data (Raw='CMDbye' ((0, 48)))
              |--   Data (Raw='RESbye' ((0, 48)))

        :param field: the field to consider when splitting
        :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
        :param delimiter: the delimiter used to split messages of the field
        :type delimiter: :class:`netzob.Common.Models.Types.AbstractType.AbstractType`
        :raise TypeError: if the delimiter or the field is None
        :raise ValueError: if the field has no message
        """

        if delimiter is None:
            raise TypeError("Delimiter cannot be None.")

        if field is None:
            raise TypeError("Field cannot be None.")

        if len(field.messages) < 1:
            raise ValueError(
                "The associated symbol does not contain any message.")

        # Split every value of the field on the delimiter (one row per value)
        rows = [
            cell.split(delimiter.value.tobytes())
            for cell in field.getValues(encoded=False, styled=False)
        ]

        import itertools
        # Transpose the rows so that each column holds the values observed for
        # one future field; shorter rows are padded with None
        columns = list(itertools.izip_longest(*rows))

        # The delimiter does not split anything: keep the field as it is
        if len(columns) <= 1:
            return

        # A separator field follows every column, the last one is removed afterwards
        newFields = []
        nextIndex = 0
        for column in columns:
            domain = list()

            # values already part of the domain, to prevent useless duplicates
            knownValues = set()
            emptyValueInserted = False
            holdsValue = False

            for value in column:
                if value == "" or value is None:
                    # An absent value is represented once by an empty Raw
                    if not emptyValueInserted:
                        domain.append(Raw(nbBytes=0))
                        emptyValueInserted = True
                    continue

                holdsValue = True
                if value not in knownValues:
                    domain.append(Raw(value))
                    knownValues.add(value)

            # Columns without any value do not produce a field
            if holdsValue:
                valueField = Field(
                    domain=DomainFactory.normalizeDomain(domain),
                    name="Field-" + str(nextIndex))
                valueField.encodingFunctions = field.encodingFunctions.values()
                newFields.append(valueField)
                nextIndex += 1

            separatorName = "Field-sep-" + TypeConverter.convert(
                delimiter.value, BitArray, HexaString)

            newFields.append(
                Field(domain=Alt([delimiter, Raw(nbBytes=0)]), name=separatorName))
            # The separator also consumes one index in the field numbering
            nextIndex += 1

        # Drop the separator added after the last column
        newFields.pop()

        # Reset the field
        from netzob.Inference.Vocabulary.Format import Format
        Format.resetFormat(field)

        # Create a field for each entry
        field.fields = newFields
Esempio n. 8
0
    def cluster(self, field, keyField):
        """Create and return new symbols according to a specific key
        field.

        Messages are grouped by the value they hold in the key field. One
        symbol is created per distinct value; it receives the same fields as
        the original one, the key field being restricted to the value of the
        group and the other fields to the values observed in the group.
        Trailing fields that hold no value in a group are removed from the
        symbol of this group.

        >>> import binascii
        >>> from netzob.all import *
        >>> samples = ["00ff2f000000", "000020000000", "00ff2f000000"]
        >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples]
        >>> f1 = Field(Raw(nbBytes=1))
        >>> f2 = Field(Raw(nbBytes=2))
        >>> f3 = Field(Raw(nbBytes=3))
        >>> symbol = Symbol([f1, f2, f3], messages=messages)
        >>> symbol.addEncodingFunction(TypeEncodingFunction(HexaString))
        >>> newSymbols = Format.clusterByKeyField(symbol, f2)
        >>> for sym in newSymbols.values():
        ...     sym.addEncodingFunction(TypeEncodingFunction(HexaString))
        ...     print sym.name + ":"
        ...     print sym
        Symbol_ff2f:
        Field | Field  | Field   
        ----- | ------ | --------
        '00'  | 'ff2f' | '000000'
        '00'  | 'ff2f' | '000000'
        ----- | ------ | --------
        Symbol_0020:
        Field | Field  | Field   
        ----- | ------ | --------
        '00'  | '0020' | '000000'
        ----- | ------ | --------

        :param field: the field we want to split in new symbols
        :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
        :param keyField: the field used as a key during the splitting operation
        :type keyField: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
        :return: the new symbols, indexed by the (converted) value of the key field
        :rtype: :class:`dict`
        :raise TypeError: if a parameter is None or if keyField is not a child of field
        """

        # Safe checks
        if field is None:
            raise TypeError("'field' should not be None")
        if keyField is None:
            raise TypeError("'keyField' should not be None")
        if keyField not in field.fields:
            raise TypeError("'keyField' is not a child of 'field'")

        newSymbols = {}
        newSymbolsSplittedMessages = {}

        keyFieldMessageValues = keyField.getMessageValues(encoded=False, styled=False)

        # we identify what would be the best type of the key field
        keyFieldType = ASCII
        for _, keyFieldValue in keyFieldMessageValues.iteritems():
            # If the value cannot be parsed as ASCII, we convert it to HexaString
            if not ASCII().canParse(TypeConverter.convert(keyFieldValue, Raw, BitArray)):
                keyFieldType = HexaString
                break

            # Even if the value is theoretically parsable as ASCII, some characters
            # cannot be encoded, so we double check with an encode/decode round trip
            asciiValue = TypeConverter.convert(keyFieldValue, Raw, ASCII)
            roundTripValue = TypeConverter.convert(asciiValue, ASCII, Raw)
            if keyFieldValue != roundTripValue:
                # This means we cannot retrieve the original value by encoding and then decoding in ASCII
                keyFieldType = HexaString
                break

        # we create a symbol for each of these unique values
        for message, keyFieldValue in keyFieldMessageValues.iteritems():
            keyFieldValue = TypeConverter.convert(keyFieldValue, Raw, keyFieldType)
            # Direct dict membership: no temporary list of keys built per message
            if keyFieldValue in newSymbols:
                newSymbols[keyFieldValue].messages.append(message)
            else:
                symbolName = "Symbol_{0}".format(keyFieldValue)
                newSymbols[keyFieldValue] = Symbol(name=symbolName, messages=[message])
                newSymbolsSplittedMessages[keyFieldValue] = []
            # Keep the message aligned on the original fields for the group it belongs to
            splittedMessages = DataAlignment.align([message.data], field, encoded=False)
            newSymbolsSplittedMessages[keyFieldValue].append(splittedMessages[0])

        def rebuildFields(symbol, keyValue, templateFields):
            # Replace the fields of the symbol with copies of templateFields
            # restricted to the values observed in the group identified by keyValue
            symbol.clearFields()
            alignedMessages = newSymbolsSplittedMessages[keyValue]
            for i, f in enumerate(templateFields):
                if f == keyField:
                    newFieldDomain = TypeConverter.convert(keyValue, keyFieldType, Raw)
                else:
                    newFieldDomain = set()
                    for alignedMessage in alignedMessages:
                        newFieldDomain.add(alignedMessage[i])
                    newFieldDomain = list(newFieldDomain)
                newF = Field(name=f.name, domain=newFieldDomain)
                newF.parent = symbol
                symbol.fields.append(newF)

        for newSymbolKeyValue, newSymbol in newSymbols.iteritems():
            # we recreate the same fields in this new symbol as the fields that exist in the original symbol
            rebuildFields(newSymbol, newSymbolKeyValue, field.fields)

            # we remove trailing fields that accept no values: find the index
            # of the last field holding a value in at least one message
            cells = newSymbol.getCells(encoded=False, styled=False, transposed=False)
            max_i_cell_with_value = 0
            for line in cells:
                for i_cell, cell in enumerate(line):
                    if cell != '' and max_i_cell_with_value < i_cell:
                        max_i_cell_with_value = i_cell

            # and rebuild the symbol with the fields up to this index only
            rebuildFields(newSymbol, newSymbolKeyValue, field.fields[:max_i_cell_with_value + 1])

        return newSymbols
Esempio n. 9
0
    def execute(self, field):
        """Executes the field edition following the specified messages.
        Children of the specified field will be replaced with new fields.

        The values of the field are converted to hexadecimal strings and cut
        in chunks following the unit size. A chunk position is static when
        all the values share the same chunk and dynamic otherwise. Adjacent
        static (resp. dynamic) positions are merged when
        ``mergeAdjacentStaticFields`` (resp. ``mergeAdjacentDynamicFields``)
        is set. One field is finally created per remaining position.

        :param field: the format definition that will be used
        :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
        :raise TypeError: if the field is None
        :raise Exception: if the field holds no value
        """

        if field is None:
            raise TypeError("The field cannot be None")
        fieldValues = [
            TypeConverter.convert(data, Raw, HexaString)
            for data in field.getValues(encoded=False)
        ]

        if len(fieldValues) == 0:
            raise Exception("No value found in the field.")

        # Retrieve longest field value
        maxLengthFieldValue = len(max(fieldValues, key=len))

        # defines the step following specified unitsize
        stepUnitsize = self.__computeStepForUnitsize()

        # Vertical identification of variation: one column per chunk position,
        # each column holding the chunk of every value ('' when the value is too short).
        # Slicing already stops at the end of the value, no explicit bound is needed.
        indexedValues = []
        for i in range(0, maxLengthFieldValue, stepUnitsize):
            indexedValues.append([
                fieldValue[i:i + stepUnitsize] if i < len(fieldValue) else ''
                for fieldValue in fieldValues
            ])

        # If requested, merges the adjacent static fields
        if self.mergeAdjacentStaticFields:
            result = []
            staticSequences = []
            for values in indexedValues:
                if len(set(values)) == 1:
                    # static
                    staticSequences.append(values[0])
                else:
                    # dynamic
                    if len(staticSequences) > 0:
                        result.append([''.join(staticSequences)])
                        staticSequences = []
                    result.append(values)
            if len(staticSequences) > 0:
                result.append([''.join(staticSequences)])
            indexedValues = result

        # If requested, merges the adjacent dynamic fields
        if self.mergeAdjacentDynamicFields:

            def mergeDynamicColumns(columns):
                # Concatenate, value by value, the chunks of adjacent dynamic columns.
                # A dynamic column always holds one chunk per field value (merged
                # static columns hold a single entry and are never dynamic), so all
                # the columns have the same length and zip() loses nothing.
                return [''.join(chunks) for chunks in zip(*columns)]

            result = []
            dynamicSequences = []
            for values in indexedValues:
                if len(set(values)) > 1:
                    # dynamic
                    dynamicSequences.append(values)
                else:
                    # static
                    if len(dynamicSequences) > 0:
                        result.append(mergeDynamicColumns(dynamicSequences))
                        dynamicSequences = []
                    result.append(values)
            if len(dynamicSequences) > 0:
                result.append(mergeDynamicColumns(dynamicSequences))

            indexedValues = result

        # Create a field for each entry
        newFields = []
        for (i, val) in enumerate(indexedValues):
            fName = "Field-{0}".format(i)
            fDomain = DomainFactory.normalizeDomain([
                Raw(TypeConverter.convert(v, HexaString, BitArray))
                for v in set(val)
            ])
            newFields.append(Field(domain=fDomain, name=fName))

        # attach encoding functions
        for newField in newFields:
            newField.encodingFunctions = field.encodingFunctions.values()

        field.fields = newFields
Esempio n. 10
0
    def split(field, delimiter):
        """Split a field (or symbol) with a specific delimiter. The
        delimiter can be passed either as an ASCII, a Raw, an
        HexaString, or any objects that inherit from AbstractType.

        >>> from netzob.all import *
        >>> samples = ["aaaaff000000ff10",	"bbff110010ff00000011",	"ccccccccfffe1f000000ff12"]
        >>> messages = [RawMessage(data=sample) for sample in samples]
        >>> symbol = Symbol(messages=messages[:3])
        >>> Format.splitDelimiter(symbol, ASCII("ff"))
        >>> print symbol
        'aaaa'     | 'ff' | '000000'     | 'ff' | '10'      
        'bb'       | 'ff' | '110010'     | 'ff' | '00000011'
        'cccccccc' | 'ff' | 'fe1f000000' | 'ff' | '12'      

        >>> samples = ["434d446964656e74696679230400000066726564", "5245536964656e74696679230000000000000000", "434d44696e666f2300000000", "524553696e666f230000000004000000696e666f","434d4473746174732300000000","52455373746174732300000000050000007374617473","434d4461757468656e7469667923090000006d7950617373776421","52455361757468656e74696679230000000000000000","434d44656e6372797074230a00000031323334353674657374","524553656e637279707423000000000a00000073707176777436273136","434d4464656372797074230a00000073707176777436273136","5245536465637279707423000000000a00000031323334353674657374","434d446279652300000000","524553627965230000000000000000","434d446964656e746966792307000000526f626572746f","5245536964656e74696679230000000000000000","434d44696e666f2300000000","524553696e666f230000000004000000696e666f","434d4473746174732300000000","52455373746174732300000000050000007374617473","434d4461757468656e74696679230a000000615374726f6e67507764","52455361757468656e74696679230000000000000000","434d44656e63727970742306000000616263646566","524553656e6372797074230000000006000000232021262724","434d44646563727970742306000000232021262724","52455364656372797074230000000006000000616263646566","434d446279652300000000","524553627965230000000000000000"]
        >>> messages = [RawMessage(data=TypeConverter.convert(sample, HexaString, Raw)) for sample in samples]
        >>> symbol = Symbol(messages=messages)
        >>> symbol.encodingFunctions.add(TypeEncodingFunction(ASCII))  # Change visualization to hexastring
        >>> Format.splitDelimiter(symbol, ASCII("#"))
        >>> print symbol
        'CMDidentify'   | '#' | '....fred'           | ''  | ''     
        'RESidentify'   | '#' | '........'           | ''  | ''     
        'CMDinfo'       | '#' | '....'               | ''  | ''     
        'RESinfo'       | '#' | '........info'       | ''  | ''     
        'CMDstats'      | '#' | '....'               | ''  | ''     
        'RESstats'      | '#' | '........stats'      | ''  | ''     
        'CMDauthentify' | '#' | '....myPasswd!'      | ''  | ''     
        'RESauthentify' | '#' | '........'           | ''  | ''     
        'CMDencrypt'    | '#' | '....123456test'     | ''  | ''     
        'RESencrypt'    | '#' | "........spqvwt6'16" | ''  | ''     
        'CMDdecrypt'    | '#' | "....spqvwt6'16"     | ''  | ''     
        'RESdecrypt'    | '#' | '........123456test' | ''  | ''     
        'CMDbye'        | '#' | '....'               | ''  | ''     
        'RESbye'        | '#' | '........'           | ''  | ''     
        'CMDidentify'   | '#' | '....Roberto'        | ''  | ''     
        'RESidentify'   | '#' | '........'           | ''  | ''     
        'CMDinfo'       | '#' | '....'               | ''  | ''     
        'RESinfo'       | '#' | '........info'       | ''  | ''     
        'CMDstats'      | '#' | '....'               | ''  | ''     
        'RESstats'      | '#' | '........stats'      | ''  | ''     
        'CMDauthentify' | '#' | '....aStrongPwd'     | ''  | ''     
        'RESauthentify' | '#' | '........'           | ''  | ''     
        'CMDencrypt'    | '#' | '....abcdef'         | ''  | ''     
        'RESencrypt'    | '#' | '........'           | '#' | " !&'$"
        'CMDdecrypt'    | '#' | '....'               | '#' | " !&'$"
        'RESdecrypt'    | '#' | '........abcdef'     | ''  | ''     
        'CMDbye'        | '#' | '....'               | ''  | ''     
        'RESbye'        | '#' | '........'           | ''  | ''     


        :param field : the field to consider when spliting
        :type: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
        :param delimiter : the delimiter used to split messages of the field
        :type: :class:`netzob.Common.Models.Types.AbstractType.AbstractType`
        """

        if delimiter is None:
            raise TypeError("Delimiter cannot be None.")

        if field is None:
            raise TypeError("Field cannot be None.")

        if len(field.messages) < 1:
            raise ValueError("The associated symbol does not contain any message.")

        # Find message substrings after applying delimiter
        splittedMessages = []

        for cell in field.getValues(encoded=False, styled=False):
            splittedMessage = cell.split(delimiter.value.tobytes())
            splittedMessages.append(splittedMessage)

        import itertools
        # Inverse the array, so that columns contains observed values for each field
        splittedMessages = list(itertools.izip_longest(*splittedMessages))
        
        # If the delimiter does not create splitted fields
        if len(splittedMessages) <= 1:
            return

        # Else, we add (2*len(splittedMessages)-1) fields
        newFields = []
        iField = -1
        for i in range(len(splittedMessages)):
            iField += 1
            fieldDomain = set()
            isEmptyField = True  # To avoid adding an empty field
            emptyValueFound = False
            for v in splittedMessages[i]:
                if v != "" and v is not None:
                    isEmptyField = False
                    fieldDomain.add(Raw(v))
                else:
                    fieldDomain.add(Raw(nbBytes=0))

            if not isEmptyField:
                fieldDomain = list(fieldDomain)
                newField = Field(domain=DomainFactory.normalizeDomain(fieldDomain), name="Field-"+str(iField))
                newField.encodingFunctions = field.encodingFunctions.values()
                newFields.append(newField)
                iField += 1

            fieldName = "Field-sep-" + TypeConverter.convert(delimiter.value, BitArray, HexaString)

            newFields.append(Field(domain=Alt([delimiter, Raw(nbBytes=0)]), name=fieldName))

        newFields.pop()

        # Reset the field
        from netzob.Inference.Vocabulary.Format import Format
        Format.resetFormat(field)

        # Create a field for each entry
        field.fields = newFields
Esempio n. 11
0
    def cluster(self, field, keyField):
        """Create and return new symbols according to a specific key
        field.

        Messages attached to 'keyField' are grouped by the value they
        hold in that field, and one symbol is created per distinct
        value. Each new symbol receives a copy of the fields of 'field'
        (same names), whose domains are restricted to the values observed
        in the messages of its group. Trailing fields that only hold
        empty values in a group are dropped from that group's symbol.

        >>> import binascii
        >>> from netzob.all import *
        >>> samples = ["00ff2f000000", "000020000000", "00ff2f000000"]
        >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples]
        >>> f1 = Field(Raw(nbBytes=1))
        >>> f2 = Field(Raw(nbBytes=2))
        >>> f3 = Field(Raw(nbBytes=3))
        >>> symbol = Symbol([f1, f2, f3], messages=messages)
        >>> symbol.addEncodingFunction(TypeEncodingFunction(HexaString))
        >>> newSymbols = Format.clusterByKeyField(symbol, f2)
        >>> for sym in newSymbols.values():
        ...     sym.addEncodingFunction(TypeEncodingFunction(HexaString))
        ...     print sym.name + ":"
        ...     print sym
        Symbol_ff2f:
        Field | Field  | Field   
        ----- | ------ | --------
        '00'  | 'ff2f' | '000000'
        '00'  | 'ff2f' | '000000'
        ----- | ------ | --------
        Symbol_0020:
        Field | Field  | Field   
        ----- | ------ | --------
        '00'  | '0020' | '000000'
        ----- | ------ | --------

        :param field: the field we want to split in new symbols
        :type field: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
        :param keyField: the field used as a key during the splitting operation
        :type keyField: :class:`netzob.Common.Models.Vocabulary.AbstractField.AbstractField`
        :return: the new symbols, indexed by the key field value they
                 were built from (expressed in ASCII when every key value
                 survives an ASCII round-trip, in HexaString otherwise)
        :rtype: dict
        :raise TypeError: if 'field' or 'keyField' is None, or if
                          'keyField' is not a direct child of 'field'
        """

        # Safe checks
        if field is None:
            raise TypeError("'field' should not be None")
        if keyField is None:
            raise TypeError("'keyField' should not be None")
        if keyField not in field.fields:
            raise TypeError("'keyField' is not a child of 'field'")

        keyFieldMessageValues = keyField.getMessageValues(encoded=False,
                                                          styled=False)

        # We identify what would be the best type of the key field: ASCII
        # unless at least one value cannot be represented with it.
        keyFieldType = ASCII
        for _, rawKeyValue in keyFieldMessageValues.iteritems():
            # If the value cannot be parsed as ASCII, we fall back to HexaString
            if not ASCII().canParse(
                    TypeConverter.convert(rawKeyValue, Raw, BitArray)):
                keyFieldType = HexaString
                break

            # Even if the value is theoretically parsable as ASCII, some
            # characters cannot be encoded, so we double check that an
            # encode/decode round-trip gives back the original value.
            asciiValue = TypeConverter.convert(rawKeyValue, Raw, ASCII)
            if rawKeyValue != TypeConverter.convert(asciiValue, ASCII, Raw):
                keyFieldType = HexaString
                break

        # We create a symbol for each unique key value and remember, for
        # each of them, the aligned (split by field) form of its messages.
        newSymbols = {}
        alignedMessagesByKey = {}
        for message, rawKeyValue in keyFieldMessageValues.iteritems():
            keyValue = TypeConverter.convert(rawKeyValue, Raw, keyFieldType)
            if keyValue in newSymbols:
                newSymbols[keyValue].messages.append(message)
            else:
                newSymbols[keyValue] = Symbol(
                    name="Symbol_{0}".format(keyValue), messages=[message])

            alignedMessage = DataAlignment.align([message.data],
                                                 field,
                                                 encoded=False)
            alignedMessagesByKey.setdefault(keyValue,
                                            []).append(alignedMessage[0])

        def _rebuildFields(symbol, keyValue, templateFields):
            # Replace the fields of 'symbol' by copies of 'templateFields':
            # the key field accepts only 'keyValue', every other field
            # accepts the values observed at its position in the group.
            symbol.clearFields()
            alignedMessages = alignedMessagesByKey[keyValue]
            for i, templateField in enumerate(templateFields):
                if templateField == keyField:
                    domain = TypeConverter.convert(keyValue, keyFieldType,
                                                   Raw)
                else:
                    domain = list(
                        set(aligned[i] for aligned in alignedMessages))
                newField = Field(name=templateField.name, domain=domain)
                newField.parent = symbol
                symbol.fields.append(newField)

        for keyValue, newSymbol in newSymbols.iteritems():
            # First pass: recreate in the new symbol every field that
            # exists in the original one, so that its cells can be computed.
            _rebuildFields(newSymbol, keyValue, field.fields)

            # Find the right-most field holding a non-empty value in at
            # least one message of this symbol.
            cells = newSymbol.getCells(encoded=False,
                                       styled=False,
                                       transposed=False)
            lastCellWithValue = 0
            for line in cells:
                for iCell, cell in enumerate(line):
                    if cell != '' and lastCellWithValue < iCell:
                        lastCellWithValue = iCell

            # Second pass: rebuild the fields without the trailing ones
            # that accept no value.
            _rebuildFields(newSymbol, keyValue,
                           field.fields[:lastCellWithValue + 1])

        return newSymbols