Code example #1
File: flac.py  Project: zpchavez/boottunes
    def read_subframe_lpc(self, order, block_size, bits_per_sample):
        samples = construct.StrictRepeater(
            order,
            construct.Bits("warm_up_samples", bits_per_sample, signed=True))

        subframe = array.array('i', samples.parse_stream(self.bitstream))

        lpc_precision = construct.Bits('lpc_precision', 4).parse_stream(
            self.bitstream) + 1

        lpc_shift = construct.Bits('lpc_shift', 5).parse_stream(self.bitstream)

        coefficients = array.array(
            'i',
            construct.StrictRepeater(
                order,
                construct.Bits('coefficients', lpc_precision,
                               signed=True)).parse_stream(self.bitstream))

        residual = self.read_residual(block_size, order)

        for i in xrange(len(subframe), block_size):
            subframe.insert(i,
                            (sum(
                [coefficients[j] * subframe[i - j - 1] for j in
                 xrange(0,len(coefficients))]) >> lpc_shift) + \
                            residual[i])

        return subframe
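
For reference, the LPC reconstruction loop above can be written as a standalone sketch: every sample past the warm-up region is the right-shifted dot product of the coefficients with the preceding `order` samples, plus the residual. The helper name reconstruct_lpc and its signature are illustrative, not part of the project:

    def reconstruct_lpc(warm_up, coefficients, residual, lpc_shift, block_size):
        # warm_up: the first `order` decoded samples
        # coefficients: LPC coefficients ordered so that coefficients[0]
        #               multiplies the most recent previous sample
        samples = list(warm_up)
        for i in range(len(samples), block_size):
            prediction = sum(c * samples[i - j - 1]
                             for j, c in enumerate(coefficients)) >> lpc_shift
            samples.append(prediction + residual[i])
        return samples
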
Code example #2
File: flac.py  Project: zpchavez/boottunes
    def read_residual(self, block_size, predictor_order):
        rice = array.array('i')

        #add some dummy rice so that the Rice index matches
        #that of the rest of the subframe
        for i in xrange(predictor_order):
            rice.append(0)

        coding_method = self.bitstream.read(2)
        if (coding_method == '\x00\x00'):
            rice2 = False
        elif (coding_method == '\x00\x01'):
            rice2 = True
        else:
            raise FlacStreamException('invalid residual coding method')

        partition_order = construct.Bits('partition_order',
                                         4).parse_stream(self.bitstream)

        if (partition_order > 0):
            total_samples = ((block_size / 2**partition_order) -
                             predictor_order)
            rice.extend(self.read_encoded_rice(total_samples, rice2))

            for i in xrange(1, 2**partition_order):
                total_samples = (block_size / 2**partition_order)

                rice.extend(self.read_encoded_rice(total_samples, rice2))
        else:
            rice.extend(
                self.read_encoded_rice(block_size - predictor_order, rice2))

        return rice
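
To make the partition arithmetic in read_residual concrete, here is a small worked example with illustrative values (not taken from the project): only the first partition subtracts the predictor order, and the warm-up samples account for the difference.

    block_size, partition_order, predictor_order = 4096, 2, 4   # example values
    partition_size = block_size // 2 ** partition_order          # 1024
    counts = [partition_size - predictor_order] + \
             [partition_size] * (2 ** partition_order - 1)       # [1020, 1024, 1024, 1024]
    assert sum(counts) + predictor_order == block_size
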
Code example #3
File: flac.py  Project: zpchavez/boottunes
    def read_subframe_verbatim(self, block_size, bits_per_sample):
        return array.array(
            'i',
            construct.StrictRepeater(
                block_size,
                construct.Bits("samples", bits_per_sample,
                               signed=True)).parse_stream(self.bitstream))
Code example #4
File: flac.py  Project: zpchavez/boottunes
    def read_subframe_constant(self, block_size, bits_per_sample):
        sample = construct.Bits('b',
                                bits_per_sample).parse_stream(self.bitstream)

        subframe = array.array('i', [sample] * block_size)

        return subframe
Code example #5
File: flac.py  Project: zpchavez/boottunes
    def read_encoded_rice(self, total_samples, rice2=False):
        bin_to_int = construct.lib.binary.bin_to_int

        samples = array.array('i')

        if (not rice2):
            rice_parameter = construct.Bits('rice_parameter',
                                            4).parse_stream(self.bitstream)
        else:
            rice_parameter = construct.Bits('rice_parameter',
                                            5).parse_stream(self.bitstream)

        if (rice_parameter != 0xF):
            #a Rice encoded residual
            for x in xrange(total_samples):

                #count the number of 0 bits before the next 1 bit
                #(unary encoding)
                #to find our most significant bits
                msb = 0
                s = self.bitstream.read(1)
                while (s != '\x01'):
                    msb += 1
                    s = self.bitstream.read(1)

                #grab the proper number of least significant bits
                lsb = bin_to_int(self.bitstream.read(rice_parameter))

                #combine msb and lsb to get the Rice-encoded value
                value = (msb << rice_parameter) | lsb
                if ((value & 0x1) == 0x1):  #negative
                    samples.append(-(value >> 1) - 1)
                else:  #positive
                    samples.append(value >> 1)
        else:
            #unencoded residual

            bits_per_sample = construct.Bits('escape_code',
                                             5).parse_stream(self.bitstream)

            sample = construct.Bits("sample", bits_per_sample, signed=True)

            for x in xrange(total_samples):
                samples.append(sample.parse_stream(self.bitstream))

        return samples
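
The sign handling at the end of the Rice branch is the usual zigzag mapping between unsigned codes and signed residuals (0, 1, 2, 3, 4, ... decode to 0, -1, 1, -2, 2, ...). A minimal standalone sketch, with illustrative function names:

    def zigzag_decode(value):
        # 0, 1, 2, 3, 4, ... -> 0, -1, 1, -2, 2, ...
        return -(value >> 1) - 1 if value & 1 else value >> 1

    def zigzag_encode(signed):
        return (-signed << 1) - 1 if signed < 0 else signed << 1

    assert [zigzag_decode(v) for v in range(5)] == [0, -1, 1, -2, 2]
    assert all(zigzag_decode(zigzag_encode(s)) == s for s in range(-8, 8))
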
Code example #6
File: flac.py  Project: zpchavez/boottunes
    def read_subframe_fixed(self, order, block_size, bits_per_sample):
        samples = construct.StrictRepeater(
            order,
            construct.Bits("warm_up_samples", bits_per_sample, signed=True))

        subframe = array.array('i', samples.parse_stream(self.bitstream))

        residual = self.read_residual(block_size, order)

        fixed_func = self.FIXED_FUNCTIONS[order]

        for i in xrange(len(subframe), block_size):
            fixed_func(subframe, residual, i)

        return subframe
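
FIXED_FUNCTIONS refers to the fixed polynomial predictors of order 0 to 4 defined on FlacReader (see code example #10 below). As a tiny illustration with made-up numbers, an order-2 fixed subframe predicts 2*s[i-1] - s[i-2] and adds the residual:

    subframe = [10, 12]        # warm-up samples of an order-2 fixed subframe
    residual = [0, 0, 1, -2]   # dummy residuals (the first two entries are unused)
    for i in range(len(subframe), 4):
        subframe.append(2 * subframe[i - 1] - subframe[i - 2] + residual[i])
    assert subframe == [10, 12, 15, 16]
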
Code example #7
File: asl.py  Project: cvandeplas/plaso
class AslParser(interface.BaseParser):
    """Parser for ASL log files."""

    NAME = 'asl_log'
    DESCRIPTION = u'Parser for ASL log files.'

    ASL_MAGIC = 'ASL DB\x00\x00\x00\x00\x00\x00'

    # If no read rights are assigned, the value is "-1".
    ASL_NO_RIGHTS = 'ffffffff'

    # Priority level (criticality)
    ASL_MESSAGE_PRIORITY = {
        0: 'EMERGENCY',
        1: 'ALERT',
        2: 'CRITICAL',
        3: 'ERROR',
        4: 'WARNING',
        5: 'NOTICE',
        6: 'INFO',
        7: 'DEBUG'
    }

    # ASL File header.
    # magic: magic number that identifies ASL files.
    # version: version of the file.
    # offset: first record in the file.
    # timestamp: epoch time when the first entry was written.
    # last_offset: last record in the file.
    ASL_HEADER_STRUCT = construct.Struct('asl_header_struct',
                                         construct.String('magic', 12),
                                         construct.UBInt32('version'),
                                         construct.UBInt64('offset'),
                                         construct.UBInt64('timestamp'),
                                         construct.UBInt32('cache_size'),
                                         construct.UBInt64('last_offset'),
                                         construct.Padding(36))

    # The record structure is:
    # [HEAP][STRUCTURE][4xExtraField][2xExtraField]*[PreviousEntry]
    # Record static structure.
    # tam_entry: it contains the number of bytes from this file position
    #            until the end of the record, not counting itself.
    # next_offset: next record. If it is equal to 0x00, it is the last record.
    # asl_message_id: integer that has the numeric identification of the event.
    # timestamp: Epoch integer that has the time when the entry was created.
    # nanosecond: nanosecond to add to the timestamp.
    # level: level of priority.
    # pid: identifier of the process that asked to save the record.
    # uid: identifier of the user that launched the process.
    # gid: identifier of the group that launched the process.
    # read_uid: identifier of a user. Only applies if it is not -1 (all FF).
    #           Only root and this user can read the entry.
    # read_gid: the same as read_uid, but for the group.
    ASL_RECORD_STRUCT = construct.Struct('asl_record_struct',
                                         construct.Padding(2),
                                         construct.UBInt32('tam_entry'),
                                         construct.UBInt64('next_offset'),
                                         construct.UBInt64('asl_message_id'),
                                         construct.UBInt64('timestamp'),
                                         construct.UBInt32('nanosec'),
                                         construct.UBInt16('level'),
                                         construct.UBInt16('flags'),
                                         construct.UBInt32('pid'),
                                         construct.UBInt32('uid'),
                                         construct.UBInt32('gid'),
                                         construct.UBInt32('read_uid'),
                                         construct.UBInt32('read_gid'),
                                         construct.UBInt64('ref_pid'))

    ASL_RECORD_STRUCT_SIZE = ASL_RECORD_STRUCT.sizeof()

    # 8-byte fields, they can be:
    # - String: [Nibble = 1000 (8)][Nibble = Length][7 Bytes = String].
    # - Integer: integer that has the byte position in the file that points
    #            to an ASL_RECORD_DYN_VALUE struct. If the value of the integer
    #            is equal to 0, it means that it has no data (skip).

    # If the field is a String, we use this structure to decode each
    # integer byte in the corresponding character (ASCII Char).
    ASL_OCTET_STRING = construct.ExprAdapter(construct.Octet('string'),
                                             encoder=lambda obj, ctx: ord(obj),
                                             decoder=lambda obj, ctx: chr(obj))

    # Field string structure. If the first bit is 1, it means that it
    # is a String (1000) = 8, then the next nibble has the number of
    # characters. The last 7 bytes contain the string characters.
    ASL_STRING = construct.BitStruct(
        'string', construct.Flag('type'), construct.Bits('filler', 3),
        construct.If(lambda ctx: ctx.type, construct.Nibble('string_length')),
        construct.If(lambda ctx: ctx.type,
                     construct.Array(7, ASL_OCTET_STRING)))

    # 8-byte pointer to a byte position in the file.
    ASL_POINTER = construct.UBInt64('pointer')

    # Dynamic data structure pointed by a pointer that contains a String:
    # [2 bytes padding][4 bytes length of String][String].
    ASL_RECORD_DYN_VALUE = construct.Struct(
        'asl_record_dyn_value', construct.Padding(2),
        construct.PascalString('value',
                               length_field=construct.UBInt32('length')))

    def Parse(self, parser_context, file_entry):
        """Extract entries from an ASL file.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
    """
        file_object = file_entry.GetFileObject()
        file_object.seek(0, os.SEEK_SET)

        try:
            header = self.ASL_HEADER_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            file_object.close()
            raise errors.UnableToParseFile(
                u'Unable to parse ASL Header with error: {0:s}.'.format(
                    exception))

        if header.magic != self.ASL_MAGIC:
            file_object.close()
            raise errors.UnableToParseFile(
                u'Not an ASL Header, unable to parse.')

        # Get the first and the last entry.
        offset = header.offset
        old_offset = header.offset
        last_offset_header = header.last_offset

        # If the ASL file has entries.
        if offset:
            event_object, offset = self.ReadAslEvent(file_object, offset)
            while event_object:
                parser_context.ProduceEvent(event_object,
                                            parser_name=self.NAME,
                                            file_entry=file_entry)

                # TODO: an anomaly object must be emitted once that is implemented.
                # Sanity check, the last read element must be the same as
                # indicated by the header.
                if offset == 0 and old_offset != last_offset_header:
                    logging.warning(u'Parsing ended before the header ends.')
                old_offset = offset
                event_object, offset = self.ReadAslEvent(file_object, offset)

        file_object.close()

    def ReadAslEvent(self, file_object, offset):
        """Returns an AslEvent from a single ASL entry.

    Args:
      file_object: a file-like object that points to an ASL file.
      offset: offset where the static part of the entry starts.

    Returns:
      An event object constructed from a single ASL record.
    """
        # The heap of the entry is saved to try to avoid seek (performance issue).
        # It has the real start position of the entry.
        dynamic_start = file_object.tell()
        dynamic_part = file_object.read(offset - file_object.tell())

        if not offset:
            return None, None

        try:
            record_header = self.ASL_RECORD_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            logging.warning(
                u'Unable to parse ASL event with error: {0:s}'.format(
                    exception))
            return None, None

        # Variable tam_fields is the real length of the dynamic fields.
        # We have this: [Record_Struct] + [Dynamic_Fields] + [Pointer_Entry_Before]
        # Record_Struct has a field called tam_entry, which holds the number
        # of bytes until the end of the entry from the position of that field.
        # The tam_entry is between the 2nd and the 6th byte in the [Record_Struct].
        # tam_entry = ([Record_Struct]-6)+[Dynamic_Fields]+[Pointer_Entry_Before]
        # We do not need [Pointer_Entry_Before], so we subtract its size,
        # which is 8 bytes:
        # [Dynamic_Fields] = tam_entry - [Record_Struct] + 6 - 8
        # [Dynamic_Fields] = tam_entry - [Record_Struct] - 2
        tam_fields = record_header.tam_entry - self.ASL_RECORD_STRUCT_SIZE - 2

        # Dynamic part of the entry that contains a minimum of four 8-byte fields
        # plus 2 x [8 bytes] fields for each extra ASL_Field.
        # The first four fields are always the Host, Sender, Facility and Message.
        # After the first four fields, the entry might have extra ASL_Fields.
        # For each extra ASL_field, it has a pair of 8-byte fields where the first
        # 8 bytes contain the name of the extra ASL_field and the second 8 bytes
        # contain the text of the extra field.
        # Each of these 8-byte fields can be stored using one of three different
        # types:
        # - Null value ('0000000000000000'): nothing to do.
        # - String: It is string if first bit = 1 or first nibble = 8 (1000).
        #           Second nibble has the length of string.
        #           The next 7 bytes have the text characters of the string
        #           padding the end with null characters: '0x00'.
        #           Example: [8468 6964 6400 0000]
        #                    [8] String, [4] length, value: [68 69 64 64] = hidd.
        # - Pointer: static position in the file to a special struct
        #            implemented as an ASL_RECORD_DYN_VALUE.
        #            Example: [0000 0000 0000 0077]
        #            It points to the file position 0x077 that has a
        #            ASL_RECORD_DYN_VALUE structure.
        values = []
        while tam_fields > 0:
            try:
                raw_field = file_object.read(8)
            except (IOError, construct.FieldError) as exception:
                logging.warning(
                    u'Unable to parse ASL event with error: {0:s}'.format(
                        exception))
                return None, None
            try:
                # Try to read as a String.
                field = self.ASL_STRING.parse(raw_field)
                values.append(''.join(field.string[0:field.string_length]))
                # Go to parse the next extra field.
                tam_fields -= 8
                continue
            except ValueError:
                pass
            # If it is not a string, it must be a pointer.
            try:
                field = self.ASL_POINTER.parse(raw_field)
            except ValueError as exception:
                logging.warning(
                    u'Unable to parse ASL event with error: {0:s}'.format(
                        exception))
                return None, None
            if field != 0:
                # The next IF ELSE is only for performance issues, avoiding seek.
                # If the pointer points to a lower position than where the current
                # entry starts, it means that it points to a previous entry.
                pos = field - dynamic_start
                # Greater than or equal to 0 means that the data is in the current entry.
                if pos >= 0:
                    try:
                        values.append((self.ASL_RECORD_DYN_VALUE.parse(
                            dynamic_part[pos:])).value.partition('\x00')[0])
                    except (IOError, construct.FieldError) as exception:
                        logging.warning(
                            u'Unable to parse ASL event with error: {0:s}'.
                            format(exception))
                        return None, None
                else:
                    # Only if it is a pointer that points to the
                    # heap from another entry we use the seek method.
                    main_position = file_object.tell()
                    # If the pointer is in a previous entry.
                    if main_position > field:
                        file_object.seek(field - main_position, os.SEEK_CUR)
                        try:
                            values.append(
                                (self.ASL_RECORD_DYN_VALUE.parse_stream(
                                    file_object)).value.partition('\x00')[0])
                        except (IOError, construct.FieldError):
                            logging.warning((
                                u'The pointer at {0:d} (0x{0:x}) points to invalid '
                                u'information.'
                            ).format(main_position -
                                     self.ASL_POINTER.sizeof()))
                        # Come back to the position in the entry.
                        _ = file_object.read(main_position -
                                             file_object.tell())
                    else:
                        _ = file_object.read(field - main_position)
                        values.append((self.ASL_RECORD_DYN_VALUE.parse_stream(
                            file_object)).value.partition('\x00')[0])
                        # Come back to the position in the entry.
                        file_object.seek(main_position - file_object.tell(),
                                         os.SEEK_CUR)
            # Next extra field: 8 bytes more.
            tam_fields -= 8

        # Read the last 8 bytes of the record that points to the previous entry.
        _ = file_object.read(8)

        # Parsed section, we translate the read data to an appropriate format.
        microsecond = record_header.nanosec // 1000
        timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
            record_header.timestamp, microsecond)
        record_position = offset
        message_id = record_header.asl_message_id
        level = u'{0} ({1})'.format(
            self.ASL_MESSAGE_PRIORITY[record_header.level],
            record_header.level)
        # If the value is -1 (FFFFFFFF), it can be read by everyone.
        if record_header.read_uid != int(self.ASL_NO_RIGHTS, 16):
            read_uid = record_header.read_uid
        else:
            read_uid = 'ALL'
        if record_header.read_gid != int(self.ASL_NO_RIGHTS, 16):
            read_gid = record_header.read_gid
        else:
            read_gid = 'ALL'

        # Parsing the dynamic values (text or pointers to position with text).
        # The first four are always the host, sender, facility, and message.
        computer_name = values[0]
        sender = values[1]
        facility = values[2]
        message = values[3]

        # If the entry has extra fields, they work as pairs:
        # the first is the name of the field and the second is its value.
        extra_information = ''
        if len(values) > 4:
            values = values[4:]
            for index in xrange(0, len(values) // 2):
                extra_information += (u'[{0}: {1}]'.format(
                    values[index * 2], values[(index * 2) + 1]))

        # Return the event and the offset for the next entry.
        return AslEvent(timestamp, record_position, message_id, level,
                        record_header, read_uid, read_gid, computer_name,
                        sender, facility, message,
                        extra_information), record_header.next_offset
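
The inline 8-byte string case handled by ASL_STRING above can also be unpacked by hand, which makes the nibble layout easier to see; the sample bytes below reproduce the [8468 6964 6400 0000] example from the comment:

    raw_field = b'\x84hidd\x00\x00\x00'   # [8] string marker, [4] length, 'hidd' padded to 7 bytes
    first_byte = ord(raw_field[0:1])
    is_string = bool(first_byte & 0x80)   # high bit set -> inline string
    length = first_byte & 0x0f            # low nibble -> number of characters
    text = raw_field[1:1 + length]        # the remaining 7 bytes hold the characters
    assert is_string and text == b'hidd'
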
Code example #8
class MasterBootRecord(BootRecord):
    _MBR_STRUCT = construct.Struct(
        "mbr",
        construct.HexDumpAdapter(construct.Bytes("bootloader_code", 440)),
        construct.Field('disk_signature', 4),
        construct.Padding(2),
        construct.Array(
            4,
            construct.Struct(
                "partitions",
                construct.SLInt8("state"),
                construct.BitStruct(
                    "beginning",
                    construct.Octet("head"),
                    construct.Bits("sect", 6),
                    construct.Bits("cyl", 10),
                ),
                construct.Enum(
                    construct.UBInt8("type"),
                    Nothing=0x00,
                    FAT12=0x01,
                    XENIX_ROOT=0x02,
                    XENIX_USR=0x03,
                    FAT16_old=0x04,
                    Extended_DOS=0x05,
                    FAT16=0x06,
                    FAT32=0x0b,
                    FAT32_LBA=0x0c,
                    NTFS=0x07,
                    LINUX_SWAP=0x82,
                    LINUX_NATIVE=0x83,
                    PROTECTIVE_MBR=0xee,
                    _default_=construct.Pass,
                ),
                construct.BitStruct(
                    "ending",
                    construct.Octet("head"),
                    construct.Bits("sect", 6),
                    construct.Bits("cyl", 10),
                ),
                construct.ULInt32(
                    "sector_offset"),  # offset from MBR in sectors
                construct.ULInt32("size"),  # in sectors
            )),
        construct.Const(construct.Bytes("signature", 2), '55aa'.decode('hex')),
    )

    def __init__(self, filePath, size, offset=None, whitelist=()):
        self._type = 'MBR'
        super(MasterBootRecord, self).__init__(filePath, size, offset,
                                               whitelist)

    def _parse(self):
        """
            Main method in charge of parsing the MBR.
            It will try to parse the boot record according to its documented structure and extract the partition table,
            disk signature and code section.
            It will then try to narrow down invariant code, hash it and match the hash against a whitelist.
            If no match is found, it will try some simple heuristics to detect malicious behaviour.

        Returns: nothing

        """
        try:
            mbr = self._MBR_STRUCT.parse(self._raw)
        except construct.core.ConstructError as e:
            raise InvalidMBRError('Invalid MBR structure: {0}\n{1}'.format(
                e, hexdump(self._raw)))

        self._parsePartTable(mbr.partitions)

        # Windows stores the disk signature at 0x1B8, other MBRs seem to leave this area alone
        self._diskSignature = mbr.disk_signature

        # If code section is null, check for protective MBR signature (detected in partition table parsing). If found,
        # then the machine is likely using UEFI instead of BIOS to boot. If not, it could mean that the sample being
        # analyzed has been tampered with by a bootkit
        if mbr.bootloader_code.encode('hex') == 440 * '00':
            if 'Protective MBR' in self._signature:
                self._signature.append('UEFI (no legacy boot code)')
            else:
                self._suspiciousBehaviour.append('Code section is null')
        else:
            expectedLoader, invariantCode = self._getInvariantCode(
                mbr.bootloader_code)
            codeHash = hashlib.sha256(invariantCode)
            self._matchHash(codeHash, expectedLoader)
            if len(self._signature) == 0:
                # No whitelisted signature matched, try some simple heuristics to flag this MBR as malicious
                # Note that the self._checkCode method is only given the "invariant" code section to help with the
                # disassembling. This will obviously lead to broken offsets, but it doesn't matter since the heuristics
                # don't use them.
                self._checkCode(invariantCode)

    def _parsePartTable(self, partitions):
        """
            Private method that parses the partition table of the MBR. Updates self._partTable list.

        Args:
            partitions: Construct.Container object of the partition table

        Returns: nothing
        """
        partNum = 0
        for part in partitions:
            partNum += 1
            # Assume a partition entry without size (in LBA) or type is invalid, and do not include it in the listing.
            if part.size != 0 and part.type != 'Nothing':
                self._partTable.append((partNum, part.state < 0, part.type,
                                        part.sector_offset, part.size))
            else:
                self._logger.debug('Ignoring invalid partition: %s', part)
            # Early detection of protective MBR so that we don't try to make sense of the MBR partition table
            if part.type == 'PROTECTIVE_MBR' and partNum == 1:
                self._logger.debug(
                    'Protective MBR detected, MBR partition table should not be taken into account. '
                    'GPT partition table parser not implemented yet')
                self._signature.append('Protective MBR')

    def _getInvariantCode(self, rawCode):
        """
            Helper method that tries to narrow down "invariant code" which can be hashed and compared to well known
            signatures. Most MBRs have localized error strings which must be excluded from the hash computation because
            they may vary from a country to another.
            First, this method tries to detect what kind of MBR it is dealing with. Most of the time, it is enough
            to look for some known hardcoded strings that identify "well known" MBRs (such as Truecrypt, GRUB2, etc...).
            Then, this method finds where the strings are and "removes" them (as in "does not include them").
            Finding these strings can be achieved by quickly studying the assembly code and looking for how these
            strings are echoed on screen at boot time (using interrupt 0x10).
            This research only needs to be done once for each type of MBR but requires an analyst to do it by static
            analysis. This script cannot take care of this. This method merely implements the results of such work.

            Currently supported MBR are:
             - Truecrypt
             - McAfee Endpoint Encryption (Safeboot)
             - GRUB2
             - Windows (XP to 10)

        Args:
            rawCode: str of the code section

        Returns: 2-tuple (unicode string of expected loader, concatenated strings of invariant sections of code)

        """
        # By default, assume all the MBR code section will be hashed. It is obviously wrong in most cases, but it allows
        # for a "default case" which will automatically match no known hash in case something goes wrong with the
        # detection.
        codeStart = 0
        codeEnd = len(rawCode)
        expectedLoader = None
        invariantCode = str()

        # TrueCrypt (detected with the hardcoded string following the first jump: " TrueCrypt Boot Loader")
        if rawCode[0x5:0x1b].encode('hex').upper(
        ) == '2054727565437279707420426F6F74204C6F61646572':
            # TrueCrypt uses hardcoded and not-localized error strings. Therefore every TrueCrypt MBR should have the
            # same code from start to end
            expectedLoader = 'TrueCrypt MBR'

        # McAfee SafeBoot (detected with the hardcoded string following the first jump: "Safeboot ")
        elif rawCode[0x3:0xc].encode('hex').upper() == '53616665426F6F7420':
            # Two versions have been seen but both start with a jump to the same offset (0x26).
            # There are some strings at the end of the code section but localization is unlikely so it will be assumed
            # to be hardcoded (until a localized version is found...).
            # Therefore, Safeboot code can be hashed from 0x26 to the end of code section
            invariantCode += rawCode[:0x3]  # Hash the first JMP
            codeStart = 0x26
            expectedLoader = 'Safeboot MBR'

        # GRUB (detected with the hardcoded string "GRUB " located at 0x188)
        elif rawCode[0x188:0x18d].encode('hex').upper() == '4752554220':
            # GRUB has some error strings but they are hardcoded and not localized so they can be included in the hash
            # computation. However GRUB can be installed on a disk (MBR) as well as on a partition (in a kind of VBR).
            # But in both cases the code used is the same. Since a BPB is needed for the latter case it is also present
            # in the MBR (but not needed). It therefore has to be excluded from the hash computation.
            # GRUB is jumping over the BIOS Parameter Block located between 0x3 and 0x5a.
            # It should be followed by the kernel address (word), kernel sector (dword), kernel sector high (dword) and
            # boot drive (byte). Therefore the code really starts at 0x65.
            # These values are hardcoded in boot.img and have little chance to change anytime soon.
            codeStart = 0x65
            invariantCode += rawCode[:0x3]  # Hash the first JMP
            expectedLoader = 'GRUB2 MBR'

        # Windows MBR cannot be detected with hardcoded strings, so they fall in the default case and further checks
        # are then made based on the hypothesis that this is indeed a Windows MBR.
        else:
            # Starting with NT5.0, the MBR contains localized strings which must be excluded from the hash computation.
            # These strings are located after the code, at 3 different offsets which can be calculated by adding 0x100
            # to the values respectively stored in bytes 0x1b5, 0x1b6 and 0x1b7 (last bytes of the code section).
            # Eg: The first localized string is at : 0x100 + the value saved at offset 0x1B5
            # Even though localized strings can be of different lengths, the offset of the first one does not vary
            # given one Windows version. This can therefore be used to tell Windows versions apart.
            firstStrOffset = construct.UBInt8('FirstStringOffset').parse(
                rawCode[0x1b5])
            # Windows NT5
            if firstStrOffset == 0x2c:
                expectedLoader = 'NT5.1/NT5.2 MBR'
                codeEnd = 0x100 + firstStrOffset
            # Windows NT6.0
            elif firstStrOffset == 0x62:
                expectedLoader = 'NT6.0 MBR'
                codeEnd = 0x100 + firstStrOffset
            # Windows NT6.1+
            elif firstStrOffset == 0x63:
                expectedLoader = 'NT6.1+ MBR'
                codeEnd = 0x100 + firstStrOffset
            else:
                self._suspiciousBehaviour.append(
                    'Invalid string offset: {0:#x}'.format(firstStrOffset))
                self._logger.debug(
                    'First localized string offset is wrong for a Windows MBR. '
                    'It should be 0x2c, 0x62 or 0x63: {0:#x}'.format(
                        firstStrOffset))

        self._logger.debug(
            'Expecting {0}. Code starts at {1:#x} and ends at {2:#x}'.format(
                expectedLoader, codeStart, codeEnd))

        invariantCode += rawCode[codeStart:codeEnd]
        return expectedLoader, invariantCode

    def _checkCode(self, rawCode):
        md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_16)
        md.detail = True

        checkJmp = True
        for i in md.disasm(rawCode, 0):
            # Check for JUMPs and CALLs before the first PUSH/RET.
            if checkJmp and len(i.groups) > 0:
                # Group check if available
                if hasattr(capstone.x86, 'X86_GRP_CALL') and hasattr(
                        capstone.x86, 'X86_GRP_RET'):
                    if capstone.x86.X86_GRP_CALL in i.groups or capstone.x86.X86_GRP_JUMP in i.groups:
                        self._suspiciousBehaviour.append(
                            'JMP or CALL before relocation')
                        checkJmp = False
                    elif capstone.x86.X86_GRP_RET in i.groups:
                        # Stop search after the first PUSH/RET
                        checkJmp = False
                # Manual check in case capstone version doesn't support CALL and RET groups
                else:
                    if i.mnemonic[0] == 'j' or i.mnemonic == 'call':
                        self._suspiciousBehaviour.append(
                            'JMP or CALL before relocation')
                        checkJmp = False
                    elif i.mnemonic[:3] == 'ret':
                        # Stop search after the first PUSH/RET
                        checkJmp = False

            # Check for unknown interrupt
            if i.mnemonic == 'int' and i.bytes[1] not in (0x10, 0x13, 0x18,
                                                          0x1a):
                self._suspiciousBehaviour.append(
                    'Unknown Interrupt : {0:#x}'.format(i.bytes[1]))
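
The partition entries above store sector_offset and size in sectors (see the comments in _MBR_STRUCT). Assuming the conventional 512-byte sector size, which the MBR itself does not encode, converting an entry to a byte range is a one-liner; the helper below is only an illustration, not part of the project:

    SECTOR_SIZE = 512   # assumed; the MBR itself does not state the sector size

    def partition_byte_range(sector_offset, size_in_sectors, sector_size=SECTOR_SIZE):
        """Return the (start, end) byte offsets of a partition on the disk."""
        start = sector_offset * sector_size
        return start, start + size_in_sectors * sector_size

    # e.g. a partition starting at sector 2048 spanning 409600 sectors:
    assert partition_byte_range(2048, 409600) == (1048576, 210763776)
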
Code example #9
class ASLParser(interface.FileObjectParser):
    """Parser for ASL log files."""

    _INITIAL_FILE_OFFSET = None

    NAME = u'asl_log'
    DESCRIPTION = u'Parser for ASL log files.'

    _ASL_MAGIC = b'ASL DB\x00\x00\x00\x00\x00\x00'

    # ASL File header.
    # magic: magic number that identifies ASL files.
    # version: version of the file.
    # offset: first record in the file.
    # timestamp: time when the first entry was written.
    #     Contains the number of seconds since January 1, 1970 00:00:00 UTC.
    # last_offset: last record in the file.
    _ASL_HEADER_STRUCT = construct.Struct(u'asl_header_struct',
                                          construct.String(u'magic', 12),
                                          construct.UBInt32(u'version'),
                                          construct.UBInt64(u'offset'),
                                          construct.UBInt64(u'timestamp'),
                                          construct.UBInt32(u'cache_size'),
                                          construct.UBInt64(u'last_offset'),
                                          construct.Padding(36))

    # The record structure is:
    # [HEAP][STRUCTURE][4xExtraField][2xExtraField]*[PreviousEntry]
    # Record static structure.
    # tam_entry: it contains the number of bytes from this file position
    #            until the end of the record, not counting itself.
    # next_offset: next record. If it is equal to 0x00, it is the last record.
    # asl_message_id: integer that has the numeric identification of the event.
    # timestamp: the entry creation date and time.
    #     Contains the number of seconds since January 1, 1970 00:00:00 UTC.
    # nanosecond: nanosecond to add to the timestamp.
    # level: level of priority.
    # pid: identifier of the process that asked to save the record.
    # uid: identifier of the user that launched the process.
    # gid: identifier of the group that launched the process.
    # read_uid: identifier of a user. Only applies if it is not -1 (all FF).
    #           Only root and this user can read the entry.
    # read_gid: the same as read_uid, but for the group.
    _ASL_RECORD_STRUCT = construct.Struct(u'asl_record_struct',
                                          construct.Padding(2),
                                          construct.UBInt32(u'tam_entry'),
                                          construct.UBInt64(u'next_offset'),
                                          construct.UBInt64(u'asl_message_id'),
                                          construct.UBInt64(u'timestamp'),
                                          construct.UBInt32(u'nanosec'),
                                          construct.UBInt16(u'level'),
                                          construct.UBInt16(u'flags'),
                                          construct.UBInt32(u'pid'),
                                          construct.UBInt32(u'uid'),
                                          construct.UBInt32(u'gid'),
                                          construct.UBInt32(u'read_uid'),
                                          construct.UBInt32(u'read_gid'),
                                          construct.UBInt64(u'ref_pid'))

    _ASL_RECORD_STRUCT_SIZE = _ASL_RECORD_STRUCT.sizeof()

    # 8-byte fields, they can be:
    # - String: [Nibble = 1000 (8)][Nibble = Length][7 Bytes = String].
    # - Integer: integer that has the byte position in the file that points
    #            to an ASL_RECORD_DYN_VALUE struct. If the value of the integer
    #            is equal to 0, it means that it has no data (skip).

    # If the field is a String, we use this structure to decode each
    # integer byte in the corresponding character (ASCII Char).
    _ASL_OCTET_STRING = construct.ExprAdapter(
        construct.Octet(u'string'),
        encoder=lambda obj, ctx: ord(obj),
        decoder=lambda obj, ctx: chr(obj))

    # Field string structure. If the first bit is 1, it means that it
    # is a String (1000) = 8, then the next nibble has the number of
    # characters. The last 7 bytes contain the string characters.
    _ASL_STRING = construct.BitStruct(
        u'string', construct.Flag(u'type'), construct.Bits(u'filler', 3),
        construct.If(lambda ctx: ctx.type, construct.Nibble(u'string_length')),
        construct.If(lambda ctx: ctx.type,
                     construct.Array(7, _ASL_OCTET_STRING)))

    # 8-byte pointer to a byte position in the file.
    _ASL_POINTER = construct.UBInt64(u'pointer')

    # Dynamic data structure pointed by a pointer that contains a String:
    # [2 bytes padding][4 bytes size of String][String].
    _ASL_RECORD_DYN_VALUE = construct.Struct(
        u'asl_record_dyn_value', construct.Padding(2),
        construct.UBInt32(u'size'),
        construct.Bytes(u'value', lambda ctx: ctx.size))

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses an ALS file-like object.

    Args:
      parser_mediator: a parser mediator object (instance of ParserMediator).
      file_object: a file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
        file_object.seek(0, os.SEEK_SET)

        try:
            header = self._ASL_HEADER_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            raise errors.UnableToParseFile(
                u'Unable to parse ASL Header with error: {0:s}.'.format(
                    exception))

        if header.magic != self._ASL_MAGIC:
            raise errors.UnableToParseFile(
                u'Not an ASL Header, unable to parse.')

        offset = header.offset
        if not offset:
            return

        header_last_offset = header.last_offset

        previous_offset = offset
        event_object, offset = self.ReadASLEvent(parser_mediator, file_object,
                                                 offset)
        while event_object:
            # Sanity check, the last read element must be the same as
            # indicated by the header.
            if offset == 0 and previous_offset != header_last_offset:
                parser_mediator.ProduceParseError(
                    u'Unable to parse header. Last element header does not match '
                    u'header offset.')
            previous_offset = offset
            event_object, offset = self.ReadASLEvent(parser_mediator,
                                                     file_object, offset)

    def ReadASLEvent(self, parser_mediator, file_object, offset):
        """Reads an ASL record at a specific offset.

    Args:
      parser_mediator: a parser mediator object (instance of ParserMediator).
      file_object: a file-like object that points to an ASL file.
      offset: an integer containing the offset of the ASL record.

    Returns:
      A tuple of an event object extracted from the ASL record,
      and the offset to the next ASL record in the file.
    """
        # The heap of the entry is saved to try to avoid seek (performance issue).
        # It has the real start position of the entry.
        dynamic_data_offset = file_object.tell()

        try:
            dynamic_data = file_object.read(offset - dynamic_data_offset)
        except IOError as exception:
            parser_mediator.ProduceParseError(
                u'unable to read ASL record dynamic data with error: {0:s}'.
                format(exception))
            return None, None

        if not offset:
            return None, None

        try:
            record_struct = self._ASL_RECORD_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            parser_mediator.ProduceParseError(
                u'unable to parse ASL record with error: {0:s}'.format(
                    exception))
            return None, None

        # Variable tam_fields is the real length of the dynamic fields.
        # We have this: [Record_Struct] + [Dynamic_Fields] + [Pointer_Entry_Before]
        # Record_Struct has a field called tam_entry, which holds the number
        # of bytes until the end of the entry from the position of that field.
        # The tam_entry is between the 2nd and the 6th byte in the [Record_Struct].
        # tam_entry = ([Record_Struct]-6)+[Dynamic_Fields]+[Pointer_Entry_Before]
        # We do not need [Pointer_Entry_Before], so we subtract its size,
        # which is 8 bytes:
        # [Dynamic_Fields] = tam_entry - [Record_Struct] + 6 - 8
        # [Dynamic_Fields] = tam_entry - [Record_Struct] - 2
        tam_fields = record_struct.tam_entry - self._ASL_RECORD_STRUCT_SIZE - 2

        # Dynamic part of the entry that contains a minimum of four 8-byte fields
        # plus 2 x [8 bytes] fields for each extra ASL_Field.
        # The first four fields are always the Host, Sender, Facility and Message.
        # After the first four fields, the entry might have extra ASL_Fields.
        # For each extra ASL_field, it has a pair of 8-byte fields where the first
        # 8 bytes contain the name of the extra ASL_field and the second 8 bytes
        # contain the text of the extra field.
        # Each of these 8-byte fields can be stored using one of three different
        # types:
        # - Null value ('0000000000000000'): nothing to do.
        # - String: It is string if first bit = 1 or first nibble = 8 (1000).
        #           Second nibble has the length of string.
        #           The next 7 bytes have the text characters of the string
        #           padding the end with null characters: '0x00'.
        #           Example: [8468 6964 6400 0000]
        #                    [8] String, [4] length, value: [68 69 64 64] = hidd.
        # - Pointer: static position in the file to a special struct
        #            implemented as an ASL_RECORD_DYN_VALUE.
        #            Example: [0000 0000 0000 0077]
        #            It points to the file position 0x077 that has a
        #            ASL_RECORD_DYN_VALUE structure.
        values = []
        while tam_fields > 0:
            try:
                field_data = file_object.read(8)
            except IOError as exception:
                parser_mediator.ProduceParseError(
                    u'unable to read ASL field with error: {0:s}'.format(
                        exception))
                return None, None

            # Try to read the field data as a string.
            try:
                asl_string_struct = self._ASL_STRING.parse(field_data)
                string_data = b''.join(
                    asl_string_struct.string[0:asl_string_struct.
                                             string_length])
                values.append(string_data)

                # Go to parse the next extra field.
                tam_fields -= 8
                continue

            except ValueError:
                pass

            # If the field is not a string it must be a pointer.
            try:
                pointer_value = self._ASL_POINTER.parse(field_data)
            except ValueError as exception:
                parser_mediator.ProduceParseError(
                    u'unable to parse ASL field with error: {0:s}'.format(
                        exception))
                return None, None

            if not pointer_value:
                # Next extra field: 8 bytes more.
                tam_fields -= 8
                continue

            # The next IF ELSE is only for performance issues, avoiding seek.
            # If the pointer points to a lower position than where the current
            # entry starts, it means that it points to a previous entry.
            pos = pointer_value - dynamic_data_offset

            # Greater than or equal to 0 means that the data is in the current entry.
            if pos >= 0:
                try:
                    dyn_value_struct = self._ASL_RECORD_DYN_VALUE.parse(
                        dynamic_data[pos:])
                    dyn_value = dyn_value_struct.value.partition(b'\x00')[0]
                    values.append(dyn_value)

                except (IOError, construct.FieldError) as exception:
                    parser_mediator.ProduceParseError((
                        u'unable to parse ASL record dynamic value with error: '
                        u'{0:s}').format(exception))
                    return None, None

            else:
                # Only if it is a pointer that points to the
                # heap from another entry we use the seek method.
                main_position = file_object.tell()

                # If the pointer is in a previous entry.
                if main_position > pointer_value:
                    file_object.seek(pointer_value - main_position,
                                     os.SEEK_CUR)

                    try:
                        dyn_value_struct = self._ASL_RECORD_DYN_VALUE.parse_stream(
                            file_object)
                        dyn_value = dyn_value_struct.value.partition(
                            b'\x00')[0]
                        values.append(dyn_value)

                    except (IOError, construct.FieldError):
                        parser_mediator.ProduceParseError((
                            u'the pointer at {0:d} (0x{0:08x}) points to invalid '
                            u'information.'
                        ).format(main_position - self._ASL_POINTER.sizeof()))

                    # Come back to the position in the entry.
                    _ = file_object.read(main_position - file_object.tell())

                else:
                    _ = file_object.read(pointer_value - main_position)

                    dyn_value_struct = self._ASL_RECORD_DYN_VALUE.parse_stream(
                        file_object)
                    dyn_value = dyn_value_struct.value.partition(b'\x00')[0]
                    values.append(dyn_value)

                    # Come back to the position in the entry.
                    file_object.seek(main_position - file_object.tell(),
                                     os.SEEK_CUR)

            # Next extra field: 8 bytes more.
            tam_fields -= 8

        # Read the last 8 bytes of the record that points to the previous entry.
        _ = file_object.read(8)

        # Parsed section, we translate the read data to an appropriate format.
        micro_seconds, _ = divmod(record_struct.nanosec, 1000)

        # Parsing the dynamic values (text or pointers to position with text).
        # The first four are always the host, sender, facility, and message.
        number_of_values = len(values)
        if number_of_values < 4:
            parser_mediator.ProduceParseError(
                u'less than four values read from an ASL event.')

        computer_name = u'N/A'
        sender = u'N/A'
        facility = u'N/A'
        message = u'N/A'

        if number_of_values >= 1:
            computer_name = values[0].decode(u'utf-8')

        if number_of_values >= 2:
            sender = values[1].decode(u'utf-8')

        if number_of_values >= 3:
            facility = values[2].decode(u'utf-8')

        if number_of_values >= 4:
            message = values[3].decode(u'utf-8')

        # If the entry has extra fields, they work as pairs:
        # the first is the name of the field and the second is its value.
        extra_information = u''
        if number_of_values > 4 and number_of_values % 2 == 0:
            # Taking all the extra attributes and merging them together,
            # eg: a = [1, 2, 3, 4] will look like "1: 2, 3: 4".
            try:
                extra_values = map(py2to3.UNICODE_TYPE, values[4:])
                extra_information = u', '.join(
                    map(u': '.join, zip(extra_values[0::2],
                                        extra_values[1::2])))
            except UnicodeDecodeError as exception:
                parser_mediator.ProduceParseError(
                    u'Unable to decode all ASL values in the extra information fields.'
                )

        event_object = ASLEvent(record_struct.timestamp,
                                offset,
                                record_struct.asl_message_id,
                                record_struct.level,
                                record_struct.pid,
                                record_struct.uid,
                                record_struct.gid,
                                record_struct.read_uid,
                                record_struct.read_gid,
                                computer_name,
                                sender,
                                facility,
                                message,
                                extra_information,
                                micro_seconds=micro_seconds)
        parser_mediator.ProduceEvent(event_object)

        return (event_object, record_struct.next_offset)
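
To make the tam_entry arithmetic in the comments above concrete, here is the same calculation with an illustrative value; the record structure defined above is 66 bytes, and the example tam_entry is made up:

    RECORD_STRUCT_SIZE = 66   # _ASL_RECORD_STRUCT.sizeof(): 2 + 4 + 3*8 + 4 + 2 + 2 + 5*4 + 8
    tam_entry = 116           # illustrative value read from a record

    # tam_entry = (RECORD_STRUCT_SIZE - 6) + dynamic_fields + 8
    dynamic_fields = tam_entry - (RECORD_STRUCT_SIZE - 6) - 8
    assert dynamic_fields == tam_entry - RECORD_STRUCT_SIZE - 2   # the form used in the code
    # 48 bytes -> six 8-byte fields: the four mandatory ones plus one extra name/value pair
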
Code example #10
File: flac.py  Project: zpchavez/boottunes
class FlacReader:
    FRAME_HEADER = construct.Struct(
        'frame_header', construct.Bits('sync', 14),
        construct.Bits('reserved', 2), construct.Bits('block_size', 4),
        construct.Bits('sample_rate', 4),
        construct.Bits('channel_assignment', 4),
        construct.Bits('bits_per_sample', 3), construct.Padding(1),
        construct.IfThenElse(
            'total_channels', lambda ctx1: ctx1['channel_assignment'] <= 7,
            construct.Value('c', lambda ctx2: ctx2['channel_assignment'] + 1),
            construct.Value('c', lambda ctx3: 2)), UTF8('frame_number'),
        construct.IfThenElse(
            'extended_block_size', lambda ctx1: ctx1['block_size'] == 6,
            construct.Bits('b', 8),
            construct.If(lambda ctx2: ctx2['block_size'] == 7,
                         construct.Bits('b', 16))),
        construct.IfThenElse(
            'extended_sample_rate', lambda ctx1: ctx1['sample_rate'] == 12,
            construct.Bits('s', 8),
            construct.If(lambda ctx2: ctx2['sample_rate'] in (13, 14),
                         construct.Bits('s', 16))), construct.Bits('crc8', 8))

    UNARY = construct.Struct(
        'unary',
        construct.RepeatUntil(lambda obj, ctx: obj == '\x01',
                              construct.Field('bytes', 1)),
        construct.Value('value', lambda ctx: len(ctx['bytes']) - 1))

    SUBFRAME_HEADER = construct.Struct(
        'subframe_header', construct.Padding(1),
        construct.Bits('subframe_type', 6),
        construct.Flag('has_wasted_bits_per_sample'),
        construct.IfThenElse('wasted_bits_per_sample',
                             lambda ctx: ctx['has_wasted_bits_per_sample'],
                             PlusOne(Unary('value')),
                             construct.Value('value', lambda ctx2: 0)))

    GET_BLOCKSIZE_FROM_STREAMINFO = -1
    GET_8BIT_BLOCKSIZE_FROM_END_OF_HEADER = -2
    GET_16BIT_BLOCKSIZE_FROM_END_OF_HEADER = -3

    BLOCK_SIZE = (GET_BLOCKSIZE_FROM_STREAMINFO, 192, 576, 1152, 2304, 4608,
                  GET_8BIT_BLOCKSIZE_FROM_END_OF_HEADER,
                  GET_16BIT_BLOCKSIZE_FROM_END_OF_HEADER, 256, 512, 1024, 2048,
                  4096, 8192, 16384, 32768)

    GET_SAMPLE_SIZE_FROM_STREAMINFO = -1
    SAMPLE_SIZE = (GET_SAMPLE_SIZE_FROM_STREAMINFO, 8, 12, None, 16, 20, 24,
                   None)

    def FIXED0(subframe, residual, i):
        subframe.insert(i, residual[i])

    def FIXED1(subframe, residual, i):
        subframe.insert(i, subframe[i - 1] + residual[i])

    def FIXED2(subframe, residual, i):
        subframe.insert(i,
                        ((2 * subframe[i - 1]) - subframe[i - 2] + \
                         residual[i]))

    def FIXED3(subframe, residual, i):
        subframe.insert(i,
                        ((3 * subframe[i - 1]) - (3 * subframe[i - 2]) + \
                         subframe[i - 3] + residual[i]))

    def FIXED4(subframe, residual, i):
        subframe.insert(i,
                        ((4 * subframe[i - 1]) - (6 * subframe[i - 2]) + \
                         (4 * subframe[i - 3]) - subframe[i - 4] + residual[i]))

    #iterates over all of the channels, in order
    def MERGE_INDEPENDENT(channel_list):
        channel_data = [iter(c) for c in channel_list]

        while (True):
            for channel in channel_data:
                yield channel.next()

    def MERGE_LEFT(channel_list):
        channel_left = iter(channel_list[0])
        channel_side = iter(channel_list[1])

        while (True):
            left = channel_left.next()
            side = channel_side.next()

            yield left
            yield left - side

    def MERGE_RIGHT(channel_list):
        channel_side = iter(channel_list[0])
        channel_right = iter(channel_list[1])

        while (True):
            side = channel_side.next()
            right = channel_right.next()

            yield side + right
            yield right

    def MERGE_MID(channel_list):
        channel_mid = iter(channel_list[0])
        channel_side = iter(channel_list[1])

        while (True):
            mid = channel_mid.next()
            side = channel_side.next()

            mid = mid << 1
            mid |= (side & 0x1)

            yield (mid + side) >> 1
            yield (mid - side) >> 1

    CHANNEL_FUNCTIONS = (MERGE_INDEPENDENT, MERGE_INDEPENDENT,
                         MERGE_INDEPENDENT, MERGE_INDEPENDENT,
                         MERGE_INDEPENDENT, MERGE_INDEPENDENT,
                         MERGE_INDEPENDENT, MERGE_INDEPENDENT, MERGE_LEFT,
                         MERGE_RIGHT, MERGE_MID)

    FIXED_FUNCTIONS = (FIXED0, FIXED1, FIXED2, FIXED3, FIXED4)

    def __init__(self, flac_stream):
        self.stream = BufferedStream(flac_stream)
        self.streaminfo = None
        self.bitstream = None

        #ensure the file starts with 'fLaC'
        self.read_stream_marker()

        #initialize self.bitstream
        self.begin_bitstream()

        #find self.streaminfo in case we need it
        self.read_metadata_blocks()

    def close(self):
        if (self.bitstream != None):
            self.bitstream.close()
        else:
            self.stream.close()

    def read_stream_marker(self):
        if (self.stream.read(4) != 'fLaC'):
            raise FlacStreamException('invalid stream marker')

    def read_metadata_blocks(self):
        block = audiotools.FlacAudio.METADATA_BLOCK_HEADER.parse_stream(
            self.stream)
        while (block.last_block == 0):
            if (block.block_type == 0):
                self.streaminfo = audiotools.FlacAudio.STREAMINFO.parse_stream(
                    self.stream)
            else:
                self.stream.seek(block.block_length, 1)

            block = audiotools.FlacAudio.METADATA_BLOCK_HEADER.parse_stream(
                self.stream)
        self.stream.seek(block.block_length, 1)

    def begin_bitstream(self):
        import bitstream

        #self.bitstream = construct.BitStreamReader(self.stream)
        self.bitstream = bitstream.BitStreamReader(self.stream)

    def read_frame(self):
        self.stream.reset_buffer()

        try:
            header = FlacReader.FRAME_HEADER.parse_stream(self.bitstream)
        except construct.core.FieldError:
            return ""

        if (header.sync != 0x3FFE):
            raise FlacStreamException('invalid sync')

        if (crc8(self.stream.getvalue()[0:-1]) != header.crc8):
            raise FlacStreamException('crc8 checksum failed')

        #block_size tells us how many samples we need from each subframe
        block_size = FlacReader.BLOCK_SIZE[header.block_size]
        if (block_size == self.GET_BLOCKSIZE_FROM_STREAMINFO):
            block_size = self.streaminfo.maximum_blocksize

        elif ((block_size == self.GET_8BIT_BLOCKSIZE_FROM_END_OF_HEADER)
              or (block_size == self.GET_16BIT_BLOCKSIZE_FROM_END_OF_HEADER)):
            block_size = header.extended_block_size + 1

        #grab subframe data as 32-bit array objects
        subframe_data = []

        for channel_number in xrange(header.total_channels):
            subframe_data.append(
                self.read_subframe(header, block_size, channel_number))

        crc16sum = crc16(self.stream.getvalue())

        #try to byte-align the stream
        if (len(self.bitstream.buffer) > 0):
            self.bitstream.read(len(self.bitstream.buffer))

        if (crc16sum != construct.Bits('crc16', 16).parse_stream(
                self.bitstream)):
            raise FlacStreamException('crc16 checksum failed')

        #convert our list of subframe data arrays into
        #a string of sample data
        if (FlacReader.SAMPLE_SIZE[header.bits_per_sample] == 16):
            merged_frames = array.array(
                'h', FlacReader.CHANNEL_FUNCTIONS[header.channel_assignment](
                    subframe_data))

            if (audiotools.BIG_ENDIAN):
                merged_frames.byteswap()

            return merged_frames.tostring()

        elif (FlacReader.SAMPLE_SIZE[header.bits_per_sample] == 8):
            merged_frames = array.array(
                'b', FlacReader.CHANNEL_FUNCTIONS[header.channel_assignment](
                    subframe_data))

            return merged_frames.tostring()

        else:
            if (FlacReader.SAMPLE_SIZE[header.bits_per_sample] == \
                self.GET_SAMPLE_SIZE_FROM_STREAMINFO):
                bits_per_sample = self.streaminfo.bits_per_sample + 1

            elif (FlacReader.SAMPLE_SIZE[header.bits_per_sample] is None):
                raise FlacStreamException('invalid bits per sample')

            else:
                bits_per_sample = FlacReader.SAMPLE_SIZE[
                    header.bits_per_sample]

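            #otherwise, repack the decoded values bit-by-bit at the
            #frame's exact bits-per-sample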
            stream = construct.GreedyRepeater(
                construct.BitStruct(
                    'bits',
                    construct.Bits('value',
                                   bits_per_sample,
                                   swapped=True,
                                   signed=True)))

            return stream.build([
                construct.Container(value=v)
                for v in FlacReader.CHANNEL_FUNCTIONS[
                    header.channel_assignment](subframe_data)
            ])

    def read_subframe(self, frame_header, block_size, channel_number):
        subframe_header = \
                        FlacReader.SUBFRAME_HEADER.parse_stream(self.bitstream)

        #figure out the bits-per-sample of this subframe
        if ((frame_header.channel_assignment == 8) and (channel_number == 1)):
            #if channel is stored as left+difference
            #and this is the difference, add 1 bit
            bits_per_sample = FlacReader.SAMPLE_SIZE[
                frame_header.bits_per_sample] + 1

        elif ((frame_header.channel_assignment == 9)
              and (channel_number == 0)):
            #if channel is stored as difference+right
            #and this is the difference, add 1 bit
            bits_per_sample = FlacReader.SAMPLE_SIZE[
                frame_header.bits_per_sample] + 1

        elif ((frame_header.channel_assignment == 10)
              and (channel_number == 1)):
            #if channel is stored as average+difference
            #and this is the difference, add 1 bit
            bits_per_sample = FlacReader.SAMPLE_SIZE[
                frame_header.bits_per_sample] + 1

        else:
            #otherwise, use the number from the frame header
            bits_per_sample = FlacReader.SAMPLE_SIZE[
                frame_header.bits_per_sample]

        if (subframe_header.has_wasted_bits_per_sample):
            bits_per_sample -= subframe_header.wasted_bits_per_sample

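        #subframe_type: 0 is CONSTANT, 1 is VERBATIM,
        #001xxx is FIXED of order xxx and 1xxxxx is LPC of order xxxxx + 1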
        if (subframe_header.subframe_type == 0):
            subframe = self.read_subframe_constant(block_size, bits_per_sample)

        elif (subframe_header.subframe_type == 1):
            subframe = self.read_subframe_verbatim(block_size, bits_per_sample)

        elif ((subframe_header.subframe_type & 0x38) == 0x08):
            subframe = self.read_subframe_fixed(
                subframe_header.subframe_type & 0x07, block_size,
                bits_per_sample)

        elif ((subframe_header.subframe_type & 0x20) == 0x20):
            subframe = self.read_subframe_lpc(
                (subframe_header.subframe_type & 0x1F) + 1, block_size,
                bits_per_sample)

        else:
            raise FlacStreamException('invalid subframe type')

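        #restore any wasted bits-per-sample by shifting the decoded
        #samples back up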
        if (subframe_header.has_wasted_bits_per_sample):
            return array.array('i', [
                i << subframe_header.wasted_bits_per_sample for i in subframe
            ])
        else:
            return subframe

    def read_subframe_constant(self, block_size, bits_per_sample):
        sample = construct.Bits('b',
                                bits_per_sample).parse_stream(self.bitstream)

        subframe = array.array('i', [sample] * block_size)

        return subframe

    def read_subframe_verbatim(self, block_size, bits_per_sample):
        return array.array(
            'i',
            construct.StrictRepeater(
                block_size,
                construct.Bits("samples", bits_per_sample,
                               signed=True)).parse_stream(self.bitstream))

    def read_subframe_fixed(self, order, block_size, bits_per_sample):
        samples = construct.StrictRepeater(
            order,
            construct.Bits("warm_up_samples", bits_per_sample, signed=True))

        subframe = array.array('i', samples.parse_stream(self.bitstream))

        residual = self.read_residual(block_size, order)

        fixed_func = self.FIXED_FUNCTIONS[order]

        for i in xrange(len(subframe), block_size):
            fixed_func(subframe, residual, i)

        return subframe

    def read_subframe_lpc(self, order, block_size, bits_per_sample):
        samples = construct.StrictRepeater(
            order,
            construct.Bits("warm_up_samples", bits_per_sample, signed=True))

        subframe = array.array('i', samples.parse_stream(self.bitstream))

        lpc_precision = construct.Bits('lpc_precision', 4).parse_stream(
            self.bitstream) + 1

        lpc_shift = construct.Bits('lpc_shift', 5).parse_stream(self.bitstream)

        coefficients = array.array(
            'i',
            construct.StrictRepeater(
                order,
                construct.Bits('coefficients', lpc_precision,
                               signed=True)).parse_stream(self.bitstream))

        residual = self.read_residual(block_size, order)

        #apply the LPC predictor to rebuild the remaining samples
        for i in xrange(len(subframe), block_size):
            prediction = sum(coefficients[j] * subframe[i - j - 1]
                             for j in xrange(len(coefficients)))
            subframe.insert(i, (prediction >> lpc_shift) + residual[i])

        return subframe

    def read_residual(self, block_size, predictor_order):
        rice = array.array('i')

        #add some dummy rice so that the Rice index matches
        #that of the rest of the subframe
        for i in xrange(predictor_order):
            rice.append(0)

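        #the bitstream reader yields one byte per bit, so a 2-bit read
        #of '\x00\x00' selects coding method 0 (4-bit Rice parameters)
        #and '\x00\x01' selects method 1 (5-bit Rice parameters)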
        coding_method = self.bitstream.read(2)
        if (coding_method == '\x00\x00'):
            rice2 = False
        elif (coding_method == '\x00\x01'):
            rice2 = True
        else:
            raise FlacStreamException('invalid residual coding method')

        partition_order = construct.Bits('partition_order',
                                         4).parse_stream(self.bitstream)

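        #the residual is split into 2 ** partition_order partitions;
        #the first partition is short by predictor_order samples, which
        #are already covered by the warm-up samples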
        if (partition_order > 0):
            total_samples = ((block_size / 2**partition_order) -
                             predictor_order)
            rice.extend(self.read_encoded_rice(total_samples, rice2))

            for i in xrange(1, 2**partition_order):
                total_samples = (block_size / 2**partition_order)

                rice.extend(self.read_encoded_rice(total_samples, rice2))
        else:
            rice.extend(
                self.read_encoded_rice(block_size - predictor_order, rice2))

        return rice

    def read_encoded_rice(self, total_samples, rice2=False):
        bin_to_int = construct.lib.binary.bin_to_int

        samples = array.array('i')

        if (not rice2):
            rice_parameter = construct.Bits('rice_parameter',
                                            4).parse_stream(self.bitstream)
        else:
            rice_parameter = construct.Bits('rice_parameter',
                                            5).parse_stream(self.bitstream)

        if (rice_parameter != 0xF):
            #a Rice encoded residual
            for x in xrange(total_samples):

                #count the number of 0 bits before the next 1 bit
                #(unary encoding)
                #to find our most significant bits
                msb = 0
                s = self.bitstream.read(1)
                while (s != '\x01'):
                    msb += 1
                    s = self.bitstream.read(1)

                #grab the proper number of least significant bits
                lsb = bin_to_int(self.bitstream.read(rice_parameter))

                #combine msb and lsb to get the Rice-encoded value
                value = (msb << rice_parameter) | lsb
                if ((value & 0x1) == 0x1):  #negative
                    samples.append(-(value >> 1) - 1)
                else:  #positive
                    samples.append(value >> 1)
        else:
            #unencoded residual

            bits_per_sample = construct.Bits('escape_code',
                                             5).parse_stream(self.bitstream)

            sample = construct.Bits("sample", bits_per_sample, signed=True)

            for x in xrange(total_samples):
                samples.append(sample.parse_stream(self.bitstream))

        return samples
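
The loop below is a minimal usage sketch, not part of the project: it
assumes FlacReader and FlacStreamException are importable from this flac
module, takes two hypothetical command-line arguments (an input FLAC path
and an output raw-PCM path), and relies only on the behavior shown above,
where read_frame() returns one frame of raw PCM as a string and "" at the
end of the stream.

import sys

from flac import FlacReader, FlacStreamException

if __name__ == '__main__':
    flac_file = open(sys.argv[1], 'rb')
    pcm_out = open(sys.argv[2], 'wb')
    reader = FlacReader(flac_file)

    try:
        #read_frame() returns raw PCM for one FLAC frame,
        #or "" once the stream is exhausted
        frame = reader.read_frame()
        while (frame != ""):
            pcm_out.write(frame)
            frame = reader.read_frame()
    except FlacStreamException as err:
        print >> sys.stderr, "decoding error: %s" % (err,)
    finally:
        reader.close()
        pcm_out.close()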