Beispiel #1
0
    def test_find_file_pattern_loop_boundary(self):
        '''Check the reported offset for every prefix length from 0 to 999.

        Each input is ``prefix_len`` filler bytes followed by the delimiter
        and some trailing data; the non-inclusive result must equal the
        number of filler bytes.
        '''
        delimiter = b'\r\n\r\n'

        for prefix_len in range(1000):
            stream = io.BytesIO(
                b'x' * prefix_len + b'\r\n\r\nabcdefghijklmnop')
            found_at = util.find_file_pattern(stream, delimiter)

            self.assertEqual(prefix_len, found_at)
Beispiel #2
0
    def load(cls, file_obj, length, field_cls, strict=True):
        '''Return a :class:`BlockWithPayload`

        The block starts at the current position of `file_obj`. The fields
        span everything up to and including the field delimiter; the
        remaining bytes of the block are recorded as the payload without
        being read, and the file position is moved past them.

        :param file_obj: The file object
        :param length: How much to read from the file
        :param field_cls: The class or subclass of :class:`Fields`
        :param strict: If `True`, the field bytes are decoded with
            ``errors='strict'``; otherwise ``errors='replace'`` is used.
        '''

        # File objects without a ``name`` attribute (e.g. in-memory streams)
        # fall back to the object itself, the same as those whose name is
        # empty or None.
        source = getattr(file_obj, 'name', None) or file_obj

        binary_block = BinaryBlock()
        binary_block.set_file(source, file_obj.tell(), length)

        try:
            field_length = util.find_file_pattern(file_obj, FIELD_DELIM_BYTES,
                limit=length, inclusive=True)
        except ValueError:
            # No delimiter within the block: everything is fields, no payload
            field_length = length

        errors = 'strict' if strict else 'replace'
        field_str = file_obj.read(field_length).decode(errors=errors)
        fields = field_cls.parse(field_str)
        payload_length = length - field_length
        payload = Payload()

        # The payload is referenced by offset/length only; it is not read here.
        payload.set_file(source, offset=file_obj.tell(),
            length=payload_length)
        _logger.debug('Field length=%d', field_length)
        _logger.debug('Payload length=%d', payload_length)

        # Skip over the payload so the file is positioned after the block.
        file_obj.seek(file_obj.tell() + payload_length)

        block = BlockWithPayload(fields, payload)
        block.binary_block = binary_block

        return block
Beispiel #3
0
    def test_find_file_pattern_loop_boundary(self):
        '''Search for the delimiter behind 0..999 filler bytes.

        The expected (non-inclusive) offset is the filler length itself.
        '''
        for filler_count in range(1000):
            filler = b'x' * filler_count
            buffer = io.BytesIO(filler + b'\r\n\r\nabcdefghijklmnop')

            offset = util.find_file_pattern(buffer, b'\r\n\r\n')
            self.assertEqual(filler_count, offset)
Beispiel #4
0
    def load(cls, file_obj, preserve_block=False, check_block_length=True):
        '''Parse and return a :class:`Record`

        The header is read from the current position of `file_obj` up to
        and including the field delimiter, then the content block is
        loaded using the record's content length.

        :param file_obj: A file-like object.
        :param preserve_block: If `True`, content blocks are not parsed
            for fields and payloads. Enabling this feature ensures
            preservation of content length and hash digests.
        :param check_block_length: If `True`, the length of the blocks are
            checked to a serialized version by Warcat. This can be useful for
            checking whether Warcat will output blocks with correct
            whitespace. On a mismatch a warning is logged and the record's
            content length is updated to the new value.
        :return: The parsed :class:`Record`
        '''

        record_offset = file_obj.tell()
        _logger.debug('Record start at %d 0x%x', record_offset,
                      record_offset)

        record = Record()
        record.file_offset = record_offset
        header_length = util.find_file_pattern(file_obj,
                                               FIELD_DELIM_BYTES,
                                               inclusive=True)
        record.header = Header.parse(file_obj.read(header_length))
        block_length = record.content_length

        _logger.debug('Block length=%d', block_length)

        if preserve_block:
            # Keep the raw bytes untouched
            record.content_block = BinaryBlock.load(file_obj, block_length)
        else:
            content_type = record.header.fields.get('content-type')
            record.content_block = ContentBlock.load(file_obj, block_length,
                                                     content_type)

        if check_block_length:
            new_content_length = record.content_block.length

            if block_length != new_content_length:
                # Logger.warn is a deprecated alias; use Logger.warning
                _logger.warning('Content block length changed from %d to %d',
                                record.content_length, new_content_length)
                record.content_length = new_content_length

        return record
Beispiel #5
0
    def load(cls, file_obj, preserve_block=False, check_block_length=True):
        '''Parse and return a :class:`Record`

        Reading starts at the current position of `file_obj`: first the
        header (through the field delimiter), then the content block whose
        size is taken from the record's content length.

        :param file_obj: A file-like object.
        :param preserve_block: If `True`, content blocks are not parsed
            for fields and payloads. Enabling this feature ensures
            preservation of content length and hash digests.
        :param check_block_length: If `True`, the length of the blocks are
            checked to a serialized version by Warcat. This can be useful for
            checking whether Warcat will output blocks with correct
            whitespace. If the lengths differ, a warning is logged and the
            record's content length is replaced by the new length.
        :return: The parsed :class:`Record`
        '''

        start_offset = file_obj.tell()
        _logger.debug('Record start at %d 0x%x', start_offset, start_offset)

        record = Record()
        record.file_offset = start_offset
        header_length = util.find_file_pattern(file_obj, FIELD_DELIM_BYTES,
            inclusive=True)
        record.header = Header.parse(file_obj.read(header_length))
        block_length = record.content_length

        _logger.debug('Block length=%d', block_length)

        if not preserve_block:
            content_type = record.header.fields.get('content-type')
            record.content_block = ContentBlock.load(file_obj, block_length,
                content_type)
        else:
            record.content_block = BinaryBlock.load(file_obj, block_length)

        if not check_block_length:
            return record

        new_content_length = record.content_block.length

        if block_length != new_content_length:
            # Logger.warning replaces the deprecated Logger.warn alias
            _logger.warning('Content block length changed from %d to %d',
                record.content_length, new_content_length)
            record.content_length = new_content_length

        return record
Beispiel #6
0
    def load(cls, file_obj, length, field_cls, strict=True):
        '''Return a :class:`BlockWithPayload`

        The fields are read and parsed from the current position of
        `file_obj`; the rest of the block is registered as the payload by
        offset and length, and the file position is advanced past it.

        :param file_obj: The file object
        :param length: How much to read from the file
        :param field_cls: The class or subclass of :class:`Fields`
        :param strict: If `True`, decode the field bytes with
            ``errors='strict'``; otherwise use ``errors='replace'``.
        '''

        # ``file_obj.name`` raises AttributeError on objects that have no
        # such attribute (e.g. in-memory streams); treat those like an
        # empty name and reference the file object itself.
        file_ref = getattr(file_obj, 'name', None) or file_obj

        binary_block = BinaryBlock()
        binary_block.set_file(file_ref, file_obj.tell(), length)

        try:
            field_length = util.find_file_pattern(file_obj,
                                                  FIELD_DELIM_BYTES,
                                                  limit=length,
                                                  inclusive=True)
        except ValueError:
            # No delimiter inside the block, so there is no payload
            field_length = length

        errors = 'strict' if strict else 'replace'
        field_str = file_obj.read(field_length).decode(errors=errors)
        fields = field_cls.parse(field_str)
        payload_length = length - field_length
        payload = Payload()

        # Only the location of the payload is stored; its bytes are not read.
        payload.set_file(file_ref,
                         offset=file_obj.tell(),
                         length=payload_length)
        _logger.debug('Field length=%d', field_length)
        _logger.debug('Payload length=%d', payload_length)

        # Move to the end of the block
        file_obj.seek(file_obj.tell() + payload_length)

        block = BlockWithPayload(fields, payload)
        block.binary_block = binary_block

        return block
Beispiel #7
0
 def test_find_file_pattern(self):
     '''Check the inclusive offset and the file position after a search.

     The delimiter follows 7 bytes and is 4 bytes long, so the inclusive
     result is 11; the stream must still be at position 0 afterwards.
     '''
     stream = io.BytesIO(b'abcdefg\r\n\r\nhijklmnop')

     self.assertEqual(
         11, util.find_file_pattern(stream, b'\r\n\r\n', inclusive=True))
     self.assertEqual(0, stream.tell())
Beispiel #8
0
 def test_find_file_pattern(self):
     '''Search with ``inclusive=True`` and verify offset and position.

     Expects 11 (7 leading bytes plus the 4-byte delimiter) and a file
     position of 0 once the call has returned.
     '''
     delimiter = b'\r\n\r\n'
     buffer = io.BytesIO(b'abcdefg\r\n\r\nhijklmnop')

     end_of_match = util.find_file_pattern(buffer, delimiter, inclusive=True)
     position_after = buffer.tell()

     self.assertEqual(11, end_of_match)
     self.assertEqual(0, position_after)