def test_find_file_pattern_loop_boundary(self):
    # Slide the delimiter through every start offset from 0 to 999 and
    # check that the reported offset equals the number of filler bytes
    # placed in front of it.
    # NOTE(review): presumably meant to exercise internal read-chunk
    # boundaries of util.find_file_pattern -- confirm against its source.
    delimiter = b'\r\n\r\n'
    trailer = b'abcdefghijklmnop'

    for prefix_len in range(1000):
        stream = io.BytesIO(b'x' * prefix_len + delimiter + trailer)
        found_at = util.find_file_pattern(stream, delimiter)
        self.assertEqual(prefix_len, found_at)
def load(cls, file_obj, length, field_cls, strict=True):
    '''Return a :class:`BlockWithPayload`

    The block is taken to start at the current position of `file_obj`.
    On return the file position has been advanced past the fields and
    the payload.

    :param file_obj: The file object
    :param length: How much to read from the file
    :param field_cls: The class or subclass of :class:`Fields`
    :param strict: If `True`, the field bytes are decoded with
        ``errors='strict'``; otherwise undecodable bytes are substituted
        using ``errors='replace'``.
    '''
    # Not every file-like object has a ``name`` attribute (io.BytesIO does
    # not), so look it up defensively and fall back to the object itself,
    # exactly as is already done when the name is empty or None.
    source = getattr(file_obj, 'name', None) or file_obj

    binary_block = BinaryBlock()
    binary_block.set_file(source, file_obj.tell(), length)

    try:
        field_length = util.find_file_pattern(
            file_obj, FIELD_DELIM_BYTES, limit=length, inclusive=True)
    except ValueError:
        # No payload: the whole block is treated as fields.
        field_length = length

    errors = 'strict' if strict else 'replace'
    field_str = file_obj.read(field_length).decode(errors=errors)
    fields = field_cls.parse(field_str)

    # Whatever remains of the block after the fields is the payload; it is
    # recorded by offset and length rather than read into memory here.
    payload_length = length - field_length
    payload_offset = file_obj.tell()
    payload = Payload()
    payload.set_file(source, offset=payload_offset, length=payload_length)

    _logger.debug('Field length=%d', field_length)
    _logger.debug('Payload length=%d', payload_length)

    # Skip over the payload so the file is left at the end of the block.
    file_obj.seek(payload_offset + payload_length)

    block = BlockWithPayload(fields, payload)
    block.binary_block = binary_block

    return block
def load(cls, file_obj, preserve_block=False, check_block_length=True):
    '''Parse and return a :class:`Record`

    :param file_obj: A file-like object. The record is taken to start at
        its current position.
    :param preserve_block: If `True`, content blocks are not parsed
        for fields and payloads. Enabling this feature ensures
        preservation of content length and hash digests.
    :param check_block_length: If `True`, the length of the blocks are
        checked to a serialized version by Warcat. This can be useful
        for checking whether Warcat will output blocks with correct
        whitespace. When the lengths differ, a warning is logged and the
        record's content length is updated to the new value.
    '''
    record_offset = file_obj.tell()
    _logger.debug('Record start at %d 0x%x', record_offset, record_offset)

    record = Record()
    record.file_offset = record_offset

    # The header runs up to and including the field delimiter.
    header_length = util.find_file_pattern(
        file_obj, FIELD_DELIM_BYTES, inclusive=True)
    record.header = Header.parse(file_obj.read(header_length))

    block_length = record.content_length
    _logger.debug('Block length=%d', block_length)

    if not preserve_block:
        content_type = record.header.fields.get('content-type')
        record.content_block = ContentBlock.load(
            file_obj, block_length, content_type)
    else:
        record.content_block = BinaryBlock.load(file_obj, block_length)

    if check_block_length:
        new_content_length = record.content_block.length

        if block_length != new_content_length:
            # Logger.warn() is a deprecated alias that was removed in
            # Python 3.13; warning() is the supported spelling.
            _logger.warning('Content block length changed from %d to %d',
                            record.content_length, new_content_length)
            record.content_length = new_content_length

    return record
def test_find_file_pattern(self):
    # With inclusive=True the expected result (11) is the 7-byte prefix
    # plus the 4-byte delimiter. The search must also leave the stream
    # position where it started (0).
    stream = io.BytesIO(b'abcdefg\r\n\r\nhijklmnop')
    end_of_delimiter = util.find_file_pattern(
        stream, b'\r\n\r\n', inclusive=True)

    self.assertEqual(11, end_of_delimiter)
    self.assertEqual(0, stream.tell())