Python RarStream.seek Examples

Programming Language: Python

Namespace/Package Name: rescene.rarstream

Class/Type: RarStream

Method/Function: seek

Examples at hotexamples.com: 9

Python RarStream.seek - 9 examples found. These are the top-rated real-world Python examples of rescene.rarstream.RarStream.seek extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

RarStream(12)

read(10)

seek(9)

tell(9)

close(8)

length(1)

list_files(1)

readable(1)

seekable(1)

Example #1

Show file

File: test_rarstream.py Project: zc0nf/pyrescene

    def test_file(self):
        """Tests if the file in the rar archive is the same as the
        extracted version, also after seeking around in the stream.

        Every RarStream is closed in a ``finally`` clause so that a failing
        assertion does not leave an archive handle open.
        """
        rar_file = os.path.join(self.path, "store_little", "store_little.rar")
        txt_file = os.path.join(self.path, "txt", "little_file.txt")
        rs = RarStream(rar_file)
        try:
            with open(txt_file, "rb") as tfile:
                self.assertEqual(rs.read(), tfile.read())
        finally:
            rs.close()

        rar_file = os.path.join(self.path,
                                "store_split_folder_old_srrsfv_windows",
                                "winrar2.80.rar")
        txt_file = os.path.join(self.path, "txt", "unicode_dos.nfo")
        rs = RarStream(rar_file, "unicode_dos.nfo")  # 3.316 bytes
        try:
            with open(txt_file, "rb") as tfile:
                # move both streams around before comparing the contents
                rs.seek(3316)
                self.assertEqual(rs.seek(6316), rs.tell())
                rs.seek(3312)
                tfile.seek(3336, os.SEEK_SET)
                tfile.seek(6336, os.SEEK_SET)
                rs.read(4)
                rs.seek(0)
                tfile.seek(0)
                self.assertEqual(rs.read(), tfile.read())
                # seeking relative to the end must agree with a real file
                tfile.seek(-20, os.SEEK_END)
                self.assertEqual(rs.seek(-20, os.SEEK_END), tfile.tell())
                self.assertEqual(rs.read(), tfile.read())
        finally:
            rs.close()
        self.assertEqual(rs.closed, True, "Stream not closed")

        txt_file = os.path.join(self.path, "txt", "unicode_mac.nfo")
        rs = RarStream(rar_file, "unicode_mac.nfo")
        try:
            with open(txt_file, "rb") as tfile:
                tfile.seek(3000)
                tfile.read()
                tfile.seek(333)
                rs.seek(333)
                self.assertEqual(rs.read(), tfile.read())
        finally:
            rs.close()

Example #2

Show file

File: test_rarstream.py Project: zc0nf/pyrescene

 def test_folder_multiple(self):
     """Read a file stored with a folder path from an archive that holds
     multiple files in that folder (split volumes)."""
     archive = os.path.join(self.path, self.folder,
                            "store_split_folder.rar")
     reference = os.path.join(self.path, "txt", "users_manual4.00.txt")
     rs = RarStream(archive, "txt/users_manual4.00.txt")
     with open(reference, "rb") as txt_file:
         # + other tests to increase code coverage
         self.assertEqual(rs.read(), txt_file.read())
         end_position = txt_file.tell()
         self.assertEqual(rs.tell(), end_position)
         self.assertEqual(rs.length(), end_position)
         for capability in (rs.readable, rs.seekable):
             self.assertEqual(capability(), True)
         # reading at the end of the stream keeps returning nothing
         for _ in range(2):
             self.assertEqual(rs.read(), b"")
         rs.seek(0, os.SEEK_SET)
         rs.read(2)
         rs.seek(0, os.SEEK_END)
         self.assertRaises(IndexError, rs.seek, -1)
     expected_names = [
         "txt\\empty_file.txt",
         "txt\\little_file.txt",
         "txt\\users_manual4.00.txt",
     ]
     self.assertEqual(rs.list_files(), expected_names)
     self.assertRaises(NotImplementedError, rs.readinto, "")

Example #3

Show file

File: m2ts.py Project: zc0nf/pyrescene

class M2tsReader(object):
    """Implements a simple Reader class that reads M2TS files.

    The input is either a path (opened directly, or through RarStream when
    is_rar() reports a RAR archive) or an already opened stream.

    Usage: call read() to parse the next packet header, then call
    read_contents() or skip_contents() before calling read() again.
    """
    def __init__(self,
                 read_mode=M2tsReadMode.M2ts,
                 path=None,
                 stream=None,
                 match_offset=0,
                 archived_file_name=""):
        """Open the input and position the reader at its first packet.

        read_mode: one of the M2tsReadMode values
        path: file to open; archived_file_name is passed to RarStream
            when the path is a RAR archive
        stream: opened binary stream that is used when no path is given
        match_offset: byte offset of a known match; when usable, reading
            starts at the packet boundary at or before this offset

        Raises InvalidDataException when the input is smaller than 192
        bytes and InvalidMatchOffsetException when match_offset lies at
        or beyond the end of the input.
        """
        assert path or stream
        if path:
            if is_rar(path):
                self._stream = RarStream(path, archived_file_name)
            else:
                self._stream = open(path, 'rb')
        elif stream:
            self._stream = stream
        # seek to the end to learn the total length
        self._stream.seek(0, os.SEEK_END)
        self._file_length = self._stream.tell()
        self.mode = read_mode
        self.read_done = True

        self.current_packet = None
        self.current_offset = 0

        if self._file_length < 192:
            raise InvalidDataException("File too small")

        # faster reconstructing when match_offset is provided
        if match_offset >= 8 and match_offset < self._file_length:
            # Use the highest multiple of the packet size that is not past
            # the offset as a starting point. The division alone yields a
            # packet index, so it must be scaled back to a byte offset.
            start = match_offset // PACKET_SIZE * PACKET_SIZE
            self._stream.seek(start)
            self.current_offset = start
        elif match_offset >= self._file_length:
            msg = "Invalid match offset for video: {0}".format(match_offset)
            raise InvalidMatchOffsetException(msg)
        else:
            # no useful matching offset against the main movie file
            self._stream.seek(0)

    def read(self):
        """Parse the header of the packet at the current offset.

        Returns True when a packet header was read (it is then available
        as self.current_packet) and False when no data is left.
        Raises InvalidDataException for a truncated packet/header or a
        missing synchronization byte.
        """
        # read() is invalid at this time: read_contents() or
        # skip_contents() must be called before read() can be called again
        assert self.read_done or self.mode == M2tsReadMode.SRS

        self.read_done = False
        self._stream.seek(self.current_offset)
        # TP_extra_header (4 Bytes) + MPEG-2 transport stream header (4 B)
        header = self._stream.read(HEADER_SIZE)

        if not len(header):
            return False

        # compare against the active mode: testing the constant itself
        # always selects the same branch
        if self.mode == M2tsReadMode.M2ts:
            if self.current_offset + PACKET_SIZE > self._file_length:
                msg = "Invalid packet length at 0x{0:08X}"
                raise InvalidDataException(msg.format(self.current_offset))
        else:
            # SRS header data must be a multiple of 8
            if self.current_offset + HEADER_SIZE > self._file_length:
                raise InvalidDataException("Broken SRS file")

        # The sync byte is the first byte of the transport stream header,
        # directly after the 4 byte TP_extra_header. A slice is used so
        # the comparison works on both Python 2 str and Python 3 bytes.
        if header[4:5] != b'\x47':
            msg = "Invalid synchronization byte at 0x{0:08X}"
            raise InvalidDataException(msg.format(self.current_offset))

        packet = Packet(self.current_offset)
        packet.raw_header = header
        (byte8, ) = S_BYTE.unpack_from(header, 7)
        # two bits: bit 3 and 4 of last byte in the header
        packet.adaptation_field = (byte8 & 0x30) >> 4
        # last four bits of last byte in the header
        packet.continuity_counter = (byte8 & 0xF)
        (byte67, ) = S_SHORT.unpack_from(header, 5)
        packet.pid = byte67 & 0x1FFF

        self.current_offset += PACKET_SIZE
        self.current_packet = packet

        return True

    def read_contents(self):
        """Reads the transport stream packet payload. (no 8B header)

        Returns an empty bytes object in SRS mode.
        """
        buff = b""
        if self.read_done:
            # payload was read or skipped already: back up to read it again
            self._stream.seek(-PAYLOAD_SIZE, os.SEEK_CUR)
        self.read_done = True
        if self.mode != M2tsReadMode.SRS:
            buff = self._stream.read(PAYLOAD_SIZE)
        return buff

    def skip_contents(self):
        """Skips over the payload data to the next packet."""
        if not self.read_done:
            self.read_done = True
            if self.mode != M2tsReadMode.SRS:
                self._stream.seek(PAYLOAD_SIZE, os.SEEK_CUR)

    def close(self):
        """Close the underlying file/stream, ignoring errors."""
        try:  # close the file/stream
            self._stream.close()
        except Exception:
            # best effort only, but let KeyboardInterrupt/SystemExit pass
            pass

    def __del__(self):
        try:  # close the file/stream
            self._stream.close()
        except Exception:
            # _stream can be missing when __init__ failed early
            pass

Example #4

Show file

class AsfReader(object):
    """Implements a simple Reader class that reads through WMV
    or WMV-SRS files one Object at a time.

    Usage: call read() to parse the next object header, then call one of
    read_contents(), skip_contents() or move_to_child() before calling
    read() again.
    """
    def __init__(self,
                 read_mode,
                 path=None,
                 stream=None,
                 archived_file_name=""):
        """Open the input and rewind it to the start.

        read_mode: one of the AsfReadMode values
        path: file to open; archived_file_name is passed to RarStream
            when the path is a RAR archive
        stream: opened binary stream that is used when no path is given
        """
        assert path or stream, "missing ASF reader input"
        if path:
            if is_rar(path):
                self._asf_stream = RarStream(path, archived_file_name)
            else:
                self._asf_stream = open(path, 'rb')
        elif stream:
            self._asf_stream = stream
        # seek to the end to learn the total length, then rewind
        self._asf_stream.seek(0, os.SEEK_END)
        self._file_length = self._asf_stream.tell()
        self._asf_stream.seek(0)
        self.mode = read_mode

        self.read_done = True
        self.current_object = None
        self.object_guid = None

    def read(self):
        """Parse the header of the object at the current stream position.

        Returns True when an object header was read (it is then available
        as self.current_object) and False when fewer than 24 bytes are
        left. The stream is positioned back at the start of the object.
        Raises InvalidDataException in Sample mode when a non data object
        claims to extend beyond the end of the file.
        """
        # "Read() is invalid at this time", "MoveToChild(), ReadContents(), or
        # SkipContents() must be called before Read() can be called again")
        assert self.read_done or (self.mode == AsfReadMode.SRS and
                                  self.object_guid == GUID_DATA_OBJECT), \
                                  "AsfReader read() is invalid at this time"

        object_start_position = self._asf_stream.tell()
        self.current_object = None
        self.read_done = False

        # no room for GUID (16B) and size (8B) of the object
        if object_start_position + 24 > self._file_length:
            return False

        self._atom_header = self._asf_stream.read(24)
        # 16 bytes for GUID, 8 bytes for object size
        self.object_guid, size = struct.unpack("<16sQ", self._atom_header)

        # sanity check on object length
        # Skip check on GUID_DATA_OBJECT so we can still report expected size.
        # This is only applied on samples,
        # since a partial movie might still be useful.
        end_offset = object_start_position + size
        if (self.mode == AsfReadMode.Sample
                and self.object_guid != GUID_DATA_OBJECT
                and end_offset > self._file_length):
            raise InvalidDataException("Invalid object length at 0x%08X" %
                                       object_start_position)

        # these two object types carry extra header bytes
        if self.object_guid == GUID_HEADER_OBJECT:
            self._atom_header += self._asf_stream.read(6)
        elif self.object_guid == GUID_DATA_OBJECT:
            self._atom_header += self._asf_stream.read(26)

        self.current_object = Object(size, self.object_guid)
        self.current_object.raw_header = self._atom_header
        self.current_object.start_pos = object_start_position

        # Calculate the size for the data object in SRS mode
        if (self.mode == AsfReadMode.SRS
                and self.object_guid == GUID_DATA_OBJECT):
            # size of the data object cannot be relied upon
            # so change size and end_offset
            o = self.current_object

            header_size = len(o.raw_header)
            # packet count is stored after GUID (16B), size (8B)
            # and another 16 bytes
            (total_data_packets, ) = S_LONGLONG.unpack_from(o.raw_header,
                                                            16 + 8 + 16)
            rp_offsets = 0
            # a data object without packets would otherwise cause a
            # division by zero; there is nothing to measure in that case
            if total_data_packets:
                # data packet/media object size
                psize = (o.size - header_size) // total_data_packets
                start = o.start_pos + header_size
                for _ in range(total_data_packets):
                    # calculate real packet size
                    packet = AsfDataPacket()
                    packet.data_file_offset = start + rp_offsets
                    self._asf_stream.seek(packet.data_file_offset)
                    # just read all of it to make it easier
                    # SRS files are small anyway
                    packet.data = self._asf_stream.read()

                    rp_offsets += asf_data_get_packet(packet, psize,
                                                      AsfReadMode.SRS)

            # keep the size from the file and store the measured one
            self.current_object.osize = self.current_object.size
            self.current_object.size = rp_offsets + header_size

        self._asf_stream.seek(object_start_position, os.SEEK_SET)

        # New top-level objects should be added only between the
        # Data Object and Index Object(s).

        return True

    def read_contents(self):
        """Return the data of the current object without its header."""
        # if read_done is set, we've already read or skipped it.
        # back up and read again?
        if self.read_done:
            self._asf_stream.seek(self.current_object.start_pos, os.SEEK_SET)

        self.read_done = True

        # skip header bytes
        hl = len(self.current_object.raw_header)
        self._asf_stream.seek(hl, os.SEEK_CUR)
        buff = self._asf_stream.read(self.current_object.size - hl)
        return buff

    def read_data_part(self, offset, length):
        """Return length bytes starting at the absolute offset.

        The current object is marked as done when the requested part ends
        exactly at the end of the object.
        """
        if (offset + length == self.current_object.start_pos +
                self.current_object.size):
            self.read_done = True
        self._asf_stream.seek(offset, os.SEEK_SET)
        return self._asf_stream.read(length)

    def skip_contents(self):
        """Move the stream to the end of the current object."""
        if not self.read_done:
            self.read_done = True
            self._asf_stream.seek(
                self.current_object.start_pos + self.current_object.size,
                os.SEEK_SET)

    def move_to_child(self):
        """Step over the header of the current object only."""
        self.read_done = True
        # skip the header bytes
        hl = len(self.current_object.raw_header)
        self._asf_stream.seek(hl, os.SEEK_CUR)

    def close(self):
        """Close the underlying file/stream, ignoring errors."""
        try:  # close the file/stream
            self._asf_stream.close()
        except Exception:
            # best effort only, but let KeyboardInterrupt/SystemExit pass
            pass

    def __del__(self):
        try:  # close the file/stream
            self._asf_stream.close()
        except Exception:
            # _asf_stream can be missing when __init__ failed early
            pass

Example #5

Show file

class StreamReader(object):
    """Implements a simple Reader class that reads STREAM-SRS files.

    All block headers are parsed in the constructor; read() then yields
    the collected blocks and read_contents() returns the data of the block
    that was yielded last.
    """
    def __init__(self, path=None, stream=None, archived_file_name=""):
        """Open the input and index every block it contains.

        path: file to open; archived_file_name is passed to RarStream
            when the path is a RAR archive
        stream: opened binary stream that is used when no path is given

        Raises InvalidDataException when the first marker is not a stream
        or m2ts marker, when a block other than the first has size zero or
        when a block does not fit in the file.
        """
        assert path or stream
        if path:
            if is_rar(path):
                self._stream = RarStream(path, archived_file_name)
            else:
                self._stream = open(path, 'rb')
        elif stream:
            self._stream = stream
        # seek to the end to learn the total length, then rewind
        self._stream.seek(0, os.SEEK_END)
        self._file_length = self._stream.tell()
        self._stream.seek(0)

        self.current_block = None
        self.blocks = []

        pos = 0
        while pos < self._file_length:
            # every block starts with 4 bytes marker + 4 bytes size
            if pos + 8 > self._file_length:
                raise InvalidDataException("SRS file too small!")

            # header: block signature
            marker = self._stream.read(4)
            if pos == 0 and marker not in (STREAM_MARKER, M2TS_MARKER):
                raise InvalidDataException("Not a stream or m2ts SRS file!")
            if marker not in (b"STRM", b"SRSF", b"SRST", b"M2TS", b"HDRS"):
                # unknown markers are kept as bytes
                print("Unknown header block encountered")
            else:
                marker = marker.decode("ascii")

            # header: block size
            (size, ) = S_LONG.unpack(self._stream.read(4))
            block = Block(size, marker, pos)
            self.blocks.append(block)
            if _DEBUG:
                print(block)

            if size == 0 and pos != 0:
                # only allowed for the marker block
                raise InvalidDataException("SRS size field is zero")

            # A zero sized marker block would leave pos unchanged and the
            # loop would never end: step over its 8 byte header instead.
            pos += size if size else 8
            if pos > self._file_length:
                raise InvalidDataException("SRS file too small!")

            self._stream.seek(pos)
        self._stream.seek(0)

    def read(self):
        """Yield the indexed blocks in file order.

        The yielded block becomes self.current_block.
        """
        for block in self.blocks:
            self.current_block = block
            yield block

    def read_contents(self):
        """Return the data of the current block.

        Skips the marker and size fields.
        """
        self._stream.seek(self.current_block.start_pos + 8, os.SEEK_SET)
        return self._stream.read(self.current_block.size - 8)

    def close(self):
        """Close the underlying file/stream, ignoring errors."""
        try:  # close the file/stream
            self._stream.close()
        except Exception:
            # best effort only, but let KeyboardInterrupt/SystemExit pass
            pass

    def __del__(self):
        try:  # close the file/stream
            self._stream.close()
        except Exception:
            # _stream can be missing when __init__ failed early
            pass

Example #6

Show file

class Mp3Reader(object):
    """Implements a simple Reader class that reads through MP3
    or MP3-SRS files one block at a time.

    The whole input is parsed in the constructor: the tags at the start
    and the end are located first, whatever lies in between is treated as
    SRS blocks or as one block of MP3 data. read() yields the resulting
    blocks sorted on their start position.
    """
    def __init__(self, path=None, stream=None, archived_file_name=""):
        """Open the input and index all of its blocks.

        path: file to open; archived_file_name is passed to RarStream
            when the path is a RAR archive
        stream: opened binary stream that is used when no path is given

        Raises InvalidDataException when no main data can be found or
        when the SRS or Lyrics3 data is broken.
        """
        assert path or stream
        if path:
            if is_rar(path):
                self._mp3_stream = RarStream(path, archived_file_name)
            else:
                self._mp3_stream = open(path, 'rb')
        elif stream:
            self._mp3_stream = stream
        self._mp3_stream.seek(0, 2)  # reset on ID3v2 tag search
        self._file_length = self._mp3_stream.tell()

        self.current_block = None

        self.blocks = []
        begin_main_content = 0

        # easier for corner case ("ID3" multiple times before sync)
        last_id3v2 = None

        # parse the whole file immediately!
        # 1) check for ID3v2 (beginning of mp3 file)
        # The ID3v2 tag size is the size of the complete tag after
        # unsynchronisation, including padding, excluding the header but not
        # excluding the extended header (total tag size - 10). Only 28 bits
        # (representing up to 256MB) are used in the size description to avoid
        # the introduction of 'false syncsignals'.
        # http://id3.org/id3v2.4.0-structure
        while True:  # tag should be here only once
            # detect repeating leading ID3 tags in the srs files
            startpos = begin_main_content
            self._mp3_stream.seek(startpos, os.SEEK_SET)
            if self._mp3_stream.read(3) == b"ID3":
                # skip ID3v2 version (2 bytes) and flags (1 byte)
                self._mp3_stream.seek(3, os.SEEK_CUR)
                sbytes = self._mp3_stream.read(4)
                size = decode_id3_size(sbytes)

                tag_size = 10 + size  # 3 + 3 + 4
                last_id3v2 = Block(tag_size, "ID3", startpos)
                self.blocks.append(last_id3v2)
                begin_main_content += tag_size
            else:
                break

        # 2) check for ID3v1 (last 128 bytes of mp3 file)
        end_meta_data_offset = self._file_length
        # an input shorter than the tag cannot contain it; seeking to
        # before the start of the stream would fail
        if self._file_length >= 128:
            self._mp3_stream.seek(-128, os.SEEK_END)
            idv1_start_offset = self._mp3_stream.tell()
            first = self._mp3_stream.read(3)
            if first == b"TAG":
                idv1_block = Block(128, "TAG", idv1_start_offset)
                self.blocks.append(idv1_block)
                end_meta_data_offset = idv1_start_offset

        # 3) check for http://id3.org/Lyrics3v2
        # "The Lyrics3 block, after the MP3 audio and before the ID3 tag,
        # begins with the word "LYRICSBEGIN" after which a number of field
        # records follows. The Lyrics3 block ends with a six character size
        # descriptor and the string "LYRICS200". The size value includes the
        # "LYRICSBEGIN" string, but does not include the 6 character size
        # descriptor and the trailing "LYRICS200" string.
        if end_meta_data_offset - 6 - 9 >= 0:
            self._mp3_stream.seek(end_meta_data_offset - 6 - 9, os.SEEK_SET)
            lyrics_footer = self._mp3_stream.read(6 + 9)
            if lyrics_footer[6:] == b"LYRICS200":
                lyrics_size = int(lyrics_footer[:6])  # only header + body
                lyrics3v2_block = Block(lyrics_size + 6 + 9, "LYRICS200",
                                        end_meta_data_offset -
                                        (lyrics_size + 6 + 9))
                self.blocks.append(lyrics3v2_block)
                end_meta_data_offset -= (lyrics_size + 6 + 9)

        # 4) check for http://id3.org/Lyrics3
        if end_meta_data_offset - 9 >= 0:
            self._mp3_stream.seek(end_meta_data_offset - 9, os.SEEK_SET)
            if b"LYRICSEND" == self._mp3_stream.read(9):
                # search the last 5100 bytes before the end marker, or
                # everything in front of it when there is less data
                search_start = max(0, end_meta_data_offset - 5100)
                self._mp3_stream.seek(search_start, os.SEEK_SET)
                lyrics_data = self._mp3_stream.read(end_meta_data_offset -
                                                    search_start)
                index = lyrics_data.find(b"LYRICSBEGIN")
                if index == -1:
                    raise InvalidDataException(
                            "Unable to find start of LyricsV1 block")
                start_block = search_start + index
                lyrics3_block = Block(end_meta_data_offset - start_block,
                                      "LYRICS", start_block)
                self.blocks.append(lyrics3_block)
                end_meta_data_offset -= lyrics3_block.size

        # 5) APE tags
        # "Tag size in bytes including footer and all tag items excluding
        # the header to be as compatible as possible with APE Tags 1.000"
        # "An APEv1 tag at the end of a file must have at least a footer, APEv1
        # tags may never be used at the beginning of a file
        # (unlike APEv2 tags)."
        if end_meta_data_offset - 32 >= 0:
            self._mp3_stream.seek(end_meta_data_offset - 32, os.SEEK_SET)
            if b"APETAGEX" == self._mp3_stream.read(8):
                (version,) = S_LONG.unpack(self._mp3_stream.read(4))
                if version == 2000:
                    header = 32
                else:  # 1000
                    header = 0
                (size,) = S_LONG.unpack(self._mp3_stream.read(4))
                start_block = end_meta_data_offset - size - header
                apev2_block = Block(end_meta_data_offset - start_block,
                                    "APE%s" % version, start_block)
                self.blocks.append(apev2_block)
                end_meta_data_offset -= apev2_block.size

        def marker_has_issues(marker):
            """Tell whether the 4 bytes cannot start the main data."""
            if len(marker) != 4:
                return True
            (sync,) = BE_SHORT.unpack(marker[:2])
            sync_bytes = sync & 0xFFE0 == 0xFFE0
            if not sync_bytes and marker not in (b"RIFF", b"SRSF"):
                return True
            return False

        # in between is SRS or MP3 data
        self._mp3_stream.seek(begin_main_content, os.SEEK_SET)
        marker = self._mp3_stream.read(4)

        if last_id3v2 and marker_has_issues(marker):
            # problem with (angelmoon)-hes_all_i_want_cd_pg2k-bmi
            # The .mp3 files contain ID3+nfo before the real ID3 starts
            # And it's also a RIFF mp3, so it won't play without removing
            # the bad initial tag first.
            # This can cause the space between the "ID3" and the end tag
            # to be empty. (or just wrong)
            # Mickey_K.-Distracted-(DNR019F8)-WEB-2008-B2R has the 'ID3' string
            # in the ID3v2 tag for 02-mickey_k.-distracted_-_dub_mix.mp3
            last_id3 = last_id3v2_before_sync(self._mp3_stream,
                                              self._file_length)
            dupe_id3_string = last_id3 != last_id3v2.start_pos
            after_v2_tag = last_id3 >= last_id3v2.start_pos + last_id3v2.size
            if dupe_id3_string and after_v2_tag:
                # another 'ID3' string found after id3v2 tag
                self._mp3_stream.seek(last_id3 + 3 + 3, os.SEEK_SET)
                sbytes = self._mp3_stream.read(4)
                size = decode_id3_size(sbytes)

                begin_main_content = last_id3 + 10 + size  # 3 + 3 + 4
                # add extra amount of data to the last block
                last_id3v2.size = begin_main_content - last_id3v2.start_pos
            elif dupe_id3_string and not after_v2_tag:
                # another 'ID3' string found inside first id3v2 tag
                if begin_main_content > self._file_length:
                    # first tag is corrupt by definition
                    # assume latter tag to be the good one: parse it
                    # skip 'ID3' + ID3v2 version (2 bytes) and flags (1 byte)
                    self._mp3_stream.seek(last_id3 + 6, os.SEEK_SET)
                    sbytes = self._mp3_stream.read(4)
                    size = decode_id3_size(sbytes)
                    tag_size = 10 + size  # 3 + 3 + 4
                    last_id3v2 = Block(tag_size, "ID3", last_id3)
                    self.blocks.append(last_id3v2)
                    begin_main_content = last_id3 + tag_size

        self._mp3_stream.seek(begin_main_content, os.SEEK_SET)
        marker = self._mp3_stream.read(4)

        if not len(marker):
            # there still is something horribly wrong
            # (unless you think that an mp3 without any music data is possible)
            raise InvalidDataException("Tagging f****d up big time!")

        (sync,) = BE_SHORT.unpack(marker[:2])
        main_size = end_meta_data_offset - begin_main_content
        if marker[:3] == b"SRS":  # SRS data blocks
            cur_pos = begin_main_content
            while cur_pos < begin_main_content + main_size:
                self._mp3_stream.seek(cur_pos, os.SEEK_SET)
                # SRSF, SRST and SRSP
                try:
                    marker = self._mp3_stream.read(4)
                    # size includes the 8 bytes header
                    (size,) = S_LONG.unpack(self._mp3_stream.read(4))
                except Exception:
                    raise InvalidDataException("Not enough SRS data")
                srs_block = Block(size, marker.decode("ascii"), cur_pos)
                self.blocks.append(srs_block)
                cur_pos += size
                if size == 0:
                    raise InvalidDataException("SRS size field is zero")
                if size > begin_main_content + main_size:
                    raise InvalidDataException("Broken SRS")
        elif sync & 0xFFE0 == 0xFFE0 or marker == b"RIFF":
            # first 11 bits all 1 for MP3 frame marker
            mp3_data_block = Block(main_size, "MP3", begin_main_content)
            self.blocks.append(mp3_data_block)
        else:
            print("WARNING: MP3 file is not valid!")
            data_block = Block(main_size, "MP3", begin_main_content)
            self.blocks.append(data_block)

        # the order of which we add blocks doesn't matter this way
        self.blocks.sort(key=lambda block: block.start_pos)

    def read(self):
        """Yield the indexed blocks sorted on start position.

        The yielded block becomes self.current_block.
        """
        for block in self.blocks:
            self.current_block = block
            if _DEBUG:
                print(block)
            yield block

    def read_contents(self):
        """Return all the data of the current block."""
        self._mp3_stream.seek(self.current_block.start_pos, os.SEEK_SET)
        return self._mp3_stream.read(self.current_block.size)

    def read_part(self, size, offset=0):
        """Return size bytes of the current block, starting at offset.

        Raises ValueError when the requested part ends beyond the block.
        """
        if (self.current_block.start_pos + offset + size >
                self.current_block.start_pos + self.current_block.size):
            raise ValueError("Can't read beyond end of block.")
        self._mp3_stream.seek(self.current_block.start_pos + offset,
                              os.SEEK_SET)
        return self._mp3_stream.read(size)

    def close(self):
        """Close the underlying file/stream, ignoring errors."""
        try:  # close the file/stream
            self._mp3_stream.close()
        except Exception:
            # best effort only, but let KeyboardInterrupt/SystemExit pass
            pass

    def __del__(self):
        try:  # close the file/stream
            self._mp3_stream.close()
        except Exception:
            # _mp3_stream can be missing when __init__ failed early
            pass

Example #7

Show file

class EbmlReader(object):
    """Implements a simple Reader class that reads through MKV or
    MKV-SRS files one element at a time.

    Usage: call read() to parse the next element header, then call one of
    read_contents(), skip_contents() or move_to_child() before calling
    read() again.
    """
    def __init__(self,
                 read_mode,
                 path=None,
                 stream=None,
                 archived_file_name=""):
        """Open the input and rewind it to the start.

        read_mode: one of the EbmlReadMode values
        path: file to open; archived_file_name is passed to RarStream
            when the path is a RAR archive
        stream: opened binary stream that is used when no path is given
        """
        assert path or stream
        self.element_header = b""  # 12 bytes

        self._ebml_stream = None
        self.mode = None
        self.read_done = True

        self.current_element = None
        self.element_type = None

        # when not empty: an expected file size has been printed
        # to stderr already when data was missing
        self.expected_file_size = ""

        if path:
            if is_rar(path):
                self._ebml_stream = RarStream(path, archived_file_name)
            else:
                self._ebml_stream = open(path, 'rb')
        elif stream:
            self._ebml_stream = stream
        else:
            assert False
        # seek to the end to learn the total length, then rewind
        self._ebml_stream.seek(0, 2)
        self._file_length = self._ebml_stream.tell()
        self._ebml_stream.seek(0)
        self.mode = read_mode

    def read(self):
        """Parse the header of the element at the current stream position.

        Returns True when an element header was read (it is then available
        as self.current_element) and False when not enough data is left.
        Raises InvalidDataException in Sample mode when an element other
        than the Segment claims to extend beyond the end of the file.
        """
        # "Read() is invalid at this time", "MoveToChild(), ReadContents(), or
        # SkipContents() must be called before Read() can be called again"
        assert self.read_done or (self.mode == EbmlReadMode.SRS
                                  and self.element_type
                                  == EbmlElementType.Block), "improper state"

        element_start_position = self._ebml_stream.tell()

        # too little data (+2: 1B element ID + 1B data size)
        if element_start_position + 2 > self._file_length:
            return False

        self.current_element = None
        self.read_done = False

        # 1) Element ID -------------------------------------------------------
        # length descriptor: the leading bits of the header
        # used to identify the length of the ID (ID: like xml tags)
        read_byte = self._ebml_stream.read(1)
        if not len(read_byte):
            return False
        (id_length_descriptor, ) = BE_BYTE.unpack(read_byte)
        id_length_descriptor = GetUIntLength(id_length_descriptor)
        self.element_header = read_byte
        self.element_header += self._ebml_stream.read(id_length_descriptor - 1)

        # 2) Data size --------------------------------------------------------
        read_byte = self._ebml_stream.read(1)
        if not len(read_byte):
            return False
        (data_length_descriptor, ) = BE_BYTE.unpack(read_byte)
        data_length_descriptor = GetUIntLength(data_length_descriptor)
        self.element_header += read_byte
        self.element_header += self._ebml_stream.read(data_length_descriptor -
                                                      1)

        assert id_length_descriptor + data_length_descriptor == len(
            self.element_header)

        # 3) Data -------------------------------------------------------------
        eh = self.element_header[0:id_length_descriptor]
        self.element_type = id_type_mapping.get(eh, EbmlElementType.Unknown)

        element_length = GetEbmlUInt(self.element_header, id_length_descriptor,
                                     data_length_descriptor)

        # sanity check on element length.  skip check on Segment element so we
        # can still report expected size.  this is only applied on samples
        # since a partial movie might still be useful
        endOffset = (element_start_position + id_length_descriptor +
                     data_length_descriptor + element_length)
        if (self.mode == EbmlReadMode.Sample
                and self.element_type != EbmlElementType.Segment
                and endOffset > self._file_length):
            if self.expected_file_size:
                msg = ("Invalid element length at 0x{0:08X}. "
                       "Expected size: {1} bytes".format(
                           element_start_position, self.expected_file_size))
                raise InvalidDataException(msg)
            else:
                msg = "Invalid element length at 0x{0:08X}"
                raise InvalidDataException(msg.format(element_start_position))

        if self.element_type != EbmlElementType.Block:
            self.current_element = EbmlElement()
            self.current_element.raw_header = self.element_header
            self.current_element.element_start_pos = element_start_position
            self.current_element.length = element_length
        else:  # it's a block
            # first thing in the block is the track number
            trackDescriptor = self._ebml_stream.read(1)
            blockHeader = trackDescriptor
            trackDescriptor = GetUIntLength(BE_BYTE.unpack(trackDescriptor)[0])

            # incredibly unlikely the track number is > 1 byte,
            # but just to be safe...
            if trackDescriptor > 1:
                blockHeader += self._ebml_stream.read(trackDescriptor - 1)

            trackno = GetEbmlUInt(blockHeader, 0, trackDescriptor)

            # read in time code (2 bytes) and flags (1 byte)
            blockHeader += self._ebml_stream.read(3)
            timecode = ((BE_BYTE.unpack_from(blockHeader,
                                             len(blockHeader) - 3)[0] << 8) +
                        BE_BYTE.unpack_from(blockHeader,
                                            len(blockHeader) - 2)[0])

            # need to grab the flags (last byte of the header)
            # to check for lacing
            lace_type = (BE_BYTE.unpack_from(blockHeader,
                                             len(blockHeader) - 1)[0]
                         & EbmlLaceType.EBML)

            data_length = element_length - len(blockHeader)
            frameSizes, bytesConsumed = GetBlockFrameLengths(
                lace_type, data_length, self._ebml_stream)
            if bytesConsumed > 0:
                # the bytes consumed for the frame lengths belong to the
                # block header too: go back and append them
                newBlockHeader = blockHeader
                self._ebml_stream.seek(-bytesConsumed, os.SEEK_CUR)
                newBlockHeader += self._ebml_stream.read(bytesConsumed)
                blockHeader = newBlockHeader

            # what remains is the length of the data after the block header
            element_length -= len(blockHeader)

            self.current_element = BlockElement()
            self.current_element.track_number = trackno
            self.current_element.timecode = timecode
            self.current_element.frame_lengths = frameSizes
            self.current_element.raw_block_header = blockHeader

            self.current_element.raw_header = self.element_header
            self.current_element.element_start_pos = element_start_position
            self.current_element.length = element_length

        return True

    def read_contents(self):
        """Return the data of the current element.

        Returns None for Block elements in SRS mode.
        """
        # if read_done is set, we've already read or skipped it.
        # back up and read again?
        if self.read_done:
            self._ebml_stream.seek(-self.current_element.length, os.SEEK_CUR)

        self.read_done = True
        buff = None

        if (self.mode != EbmlReadMode.SRS
                or self.element_type != EbmlElementType.Block):
            buff = self._ebml_stream.read(self.current_element.length)
        return buff

    def skip_contents(self):
        """Move the stream past the data of the current element.

        Nothing is skipped for Block elements in SRS mode.
        """
        if not self.read_done:
            self.read_done = True
            if (self.mode != EbmlReadMode.SRS
                    or self.element_type != EbmlElementType.Block):
                self._ebml_stream.seek(self.current_element.length,
                                       os.SEEK_CUR)

    def move_to_child(self):
        """Mark the current element as handled so that the next read()
        parses the first element inside its data."""
        if self.read_done:
            # already read or skipped: go back to the start of the data
            self._ebml_stream.seek(-self.current_element.length, os.SEEK_CUR)
        self.read_done = True

    def close(self):
        """Close the underlying file/stream, ignoring errors."""
        try:  # close the file/stream
            self._ebml_stream.close()
        except Exception:
            # best effort only, but let KeyboardInterrupt/SystemExit pass
            pass

    def __del__(self):
        try:  # close the file/stream
            self._ebml_stream.close()
        except Exception:
            # _ebml_stream can be missing or None when __init__ failed early
            pass

Example #8

Show file

class FlacReader(object):
    """Simple reader that walks through a FLAC or FLAC-SRS file one block
    at a time.

    Call read() to parse the next block header, then read_contents() or
    skip_contents() to get past that block before calling read() again.
    """

    def __init__(self, path=None, stream=None, archived_file_name=""):
        """Open the input, measure its length and rewind to the start.

        path: file to open; when is_rar() accepts it, archived_file_name
            is read from the archive through a RarStream.
        stream: already opened, seekable binary stream (used when no path
            is given).
        """
        assert path or stream
        if path:
            if is_rar(path):
                self._flac_stream = RarStream(path, archived_file_name)
            else:
                self._flac_stream = open(path, 'rb')
        elif stream:
            self._flac_stream = stream
        self._flac_stream.seek(0, os.SEEK_END)
        self._file_length = self._flac_stream.tell()
        self._flac_stream.seek(0)

        self.read_done = True
        self.current_block = None
        self.block_type = None

    def read(self):
        """Parse the header of the block at the current stream position.

        On success current_block and block_type describe the block and
        the stream is positioned back at the start of the block.

        Returns False when the end of the file is reached, True otherwise.
        Raises InvalidDataException when the block would run past the end
        of the file.
        """
        assert self.read_done

        block_start_position = self._flac_stream.tell()
        self.current_block = None
        self.read_done = False

        if block_start_position == self._file_length:
            return False

        self._block_header = self._flac_stream.read(4)
        # METADATA_BLOCK_HEADER
        # <1>    Last-metadata-block flag: '1' if this block is the last
        #        metadata block before the audio blocks, '0' otherwise.
        # <7>    BLOCK_TYPE
        # <24>   Length (in bytes) of metadata to follow
        #        (does not include the size of the METADATA_BLOCK_HEADER)

        # stream marker: treated as a block of size 0 whose header is the
        # marker itself
        if self._block_header == b"fLaC":
            self.block_type = "fLaC"
            self.current_block = Block(0, self.block_type)
            self.current_block.raw_header = b"fLaC"
            self.current_block.start_pos = block_start_position
            self._flac_stream.seek(block_start_position, os.SEEK_SET)
            return True

        # ID3v2: 10 byte header, the size is decoded from its last 4 bytes
        if self._block_header.startswith(b"ID3"):
            self.block_type = "ID3"
            self._flac_stream.seek(block_start_position, os.SEEK_SET)
            raw_header = self._flac_stream.read(10)
            size = decode_id3_size(raw_header[6:10])
            self.current_block = Block(size, self.block_type)
            self.current_block.raw_header = raw_header
            self.current_block.start_pos = block_start_position
            self._flac_stream.seek(block_start_position, os.SEEK_SET)
            return True

        # ID3v1: fixed 128 bytes, no separate header
        if self._block_header.startswith(b"TAG"):
            self.block_type = "TAG"
            self.current_block = Block(128, self.block_type)
            self.current_block.raw_header = b""
            self.current_block.start_pos = block_start_position
            self._flac_stream.seek(block_start_position, os.SEEK_SET)
            return True

        (self.block_type, ) = BE_BYTE.unpack_from(self._block_header, 0)
        if self.block_type == 0xFF:  # frame data
            # the frame data runs until the end of the file...
            block_length = self._file_length - block_start_position
            # ...except for a trailing ID3v1 tag. Only look for one when
            # 128 bytes fit behind the start of the frame data: this
            # avoids seeking to a negative offset on tiny files and
            # ending up with a negative block length.
            if block_length >= 128:
                self._flac_stream.seek(self._file_length - 128, os.SEEK_SET)
                if self._flac_stream.read(3) == b"TAG":
                    block_length -= 128
            self._block_header = b""
        else:
            # 24 bit big endian length, padded to 4 bytes for unpacking
            (block_length, ) = BE_LONG.unpack(b"\x00" + self._block_header[1:])

        # sanity check on block length
        end_offset = block_start_position + block_length
        if end_offset > self._file_length:
            raise InvalidDataException("Invalid block length at 0x%08X" %
                                       block_start_position)

        self.current_block = Block(block_length, self.block_type)
        self.current_block.raw_header = self._block_header
        self.current_block.start_pos = block_start_position

        self._flac_stream.seek(block_start_position, os.SEEK_SET)

        return True

    def read_contents(self):
        """Return the data of the current block, without its header.

        Can be called again after the block was read or skipped: the
        stream is then moved back to the start of the block first.
        """
        # if read_done is set, we've already read or skipped it:
        # back up so it can be read again
        if self.read_done:
            self._flac_stream.seek(self.current_block.start_pos, os.SEEK_SET)

        self.read_done = True

        # skip header bytes
        header_length = len(self.current_block.raw_header)
        self._flac_stream.seek(header_length, os.SEEK_CUR)
        return self._flac_stream.read(self.current_block.size)

    def skip_contents(self):
        """Move the stream to the end of the current block, unless the
        block was already read or skipped."""
        if not self.read_done:
            self.read_done = True
            block = self.current_block
            self._flac_stream.seek(
                block.start_pos + len(block.raw_header) + block.size,
                os.SEEK_SET)

    def read_part(self, size, offset=0):
        """Return size bytes of the current block's data, starting offset
        bytes after its header.

        The stream position is restored afterwards, so the call does not
        disturb the normal read()/read_contents() sequence.
        """
        header_length = len(self.current_block.raw_header)
        initial_offset = self._flac_stream.tell()
        if initial_offset != self.current_block.start_pos:
            self._flac_stream.seek(self.current_block.start_pos, os.SEEK_SET)
        self._flac_stream.seek(offset + header_length, os.SEEK_CUR)
        data = self._flac_stream.read(size)
        self._flac_stream.seek(initial_offset, os.SEEK_SET)
        return data

    def close(self):
        """Close the underlying file/stream.

        Best effort: ordinary errors raised while closing are ignored.
        KeyboardInterrupt and SystemExit are not swallowed.
        """
        try:
            self._flac_stream.close()
        except Exception:
            pass

    def __del__(self):
        """Close the underlying file/stream when the reader is finalized.

        Ordinary errors are ignored; this includes a reader whose
        constructor failed before the stream was set.
        """
        try:
            self._flac_stream.close()
        except Exception:
            pass

Example #9

Show file

File: riff.py Project: zc0nf/pyrescene

class RiffReader(object):
	"""Simple reader that walks through an AVI or AVI-SRS file one chunk
	at a time.

	Call read() to parse the next chunk header, then one of
	move_to_child(), read_contents() or skip_contents() before calling
	read() again.
	"""

	def __init__(self, read_mode, path=None, stream=None, match_offset=0,
			archived_file_name=""):
		"""Open the input and position the stream for the first read().

		read_mode: value compared against the RiffReadMode members.
		path: file to open; when is_rar() accepts it, archived_file_name
			is read from the archive through a RarStream.
		stream: already opened, seekable binary stream (used when no path
			is given).
		match_offset: when at least 8 and smaller than the file length,
			parsing starts at a chunk header located before this offset
			instead of at the beginning of the file.

		Raises InvalidMatchOffsetException when match_offset is not
		smaller than the file length.
		"""
		if path:
			if is_rar(path):
				self._riff_stream = RarStream(path, archived_file_name)
			else:
				self._riff_stream = open(path, 'rb')
		elif stream:
			self._riff_stream = stream
		else:
			assert False
		self._riff_stream.seek(0, os.SEEK_END)
		self._file_length = self._riff_stream.tell()
		self.mode = read_mode

		self._reset_state()

		# faster reconstructing when match_offset is provided
		if 8 <= match_offset < self._file_length:
			# -8 is there to add the chunk header for read()
			if self._is_valid_chunk_location(match_offset - 8):
				# yes! reconstruction will be fast
				self._riff_stream.seek(match_offset - 8, os.SEEK_SET)
			else:
				# match offset is not at the start boundary of a chunk
				chunk_offset = self._find_chunk_offset(match_offset)
				if _DEBUG:
					print("Match offset doesn't start on a nice boundary.")
					print("Chunk offset: {0}".format(chunk_offset))
					print("Match offset: {0}".format(match_offset))
				assert chunk_offset < match_offset
				self._riff_stream.seek(chunk_offset, os.SEEK_SET)

			# re-initialisation: locating the chunk may have called read()
			self._reset_state()
		elif match_offset >= self._file_length:
			msg = "Invalid match offset for video: {0}".format(match_offset)
			raise InvalidMatchOffsetException(msg)
		else:
			# no useful matching offset against the main movie file
			self._riff_stream.seek(0)

	def _reset_state(self):
		"""Put the parsing state back to 'nothing read yet'."""
		self.read_done = True
		self.current_chunk = None
		self.chunk_type = None
		self.has_padding = False
		self.padding_byte = ""

	def _is_valid_chunk_location(self, offset):
		"""Checks whether a certain offset is a valid chunk location to
		start processing from. Based on Four Character Code.

		Moves the stream: it is left 4 bytes after offset. Returns the
		result of fourCCValidator.match(), so test it for truthiness.
		"""
		self._riff_stream.seek(offset, os.SEEK_SET)
		fourcc = self._riff_stream.read(4)
		return fourCCValidator.match(fourcc)

	def _find_chunk_offset(self, match_offset):
		"""Finds the start offset of the chunk for the match_offset. It uses
		the index at the end of the file.

		Returns the absolute offset of a chunk header before match_offset,
		or 0 when no usable offset could be determined. The file is parsed
		from the start with read(), so the stream position and the parsing
		state are changed.
		"""
		self._riff_stream.seek(0, os.SEEK_SET)
		index_data = b""
		movi_start = 0

		while self.read():
			fourcc = self.current_chunk.fourcc
			if fourcc == b"AVI ":
				# the index is in here
				self.move_to_child()
			elif fourcc == b"movi":
				# location where the index is relative to
				movi_start = self.current_chunk.chunk_start_pos
			elif self.chunk_type == RiffChunkType.Index:
				index_data = self.read_contents()
				break
			self.skip_contents()

		# https://msdn.microsoft.com/en-us/library/windows/desktop/dd318181(v=vs.85).aspx
		# we've found the index
		if movi_start and index_data:
			# read chunk positions until an _absolute_ file position larger
			# than our match offset is found
			offsets = []
			offset = 0
			idxpos = 0
			while offset < match_offset and idxpos + 16 <= len(index_data):
				(offset,) = S_LONG.unpack(index_data[idxpos + 8:idxpos + 12])
				offsets.append(offset)
				idxpos += 16  # ckid, dwFlags, dwChunkOffset, dwChunkLength

			# choose the last _relative_ chunk smaller than the match offset
			# the match offset is absolute from the beginning of the file
			for offset in reversed(offsets):
				start_offset = movi_start + 8 + offset
				if start_offset < match_offset:
					if self._is_valid_chunk_location(start_offset):
						return start_offset
					if _DEBUG:
						print("AVI doesn't follow the 'idx1' spec.")
					break

			# assume the AVI doesn't follow the specification:
			# try the index offsets as absolute file positions
			for offset in reversed(offsets):
				if offset < match_offset:
					if self._is_valid_chunk_location(offset):
						return offset
					if _DEBUG:
						print("The index offset wasn't usable.")
					return 0
		return 0

	def read(self):
		"""Parse the chunk header at the current stream position.

		On success current_chunk, chunk_type and has_padding describe the
		chunk and the stream is positioned right after its header.

		Returns False when fewer than 8 bytes are left, True otherwise.
		Raises InvalidDataException for an invalid FourCC, or in Sample
		mode for a non-RIFF chunk that runs past the end of the file.
		"""
		# "Read() is invalid at this time", "MoveToChild(), ReadContents(), or
		# SkipContents() must be called before Read() can be called again");
		assert self.read_done or (self.mode == RiffReadMode.SRS and
				self.chunk_type == RiffChunkType.Movi)

		# includes 8 byte header
		chunk_start_position = self._riff_stream.tell()
		self.current_chunk = None
		self.read_done = False

		if chunk_start_position + 8 > self._file_length:
			return False

		chunk_header = self._riff_stream.read(8)
		# 4 bytes for fourcc, 4 for chunk length
		fourcc = chunk_header[:4]
		(chunk_length,) = S_LONG.unpack_from(chunk_header, 4)

		# might not keep this check
		# the length check should catch corruption on its own...
		if not fourCCValidator.match(fourcc):
			raise InvalidDataException("Invalid FourCC value (%r) at 0x%08X" %
					(fourcc, chunk_start_position))

		# sanity check on chunk length
		# Skip check on RIFF list so we can still report expected size.
		# This is only applied on samples,
		# since a partial movie might still be useful.
		end_offset = chunk_start_position + 8 + chunk_length
		if (self.mode == RiffReadMode.Sample and
				fourcc != b"RIFF" and end_offset > self._file_length):
			raise InvalidDataException("Invalid chunk length at 0x%08X" %
					(chunk_start_position + 4))

		if fourcc == b"RIFF" or fourcc == b"LIST":  # Lists
			# if the fourcc indicates a list type (RIFF or LIST),
			# there is another fourcc code in the next 4 bytes
			list_type = fourcc
			chunk_header += self._riff_stream.read(4)
			fourcc = chunk_header[8:12]
			chunk_length -= 4  # extra dwFourCC

			self.chunk_type = RiffChunkType.List
			self.current_chunk = RiffList()
			self.current_chunk.list_type = list_type  # RIFF list specific
		elif fourcc[:2].isdigit():
			# Chunk containing video, audio or subtitle data
			self.current_chunk = MoviChunk()
			self.current_chunk.stream_number = int(fourcc[:2])
			self.chunk_type = RiffChunkType.Movi
		elif fourcc == b"idx1":
			self.current_chunk = RiffChunk()
			self.chunk_type = RiffChunkType.Index
		else:
			self.current_chunk = RiffChunk()
			self.chunk_type = RiffChunkType.Unknown

		# attributes shared by lists and plain chunks
		self.current_chunk.fourcc = fourcc
		self.current_chunk.length = chunk_length
		self.current_chunk.raw_header = chunk_header
		self.current_chunk.chunk_start_pos = chunk_start_position
		# an odd length means one extra byte follows the data
		self.has_padding = chunk_length % 2 == 1

		return True

	def read_contents(self):
		"""Return the data of the current chunk.

		Can be called again after the chunk was read or skipped: the
		stream is then moved back by the chunk length (plus the padding
		byte) first. In SRS mode nothing is read for Movi chunks and None
		is returned. A padding byte, when present, is consumed and stored
		in padding_byte.
		"""
		# if read_done is set, we've already read or skipped it:
		# back up so it can be read again
		if self.read_done:
			self._riff_stream.seek(-self.current_chunk.length -
					(1 if self.has_padding else 0), os.SEEK_CUR)

		self.read_done = True
		buff = None

		if (self.mode != RiffReadMode.SRS or
				self.chunk_type != RiffChunkType.Movi):
			buff = self._riff_stream.read(self.current_chunk.length)

		if self.has_padding:
			(self.padding_byte,) = S_BYTE.unpack(self._riff_stream.read(1))

		return buff

	def skip_contents(self):
		"""Move past the data of the current chunk, unless the chunk was
		already read or skipped.

		In SRS mode the stream is not moved for the data of Movi chunks.
		A padding byte, when present, is read and stored in padding_byte.
		"""
		if not self.read_done:
			self.read_done = True
			if (self.mode != RiffReadMode.SRS
					or self.chunk_type != RiffChunkType.Movi):
				self._riff_stream.seek(self.current_chunk.length, os.SEEK_CUR)

			if self.has_padding:
				(self.padding_byte,) = S_BYTE.unpack(self._riff_stream.read(1))

	def move_to_child(self):
		"""Mark the current list as handled so that the next read() parses
		its first child. The stream is not moved."""
		# "MoveToChild() should only be called on a RIFF List");
		assert self.chunk_type == RiffChunkType.List
		self.read_done = True

	def close(self):
		"""Close the underlying file/stream.

		Best effort: ordinary errors raised while closing are ignored.
		KeyboardInterrupt and SystemExit are not swallowed.
		"""
		try:
			self._riff_stream.close()
		except Exception:
			pass

	def __del__(self):
		"""Close the underlying file/stream when the reader is finalized.

		Ordinary errors are ignored; this includes a reader whose
		constructor failed before the stream was set.
		"""
		try:
			self._riff_stream.close()
		except Exception:
			pass