Example 1
    def __init__(self,
                 read_mode,
                 path=None,
                 stream=None,
                 archived_file_name=""):
        assert path or stream
        self.element_header = b""  # 12 bytes

        self._ebml_stream = None
        self.mode = None
        self.read_done = True

        self.current_element = None
        self.element_type = None

        # when not empty: an expected file size has been printed
        # to stderr already when data was missing
        self.expected_file_size = ""

        if path:
            if is_rar(path):
                self._ebml_stream = RarStream(path, archived_file_name)
            else:
                self._ebml_stream = open(path, 'rb')
        elif stream:
            self._ebml_stream = stream
        else:
            assert False
        self._ebml_stream.seek(0, 2)
        self._file_length = self._ebml_stream.tell()
        self._ebml_stream.seek(0)
        self.mode = read_mode
Example 2
    def test_file(self):
        """ Tests if the file in the rar archive is the same as the
			extracted version. """
        rar_file = os.path.join(self.path, "store_little", "store_little.rar")
        txt_file = os.path.join(self.path, "txt", "little_file.txt")
        rs = RarStream(rar_file)
        with open(txt_file, "rb") as tfile:
            self.assertEqual(rs.read(), tfile.read())

        rar_file = os.path.join(self.path,
                                "store_split_folder_old_srrsfv_windows",
                                "winrar2.80.rar")
        txt_file = os.path.join(self.path, "txt", "unicode_dos.nfo")
        rs = RarStream(rar_file, "unicode_dos.nfo")  # 3316 bytes
        with open(txt_file, "rb") as tfile:
            rs.seek(3316)
            self.assertEqual(rs.seek(6316), rs.tell())
            rs.seek(3312)
            tfile.seek(3336, os.SEEK_SET)
            tfile.seek(6336, os.SEEK_SET)
            rs.read(4)
            rs.seek(0)
            tfile.seek(0)
            self.assertEqual(rs.read(), tfile.read())
            tfile.seek(-20, os.SEEK_END)
            self.assertEqual(rs.seek(-20, os.SEEK_END), tfile.tell())
            self.assertEqual(rs.read(), tfile.read())
        rs.close()
        self.assertEqual(rs.closed, True, "Stream not closed")

        txt_file = os.path.join(self.path, "txt", "unicode_mac.nfo")
        rs = RarStream(rar_file, "unicode_mac.nfo")
        with open(txt_file, "rb") as tfile:
            tfile.seek(3000)
            tfile.read()
            tfile.seek(333)
            rs.seek(333)
            self.assertEqual(rs.read(), tfile.read())
Example 3
	def __init__(self, read_mode, path=None, stream=None, match_offset=0,
			archived_file_name=""):
		if path:
			if is_rar(path):
				self._riff_stream = RarStream(path, archived_file_name)
			else:
				self._riff_stream = open(path, 'rb')
		elif stream:
			self._riff_stream = stream
		else:
			assert False
		self._riff_stream.seek(0, os.SEEK_END)
		self._file_length = self._riff_stream.tell()
		self.mode = read_mode

		self.read_done = True
		self.current_chunk = None
		self.chunk_type = None
		self.has_padding = False
		self.padding_byte = ""

		# faster reconstructing when match_offset is provided
		if match_offset >= 8 and match_offset < self._file_length:
			# -8 is there to add the chunk header for read()
			if self._is_valid_chunk_location(match_offset - 8):
				# yes! reconstruction will be fast
				self._riff_stream.seek(match_offset - 8, os.SEEK_SET)
			else:
				# match offset is not at the start boundary of a chunk
				chunk_offset = self._find_chunk_offset(match_offset)
				if _DEBUG:
					print("Match offset doesn't start on a nice boundary.")
					print("Chunk offset: {0}".format(chunk_offset))
					print("Match offset: {0}".format(match_offset))
				assert chunk_offset < match_offset
				self._riff_stream.seek(chunk_offset, os.SEEK_SET)

			# re-initialisation
			self.read_done = True
			self.current_chunk = None
			self.chunk_type = None
			self.has_padding = False
			self.padding_byte = ""
		elif match_offset >= self._file_length:
			msg = "Invalid match offset for video: {0}".format(match_offset)
			raise InvalidMatchOffsetException(msg)
		else:
			# no useful matching offset against the main movie file
			self._riff_stream.seek(0)
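
The "- 8" adjustments in the constructor above rely on the standard RIFF chunk layout: every chunk starts with a 4-byte FourCC identifier followed by a 4-byte little-endian size that counts only the chunk body, and odd-sized chunks are followed by one padding byte (which is what has_padding tracks). A minimal sketch of reading one such header from a plain file object; the helper name is hypothetical:

import struct

def read_riff_chunk_header(stream):
    """Read one RIFF chunk header: 4-byte FourCC + 4-byte little-endian size."""
    header = stream.read(8)
    if len(header) < 8:
        return None  # end of stream
    fourcc = header[:4]
    (size,) = struct.unpack("<L", header[4:])
    has_padding = size % 2 == 1  # chunk bodies are word-aligned
    return fourcc, size, has_padding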
Example 4
    def __init__(self, path=None, stream=None, archived_file_name=""):
        assert path or stream
        if path:
            if is_rar(path):
                self._flac_stream = RarStream(path, archived_file_name)
            else:
                self._flac_stream = open(path, 'rb')
        elif stream:
            self._flac_stream = stream
        self._flac_stream.seek(0, 2)
        self._file_length = self._flac_stream.tell()
        self._flac_stream.seek(0)

        self.read_done = True
        self.current_block = None
        self.block_type = None
Example 5
    def __init__(self, path=None, stream=None, archived_file_name=""):
        assert path or stream
        if path:
            if is_rar(path):
                self._stream = RarStream(path, archived_file_name)
            else:
                self._stream = open(path, 'rb')
        elif stream:
            self._stream = stream
        self._stream.seek(0, 2)
        self._file_length = self._stream.tell()
        self._stream.seek(0)

        self.current_block = None
        self.blocks = []

        pos = 0
        while pos < self._file_length:
            if pos + 8 > self._file_length:
                raise InvalidDataException("SRS file too small!")

            # header: block signature
            marker = self._stream.read(4)
            if pos == 0 and marker not in (STREAM_MARKER, M2TS_MARKER):
                raise InvalidDataException("Not a stream or m2ts SRS file!")
            if marker not in (b"STRM", b"SRSF", b"SRST", b"M2TS", b"HDRS"):
                print("Unknown header block encountered")
            else:
                marker = marker.decode("ascii")

            # header: block size
            (size, ) = S_LONG.unpack(self._stream.read(4))
            block = Block(size, marker, pos)
            self.blocks.append(block)
            if _DEBUG:
                print(block)

            if size == 0 and pos != 0:
                # only allowed for the marker block
                raise InvalidDataException("SRS size field is zero")

            pos += size
            if pos > self._file_length:
                raise InvalidDataException("SRS file too small!")

            self._stream.seek(pos)
        self._stream.seek(0)
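
Every block header read in the loop above is 8 bytes: a 4-byte ASCII marker followed by a 4-byte size field unpacked with S_LONG (assumed here to be a little-endian unsigned 32-bit struct; its definition is not shown). Because the size includes those 8 header bytes, pos += size jumps directly to the next block. A self-contained sketch of the same walk over a plain file, with hypothetical names:

import os
import struct

S_LONG = struct.Struct("<L")  # assumed little-endian, matching the snippet above

def list_srs_blocks(path):
    """Yield (marker, size, offset) for every top-level SRS block."""
    with open(path, "rb") as srs:
        srs.seek(0, os.SEEK_END)
        length = srs.tell()
        pos = 0
        while pos + 8 <= length:
            srs.seek(pos)
            marker = srs.read(4)
            (size,) = S_LONG.unpack(srs.read(4))
            yield marker, size, pos
            if size == 0:
                break  # corrupt data; avoid looping forever
            pos += size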
Example 6
    def __init__(self,
                 read_mode,
                 path=None,
                 stream=None,
                 archived_file_name=""):
        assert path or stream, "missing ASF reader input"
        if path:
            if is_rar(path):
                self._asf_stream = RarStream(path, archived_file_name)
            else:
                self._asf_stream = open(path, 'rb')
        elif stream:
            self._asf_stream = stream
        self._asf_stream.seek(0, 2)
        self._file_length = self._asf_stream.tell()
        self._asf_stream.seek(0)
        self.mode = read_mode

        self.read_done = True
        self.current_object = None
        self.object_guid = None
Example 7
    def __init__(self,
                 read_mode=M2tsReadMode.M2ts,
                 path=None,
                 stream=None,
                 match_offset=0,
                 archived_file_name=""):
        assert path or stream
        if path:
            if is_rar(path):
                self._stream = RarStream(path, archived_file_name)
            else:
                self._stream = open(path, 'rb')
        elif stream:
            self._stream = stream
        self._stream.seek(0, 2)
        self._file_length = self._stream.tell()
        self.mode = read_mode
        self.read_done = True

        self.current_packet = None
        self.current_offset = 0

        if self._file_length < 192:
            raise InvalidDataException("File too small")

        # faster reconstructing when match_offset is provided
        if match_offset >= 8 and match_offset < self._file_length:
            # use the largest multiple of 192 at or below the offset as a starting point
            start = match_offset // PACKET_SIZE * PACKET_SIZE
            self._stream.seek(start)
            self.current_offset = start
        elif match_offset >= self._file_length:
            msg = "Invalid match offset for video: {0}".format(match_offset)
            raise InvalidMatchOffsetException(msg)
        else:
            # no useful matching offset against the main movie file
            self._stream.seek(0)
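
For context: an M2TS stream is a sequence of fixed 192-byte packets (a 4-byte timestamp prefix plus a 188-byte MPEG transport packet), so snapping a match offset down to a packet boundary is plain integer arithmetic. A small illustration, assuming PACKET_SIZE is 192:

PACKET_SIZE = 192  # assumed value for BDAV/M2TS packets

match_offset = 1000
start = match_offset // PACKET_SIZE * PACKET_SIZE
print(start)  # 960: the largest multiple of 192 that does not pass the offset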
Example 8
    def test_folder_multiple(self):
        # with path and multiple files in folder / split volumes
        rs = RarStream(
            os.path.join(self.path, self.folder, "store_split_folder.rar"),
            "txt/users_manual4.00.txt")
        with open(os.path.join(self.path, "txt", "users_manual4.00.txt"),
                  "rb") as txt_file:
            # + other tests to increase code coverage
            self.assertEqual(rs.read(), txt_file.read())
            self.assertEqual(rs.tell(), txt_file.tell())
            self.assertEqual(rs.length(), txt_file.tell())
            self.assertEqual(rs.readable(), True)
            self.assertEqual(rs.seekable(), True)
            self.assertEqual(rs.read(), b"")
            self.assertEqual(rs.read(), b"")
            rs.seek(0, os.SEEK_SET)
            rs.read(2)
            rs.seek(0, os.SEEK_END)
            self.assertRaises(IndexError, rs.seek, -1)
        self.assertEqual(rs.list_files(), [
            "txt\\empty_file.txt", "txt\\little_file.txt",
            "txt\\users_manual4.00.txt"
        ])
        self.assertRaises(NotImplementedError, rs.readinto, "")
Example 9
	def __init__(self, path=None, stream=None, archived_file_name=""):
		assert path or stream
		if path:
			if is_rar(path):
				self._mp3_stream = RarStream(path, archived_file_name)
			else:
				self._mp3_stream = open(path, 'rb')
		elif stream:
			self._mp3_stream = stream
		self._mp3_stream.seek(0, 2)  # reset on ID3v2 tag search
		self._file_length = self._mp3_stream.tell()

		self.current_block = None

		self.blocks = []
		begin_main_content = 0

		# easier for corner case ("ID3" multiple times before sync)
		last_id3v2 = None

		# parse the whole file immediately!
		# 1) check for ID3v2 (beginning of mp3 file)
		# The ID3v2 tag size is the size of the complete tag after
		# unsynchronisation, including padding, excluding the header but not
		# excluding the extended header (total tag size - 10). Only 28 bits
		# (representing up to 256MB) are used in the size description to avoid
		# the introduction of 'false syncsignals'.
		# http://id3.org/id3v2.4.0-structure
		while True:  # tag should be here only once
			# detect repeating leading ID3 tags in the srs files
			startpos = begin_main_content
			self._mp3_stream.seek(startpos, os.SEEK_SET)
			if self._mp3_stream.read(3) == b"ID3":
				# skip ID3v2 version (2 bytes) and flags (1 byte)
				self._mp3_stream.seek(3, os.SEEK_CUR)
				sbytes = self._mp3_stream.read(4)
				size = decode_id3_size(sbytes)

				tag_size = 10 + size  # 3 + 3 + 4
				last_id3v2 = Block(tag_size, "ID3", startpos)
				self.blocks.append(last_id3v2)
				begin_main_content += tag_size
			else:
				break

		# 2) check for ID3v1 (last 128 bytes of mp3 file)
		end_meta_data_offset = self._file_length
		self._mp3_stream.seek(-128, os.SEEK_END)
		idv1_start_offset = self._mp3_stream.tell()
		first = self._mp3_stream.read(3)
		if first == b"TAG":
			idv1_block = Block(128, "TAG", idv1_start_offset)
			self.blocks.append(idv1_block)
			end_meta_data_offset = idv1_start_offset

		# 3) check for http://id3.org/Lyrics3v2
		# "The Lyrics3 block, after the MP3 audio and before the ID3 tag,
		# begins with the word "LYRICSBEGIN" after which a number of field
		# records follows. The Lyrics3 block ends with a six character size
		# descriptor and the string "LYRICS200". The size value includes the
		# "LYRICSBEGIN" string, but does not include the 6 character size
		# descriptor and the trailing "LYRICS200" string."
		if end_meta_data_offset - 6 - 9 >= 0:
			self._mp3_stream.seek(end_meta_data_offset - 6 - 9, os.SEEK_SET)
			lyrics_footer = self._mp3_stream.read(6 + 9)
			if lyrics_footer[6:] == b"LYRICS200":
				lyrics_size = int(lyrics_footer[:6])  # only header + body
				lyrics3v2_block = Block(lyrics_size + 6 + 9, "LYRICS200",
				                        end_meta_data_offset -
				                        (lyrics_size + 6 + 9))
				self.blocks.append(lyrics3v2_block)
				end_meta_data_offset -= (lyrics_size + 6 + 9)

		# 4) check for http://id3.org/Lyrics3
		if end_meta_data_offset - 9 >= 0:
			self._mp3_stream.seek(end_meta_data_offset - 9, os.SEEK_SET)
			if b"LYRICSEND" == self._mp3_stream.read(9):
				self._mp3_stream.seek(end_meta_data_offset - 5100, os.SEEK_SET)
				lyrics_data = self._mp3_stream.read(5100)
				index = lyrics_data.find(b"LYRICSBEGIN")
				if index == -1:
					raise InvalidDataException(
							"Unable to find start of LyricsV1 block")
				start_block = end_meta_data_offset - 5100 + index
				lyrics3_block = Block(end_meta_data_offset - start_block,
				                      "LYRICS", start_block)
				self.blocks.append(lyrics3_block)
				end_meta_data_offset -= lyrics3_block.size

		# 5) APE tags
		# "Tag size in bytes including footer and all tag items excluding
		# the header to be as compatible as possible with APE Tags 1.000"
		# "An APEv1 tag at the end of a file must have at least a footer, APEv1
		# tags may never be used at the beginning of a file
		# (unlike APEv2 tags)."
		if end_meta_data_offset - 32 >= 0:
			self._mp3_stream.seek(end_meta_data_offset - 32, os.SEEK_SET)
			if b"APETAGEX" == self._mp3_stream.read(8):
				(version,) = S_LONG.unpack(self._mp3_stream.read(4))
				if version == 2000:
					header = 32
				else:  # 1000
					header = 0
				(size,) = S_LONG.unpack(self._mp3_stream.read(4))
				start_block = end_meta_data_offset - size - header
				apev2_block = Block(end_meta_data_offset - start_block,
				                    "APE%s" % version, start_block)
				self.blocks.append(apev2_block)
				end_meta_data_offset -= apev2_block.size

		def marker_has_issues(marker):
			if len(marker) != 4:
				return True
			(sync,) = BE_SHORT.unpack(marker[:2])
			sync_bytes = sync & 0xFFE0 == 0xFFE0
			if not sync_bytes and marker not in (b"RIFF", b"SRSF"):
				return True
			return False

		# in between is SRS or MP3 data
		self._mp3_stream.seek(begin_main_content, os.SEEK_SET)
		marker = self._mp3_stream.read(4)

		if last_id3v2 and marker_has_issues(marker):
			# problem with (angelmoon)-hes_all_i_want_cd_pg2k-bmi
			# The .mp3 files contain ID3+nfo before the real ID3 starts
			# And it's also a RIFF mp3, so it won't play without removing
			# the bad initial tag first.
			# This can cause the space between the "ID3" and the end tag
			# to be empty. (or just wrong)
			# Mickey_K.-Distracted-(DNR019F8)-WEB-2008-B2R has the 'ID3' string
			# in the ID3v2 tag for 02-mickey_k.-distracted_-_dub_mix.mp3
			last_id3 = last_id3v2_before_sync(self._mp3_stream,
			                                  self._file_length)
			dupe_id3_string = last_id3 != last_id3v2.start_pos
			after_v2_tag = last_id3 >= last_id3v2.start_pos + last_id3v2.size
			if dupe_id3_string and after_v2_tag:
				# another 'ID3' string found after id3v2 tag
				self._mp3_stream.seek(last_id3 + 3 + 3, os.SEEK_SET)
				sbytes = self._mp3_stream.read(4)
				size = decode_id3_size(sbytes)

				begin_main_content = last_id3 + 10 + size  # 3 + 3 + 4
				# add extra amount of data to the last block
				last_id3v2.size = begin_main_content - last_id3v2.start_pos
			elif dupe_id3_string and not after_v2_tag:
				# another 'ID3' string found inside first id3v2 tag
				if begin_main_content > self._file_length:
					# first tag is corrupt by definition
					# assume latter tag to be the good one: parse it
					# skip 'ID3' + ID3v2 version (2 bytes) and flags (1 byte)
					self._mp3_stream.seek(last_id3 + 6, os.SEEK_SET)
					sbytes = self._mp3_stream.read(4)
					size = decode_id3_size(sbytes)
					tag_size = 10 + size  # 3 + 3 + 4
					last_id3v2 = Block(tag_size, "ID3", last_id3)
					self.blocks.append(last_id3v2)
					begin_main_content = last_id3 + tag_size

		self._mp3_stream.seek(begin_main_content, os.SEEK_SET)
		marker = self._mp3_stream.read(4)

		if not len(marker):
			# there still is something horribly wrong
			# (unless you think that an mp3 without any music data is possible)
			raise InvalidDataException("Tagging f****d up big time!")

		(sync,) = BE_SHORT.unpack(marker[:2])
		main_size = end_meta_data_offset - begin_main_content
		if marker[:3] == b"SRS":  # SRS data blocks
			cur_pos = begin_main_content
			while cur_pos < begin_main_content + main_size:
				self._mp3_stream.seek(cur_pos, os.SEEK_SET)
				# SRSF, SRST and SRSP
				try:
					marker = self._mp3_stream.read(4)
					# size includes the 8 bytes header
					(size,) = S_LONG.unpack(self._mp3_stream.read(4))
				except:
					raise InvalidDataException("Not enough SRS data")
				srs_block = Block(size, marker.decode("ascii"), cur_pos)
				self.blocks.append(srs_block)
				cur_pos += size
				if size == 0:
					raise InvalidDataException("SRS size field is zero")
				if size > begin_main_content + main_size:
					raise InvalidDataException("Broken SRS")
		elif sync & 0xFFE0 == 0xFFE0 or marker == b"RIFF":
			# first 11 bits all 1 for MP3 frame marker
			mp3_data_block = Block(main_size, "MP3", begin_main_content)
			self.blocks.append(mp3_data_block)
		else:
			print("WARNING: MP3 file is not valid!")
			data_block = Block(main_size, "MP3", begin_main_content)
			self.blocks.append(data_block)

		# the order of which we add blocks doesn't matter this way
		self.blocks.sort(key=lambda block: block.start_pos)
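
The ID3v2 size bytes mentioned in the comments above use the "synchsafe" encoding: four bytes with the high bit of each cleared, leaving 28 significant bits, so the value can never be mistaken for an MP3 frame sync. The helper decode_id3_size is not shown in this example; a minimal sketch of what it presumably does:

def decode_id3_size(sbytes):
    """Decode a 4-byte synchsafe integer (7 significant bits per byte)."""
    size = 0
    for byte in bytearray(sbytes):  # bytearray() iterates as ints on Python 2 and 3
        size = (size << 7) | (byte & 0x7F)
    return size

# decode_id3_size(b"\x00\x00\x02\x01") == 257: a tag body of 257 bytes after the header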
Example 10
    def test_error_compressed_rar(self):
        compr = os.path.join(os.pardir, os.pardir, "test_files", "best_little",
                             "best_little.rar")
        # AttributeError: Compressed RARs are not supported
        self.assertRaises(AttributeError, RarStream, compr)
        RarStream(compr, compressed=True)
Example 11
    def test_read_nothing(self):
        rar_file = os.path.join(self.path, "store_little", "store_little.rar")
        rs = RarStream(rar_file)
        self.assertEqual(b"", rs.read(0))
Example 12
def osohash_from(rar_archive, enclosed_file=None, middle=False):
    """If enclosed_file is not supplied, the srr_hash will be calculated based
	on the first file in the archive(s). To get a list of the files inside the
	archive, use RarReader.list_files().
	middle: not the first RAR archive from the set is expected in the stream"""
    return _osorg_hash(RarStream(rar_archive, enclosed_file, middle))
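
For reference, the OpenSubtitles ("OSO") hash that _osorg_hash presumably implements is the 64-bit file size plus the sum of the first and last 64 KiB of the file read as little-endian 64-bit words, with overflow discarded; passing a RarStream lets it be computed directly on a file stored inside a RAR set. A minimal sketch of that formula over a seekable binary stream, offered as an assumption about the helper rather than its actual code:

import os
import struct

def oso_hash(stream):
    """Standard OpenSubtitles hash: file size + sums of first and last 64 KiB."""
    chunk = 64 * 1024
    stream.seek(0, os.SEEK_END)
    size = stream.tell()
    value = size

    for offset in (0, max(0, size - chunk)):
        stream.seek(offset)
        data = stream.read(chunk)
        usable = len(data) - len(data) % 8
        # struct.iter_unpack requires Python 3.4+
        for (word,) in struct.iter_unpack("<Q", data[:usable]):
            value = (value + word) & 0xFFFFFFFFFFFFFFFF
    return "%016x" % value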