def __init__(self, read_mode, path=None, stream=None, archived_file_name=""):
    """Open the EBML input and initialise the reader state.

    Args:
        read_mode: Stored unchanged in ``self.mode``.
        path: File to read. When ``is_rar(path)`` is true the data is read
            through a ``RarStream``, otherwise the file is opened in
            binary mode. Takes precedence over ``stream``.
        stream: Already opened, seekable binary stream. Only used when no
            ``path`` is given.
        archived_file_name: Forwarded to ``RarStream`` for RAR input.

    Raises:
        AssertionError: If neither ``path`` nor ``stream`` is supplied.
    """
    if not (path or stream):
        # Raised explicitly (not via ``assert``) so that the check is still
        # performed when Python runs with -O.
        raise AssertionError("either path or stream must be supplied")

    self.element_header = b""  # 12 bytes
    self._ebml_stream = None
    self.mode = read_mode
    self.read_done = True
    self.current_element = None
    self.element_type = None
    # when not empty: an expected file size has been printed
    # to stderr already when data was missing
    self.expected_file_size = ""

    if not path:
        self._ebml_stream = stream
    elif is_rar(path):
        self._ebml_stream = RarStream(path, archived_file_name)
    else:
        self._ebml_stream = open(path, 'rb')

    # Determine the input length by seeking to the end, then rewind.
    self._ebml_stream.seek(0, 2)  # whence 2: relative to the end
    self._file_length = self._ebml_stream.tell()
    self._ebml_stream.seek(0)
def test_file(self):
    """Archived files read through RarStream must equal the extracted
    copies on disk; seek(), tell() and close() are exercised on the way."""
    def fixture(*parts):
        # every fixture lives below the test data directory
        return os.path.join(self.path, *parts)

    # archive with a single stored file: no archived name needed
    stream = RarStream(fixture("store_little", "store_little.rar"))
    with open(fixture("txt", "little_file.txt"), "rb") as expected:
        self.assertEqual(stream.read(), expected.read())

    old_archive = fixture("store_split_folder_old_srrsfv_windows",
                          "winrar2.80.rar")
    stream = RarStream(old_archive, "unicode_dos.nfo")  # 3.316 bytes
    with open(fixture("txt", "unicode_dos.nfo"), "rb") as expected:
        # seeking around (also past the end) before reading everything
        stream.seek(3316)
        self.assertEqual(stream.seek(6316), stream.tell())
        stream.seek(3312)
        expected.seek(3336, os.SEEK_SET)
        expected.seek(6336, os.SEEK_SET)
        stream.read(4)
        stream.seek(0)
        expected.seek(0)
        self.assertEqual(stream.read(), expected.read())
        # seeking relative to the end must agree with a regular file
        expected.seek(-20, os.SEEK_END)
        self.assertEqual(stream.seek(-20, os.SEEK_END), expected.tell())
        self.assertEqual(stream.read(), expected.read())
    stream.close()
    self.assertEqual(stream.closed, True, "Stream not closed")

    # same archive, other file: both sides start reading at offset 333
    stream = RarStream(old_archive, "unicode_mac.nfo")
    with open(fixture("txt", "unicode_mac.nfo"), "rb") as expected:
        expected.seek(3000)
        expected.read()
        expected.seek(333)
        stream.seek(333)
        self.assertEqual(stream.read(), expected.read())
def __init__(self, read_mode, path=None, stream=None,
             match_offset=0, archived_file_name=""):
    """Open the RIFF input and position it for reading.

    Args:
        read_mode: Stored unchanged in ``self.mode``.
        path: File to read. When ``is_rar(path)`` is true the data is read
            through a ``RarStream``, otherwise the file is opened in
            binary mode. Takes precedence over ``stream``.
        stream: Already opened, seekable binary stream. Only used when no
            ``path`` is given.
        match_offset: Offset of a known match in the input. When it is at
            least 8 and inside the file, reading starts at the chunk
            belonging to that offset instead of at the beginning.
        archived_file_name: Forwarded to ``RarStream`` for RAR input.

    Raises:
        AssertionError: If neither ``path`` nor ``stream`` is supplied, or
            if the chunk offset found is not before ``match_offset``.
        InvalidMatchOffsetException: If ``match_offset`` is at or beyond
            the end of the input.
    """
    if path:
        if is_rar(path):
            self._riff_stream = RarStream(path, archived_file_name)
        else:
            self._riff_stream = open(path, 'rb')
    elif stream:
        self._riff_stream = stream
    else:
        # NOTE(review): this guard disappears under ``python -O``; the
        # next statement then fails with AttributeError instead.
        assert False
    # the end position of the stream is the length of the input
    self._riff_stream.seek(0, os.SEEK_END)
    self._file_length = self._riff_stream.tell()
    self.mode = read_mode

    self.read_done = True
    self.current_chunk = None
    self.chunk_type = None
    self.has_padding = False
    self.padding_byte = ""

    # faster reconstructing when match_offset is provided
    if match_offset >= 8 and match_offset < self._file_length:
        # -8 is there to add the chunk header for read()
        if self._is_valid_chunk_location(match_offset - 8):
            # yes! reconstruction will be fast
            self._riff_stream.seek(match_offset - 8, os.SEEK_SET)
        else:
            # match offset is not at the start boundary of a chunk
            chunk_offset = self._find_chunk_offset(match_offset)
            if _DEBUG:
                print("Match offset doesn't start on a nice boundary.")
                print("Chunk offset: {0}".format(chunk_offset))
                print("Match offset: {0}".format(match_offset))
            assert chunk_offset < match_offset
            self._riff_stream.seek(chunk_offset, os.SEEK_SET)

        # re-initialisation
        # NOTE(review): presumably needed because the helper calls above
        # may have changed the reader state -- confirm against the helpers.
        self.read_done = True
        self.current_chunk = None
        self.chunk_type = None
        self.has_padding = False
        self.padding_byte = ""
    elif match_offset >= self._file_length:
        msg = "Invalid match offset for video: {0}".format(match_offset)
        raise InvalidMatchOffsetException(msg)
    else:
        # no useful matching offset against the main movie file
        self._riff_stream.seek(0)
def __init__(self, path=None, stream=None, archived_file_name=""):
    """Open the FLAC input and initialise the reader state.

    Args:
        path: File to read. When ``is_rar(path)`` is true the data is read
            through a ``RarStream``, otherwise the file is opened in
            binary mode. Takes precedence over ``stream``.
        stream: Already opened, seekable binary stream. Only used when no
            ``path`` is given.
        archived_file_name: Forwarded to ``RarStream`` for RAR input.

    Raises:
        AssertionError: If neither ``path`` nor ``stream`` is supplied.
    """
    if not (path or stream):
        # Raised explicitly (not via ``assert``) so that the check is still
        # performed when Python runs with -O.
        raise AssertionError("either path or stream must be supplied")

    if not path:
        self._flac_stream = stream
    elif is_rar(path):
        self._flac_stream = RarStream(path, archived_file_name)
    else:
        self._flac_stream = open(path, 'rb')

    # Determine the input length by seeking to the end, then rewind.
    self._flac_stream.seek(0, 2)  # whence 2: relative to the end
    self._file_length = self._flac_stream.tell()
    self._flac_stream.seek(0)

    self.read_done = True
    self.current_block = None
    self.block_type = None
def __init__(self, path=None, stream=None, archived_file_name=""):
    """Open a stream/M2TS SRS file and index all blocks it contains.

    Every block starts with an 8 byte header: a 4 byte signature followed
    by a 4 byte size that is unpacked with ``S_LONG``. One ``Block`` per
    header is appended to ``self.blocks`` and the stream is rewound
    afterwards.

    Args:
        path: File to read. When ``is_rar(path)`` is true the data is read
            through a ``RarStream``, otherwise the file is opened in
            binary mode. Takes precedence over ``stream``.
        stream: Already opened, seekable binary stream. Only used when no
            ``path`` is given.
        archived_file_name: Forwarded to ``RarStream`` for RAR input.

    Raises:
        AssertionError: If neither ``path`` nor ``stream`` is supplied.
        InvalidDataException: If the first signature is neither
            ``STREAM_MARKER`` nor ``M2TS_MARKER``, a block other than the
            first one has size zero, or a block runs past the end of the
            input.
    """
    if not (path or stream):
        # Raised explicitly (not via ``assert``) so that the check is still
        # performed when Python runs with -O.
        raise AssertionError("either path or stream must be supplied")

    if not path:
        self._stream = stream
    elif is_rar(path):
        self._stream = RarStream(path, archived_file_name)
    else:
        self._stream = open(path, 'rb')

    # Determine the input length by seeking to the end, then rewind.
    self._stream.seek(0, 2)  # whence 2: relative to the end
    self._file_length = self._stream.tell()
    self._stream.seek(0)

    self.current_block = None
    self.blocks = []

    header_size = 8  # 4 bytes signature + 4 bytes size field
    known_markers = (b"STRM", b"SRSF", b"SRST", b"M2TS", b"HDRS")
    pos = 0
    while pos < self._file_length:
        if pos + header_size > self._file_length:
            raise InvalidDataException("SRS file too small!")

        # header: block signature
        marker = self._stream.read(4)
        if pos == 0 and marker not in (STREAM_MARKER, M2TS_MARKER):
            raise InvalidDataException("Not a stream or m2ts SRS file!")
        if marker in known_markers:
            marker = marker.decode("ascii")
        else:
            # unknown signatures are reported and kept as raw bytes
            print("Unknown header block encountered")

        # header: block size
        (size,) = S_LONG.unpack(self._stream.read(4))

        block = Block(size, marker, pos)
        self.blocks.append(block)
        if _DEBUG:
            print(block)
        if size == 0 and pos != 0:
            # only allowed for the marker block
            raise InvalidDataException("SRS size field is zero")

        # A zero-sized marker block used to leave ``pos`` at 0, so the
        # loop never ended and ``self.blocks`` grew without bound. Such a
        # block is treated as header-only: step over its 8 header bytes.
        pos += size if size else header_size
        if pos > self._file_length:
            raise InvalidDataException("SRS file too small!")
        self._stream.seek(pos)
    self._stream.seek(0)
def __init__(self, read_mode, path=None, stream=None, archived_file_name=""):
    """Open the ASF input and initialise the reader state.

    Args:
        read_mode: Stored unchanged in ``self.mode``.
        path: File to read. When ``is_rar(path)`` is true the data is read
            through a ``RarStream``, otherwise the file is opened in
            binary mode. Takes precedence over ``stream``.
        stream: Already opened, seekable binary stream. Only used when no
            ``path`` is given.
        archived_file_name: Forwarded to ``RarStream`` for RAR input.

    Raises:
        AssertionError: If neither ``path`` nor ``stream`` is supplied.
    """
    if not (path or stream):
        # Raised explicitly (not via ``assert``) so that the check is still
        # performed when Python runs with -O.
        raise AssertionError("missing ASF reader input")

    if not path:
        self._asf_stream = stream
    elif is_rar(path):
        self._asf_stream = RarStream(path, archived_file_name)
    else:
        self._asf_stream = open(path, 'rb')

    # Determine the input length by seeking to the end, then rewind.
    self._asf_stream.seek(0, 2)  # whence 2: relative to the end
    self._file_length = self._asf_stream.tell()
    self._asf_stream.seek(0)

    self.mode = read_mode
    self.read_done = True
    self.current_object = None
    self.object_guid = None
def __init__(self, read_mode=M2tsReadMode.M2ts, path=None, stream=None,
             match_offset=0, archived_file_name=""):
    """Open the M2TS input and position it for reading.

    Args:
        read_mode: Stored unchanged in ``self.mode``.
        path: File to read. When ``is_rar(path)`` is true the data is read
            through a ``RarStream``, otherwise the file is opened in
            binary mode. Takes precedence over ``stream``.
        stream: Already opened, seekable binary stream. Only used when no
            ``path`` is given.
        match_offset: Byte offset of a known match in the input. When it
            is at least 8 and inside the file, reading starts at the
            largest multiple of ``PACKET_SIZE`` that does not exceed it;
            smaller values start at offset 0.
        archived_file_name: Forwarded to ``RarStream`` for RAR input.

    Raises:
        AssertionError: If neither ``path`` nor ``stream`` is supplied.
        InvalidDataException: If the input is shorter than 192 bytes.
        InvalidMatchOffsetException: If ``match_offset`` is at or beyond
            the end of the input.
    """
    if not (path or stream):
        # Raised explicitly (not via ``assert``) so that the check is still
        # performed when Python runs with -O.
        raise AssertionError("either path or stream must be supplied")

    if not path:
        self._stream = stream
    elif is_rar(path):
        self._stream = RarStream(path, archived_file_name)
    else:
        self._stream = open(path, 'rb')

    # the end position of the stream is the length of the input
    self._stream.seek(0, 2)  # whence 2: relative to the end
    self._file_length = self._stream.tell()
    self.mode = read_mode
    self.read_done = True
    self.current_packet = None
    self.current_offset = 0

    if self._file_length < 192:
        raise InvalidDataException("File too small")

    # faster reconstructing when match_offset is provided
    if 8 <= match_offset < self._file_length:
        # Round down to a packet boundary. The integer division alone
        # yields a packet *index*; multiplying by the packet size turns
        # it back into the byte offset that seek() expects.
        start = match_offset // PACKET_SIZE * PACKET_SIZE
        self._stream.seek(start)
        self.current_offset = start
    elif match_offset >= self._file_length:
        msg = "Invalid match offset for video: {0}".format(match_offset)
        raise InvalidMatchOffsetException(msg)
    else:
        # no useful matching offset against the main movie file
        self._stream.seek(0)
def test_folder_multiple(self):
    """Read one of several files stored in a folder of a split-volume
    archive and check the remaining parts of the stream interface."""
    # with path and multiple files in folder / split volumes
    archive = os.path.join(self.path, self.folder, "store_split_folder.rar")
    extracted = os.path.join(self.path, "txt", "users_manual4.00.txt")
    stream = RarStream(archive, "txt/users_manual4.00.txt")
    with open(extracted, "rb") as reference:
        # + other tests to increase code coverage
        self.assertEqual(stream.read(), reference.read())
        self.assertEqual(stream.tell(), reference.tell())
        self.assertEqual(stream.length(), reference.tell())
        self.assertEqual(stream.readable(), True)
        self.assertEqual(stream.seekable(), True)
        # everything has been consumed: further reads return nothing
        for _ in range(2):
            self.assertEqual(stream.read(), b"")
        stream.seek(0, os.SEEK_SET)
        stream.read(2)
        stream.seek(0, os.SEEK_END)
        self.assertRaises(IndexError, stream.seek, -1)
    stored_names = [
        "txt\\empty_file.txt",
        "txt\\little_file.txt",
        "txt\\users_manual4.00.txt",
    ]
    self.assertEqual(stream.list_files(), stored_names)
    self.assertRaises(NotImplementedError, stream.readinto, "")
def __init__(self, path=None, stream=None, archived_file_name=""):
    """Open the MP3 or MP3-SRS input and index all blocks it contains.

    The whole input is parsed immediately: leading ID3v2 tags, then the
    trailing ID3v1, Lyrics3v2, Lyrics3 and APE tags. What remains in
    between is recorded either as a series of SRS blocks or as one "MP3"
    data block. ``self.blocks`` ends up sorted by start position.

    Args:
        path: File to read. When ``is_rar(path)`` is true the data is read
            through a ``RarStream``, otherwise the file is opened in
            binary mode. Takes precedence over ``stream``.
        stream: Already opened, seekable binary stream. Only used when no
            ``path`` is given.
        archived_file_name: Forwarded to ``RarStream`` for RAR input.

    Raises:
        AssertionError: If neither ``path`` nor ``stream`` is supplied.
        InvalidDataException: If the start of a Lyrics3 block cannot be
            found, no data follows the leading tags, or the SRS blocks
            are truncated, zero-sized or larger than the available data.
    """
    # NOTE(review): assert is removed under ``python -O``; without input
    # the first seek below then fails with AttributeError instead.
    assert path or stream
    if path:
        if is_rar(path):
            self._mp3_stream = RarStream(path, archived_file_name)
        else:
            self._mp3_stream = open(path, 'rb')
    elif stream:
        self._mp3_stream = stream
    self._mp3_stream.seek(0, 2)  # reset on ID3v2 tag search
    self._file_length = self._mp3_stream.tell()
    self.current_block = None

    self.blocks = []
    begin_main_content = 0

    # easier for corner case ("ID3" multiple times before sync)
    last_id3v2 = None

    # parse the whole file immediately!
    # 1) check for ID3v2 (beginning of mp3 file)
    # The ID3v2 tag size is the size of the complete tag after
    # unsynchronisation, including padding, excluding the header but not
    # excluding the extended header (total tag size - 10). Only 28 bits
    # (representing up to 256MB) are used in the size description to avoid
    # the introduction of 'false syncsignals'.
    # http://id3.org/id3v2.4.0-structure
    while True:  # tag should be here only once
        # detect repeating leading ID3 tags in the srs files
        startpos = begin_main_content
        self._mp3_stream.seek(startpos, os.SEEK_SET)
        if self._mp3_stream.read(3) == b"ID3":
            # skip ID3v2 version (2 bytes) and flags (1 byte)
            self._mp3_stream.seek(3, os.SEEK_CUR)
            sbytes = self._mp3_stream.read(4)
            size = decode_id3_size(sbytes)

            tag_size = 10 + size  # 3 + 3 + 4
            last_id3v2 = Block(tag_size, "ID3", startpos)
            self.blocks.append(last_id3v2)
            begin_main_content += tag_size
        else:
            break

    # 2) check for ID3v1 (last 128 bytes of mp3 file)
    # NOTE(review): the relative seek presumably fails for inputs shorter
    # than 128 bytes -- confirm how such files should be handled.
    end_meta_data_offset = self._file_length
    self._mp3_stream.seek(-128, os.SEEK_END)
    idv1_start_offset = self._mp3_stream.tell()
    first = self._mp3_stream.read(3)
    if first == b"TAG":
        idv1_block = Block(128, "TAG", idv1_start_offset)
        self.blocks.append(idv1_block)
        end_meta_data_offset = idv1_start_offset

    # 3) check for http://id3.org/Lyrics3v2
    # "The Lyrics3 block, after the MP3 audio and before the ID3 tag,
    # begins with the word "LYRICSBEGIN" after which a number of field
    # records follows. The Lyrics3 block ends with a six character size
    # descriptor and the string "LYRICS200". The size value includes the
    # "LYRICSBEGIN" string, but does not include the 6 character size
    # descriptor and the trailing "LYRICS200" string.
    if end_meta_data_offset - 6 - 9 >= 0:
        self._mp3_stream.seek(end_meta_data_offset - 6 - 9, os.SEEK_SET)
        lyrics_footer = self._mp3_stream.read(6 + 9)
        if lyrics_footer[6:] == b"LYRICS200":
            lyrics_size = int(lyrics_footer[:6])  # only header + body
            lyrics3v2_block = Block(lyrics_size + 6 + 9, "LYRICS200",
                    end_meta_data_offset - (lyrics_size + 6 + 9))
            self.blocks.append(lyrics3v2_block)
            end_meta_data_offset -= (lyrics_size + 6 + 9)

    # 4) check for http://id3.org/Lyrics3
    if end_meta_data_offset - 9 >= 0:
        self._mp3_stream.seek(end_meta_data_offset - 9, os.SEEK_SET)
        if b"LYRICSEND" == self._mp3_stream.read(9):
            # the start marker is searched in the 5100 bytes before the
            # current end of the main data
            self._mp3_stream.seek(end_meta_data_offset - 5100, os.SEEK_SET)
            lyrics_data = self._mp3_stream.read(5100)
            index = lyrics_data.find(b"LYRICSBEGIN")
            if index == -1:
                raise InvalidDataException(
                        "Unable to find start of LyricsV1 block")
            start_block = end_meta_data_offset - 5100 + index
            lyrics3_block = Block(end_meta_data_offset - start_block,
                    "LYRICS", start_block)
            self.blocks.append(lyrics3_block)
            end_meta_data_offset -= lyrics3_block.size

    # 5) APE tags
    # "Tag size in bytes including footer and all tag items excluding
    # the header to be as compatible as possible with APE Tags 1.000"
    # "An APEv1 tag at the end of a file must have at least a footer, APEv1
    # tags may never be used at the beginning of a file
    # (unlike APEv2 tags)."
    if end_meta_data_offset - 32 >= 0:
        self._mp3_stream.seek(end_meta_data_offset - 32, os.SEEK_SET)
        if b"APETAGEX" == self._mp3_stream.read(8):
            (version,) = S_LONG.unpack(self._mp3_stream.read(4))
            if version == 2000:
                header = 32
            else:  # 1000
                header = 0
            (size,) = S_LONG.unpack(self._mp3_stream.read(4))
            start_block = end_meta_data_offset - size - header
            apev2_block = Block(end_meta_data_offset - start_block,
                    "APE%s" % version, start_block)
            self.blocks.append(apev2_block)
            end_meta_data_offset -= apev2_block.size

    def marker_has_issues(marker):
        # True when the 4 bytes are incomplete, or are neither an MP3
        # frame sync nor one of the accepted "RIFF"/"SRSF" signatures.
        if len(marker) != 4:
            return True
        (sync,) = BE_SHORT.unpack(marker[:2])
        sync_bytes = sync & 0xFFE0 == 0xFFE0
        if not sync_bytes and marker not in (b"RIFF", b"SRSF"):
            return True
        return False

    # in between is SRS or MP3 data
    self._mp3_stream.seek(begin_main_content, os.SEEK_SET)
    marker = self._mp3_stream.read(4)

    if last_id3v2 and marker_has_issues(marker):
        # problem with (angelmoon)-hes_all_i_want_cd_pg2k-bmi
        # The .mp3 files contain ID3+nfo before the real ID3 starts
        # And it's also a RIFF mp3, so it won't play without removing
        # the bad initial tag first.
        # This can cause the space between the "ID3" and the end tag
        # to be empty. (or just wrong)
        # Mickey_K.-Distracted-(DNR019F8)-WEB-2008-B2R has the 'ID3' string
        # in the ID3v2 tag for 02-mickey_k.-distracted_-_dub_mix.mp3
        last_id3 = last_id3v2_before_sync(self._mp3_stream,
                                          self._file_length)
        dupe_id3_string = last_id3 != last_id3v2.start_pos
        after_v2_tag = last_id3 >= last_id3v2.start_pos + last_id3v2.size
        if dupe_id3_string and after_v2_tag:
            # another 'ID3' string found after id3v2 tag
            self._mp3_stream.seek(last_id3 + 3 + 3, os.SEEK_SET)
            sbytes = self._mp3_stream.read(4)
            size = decode_id3_size(sbytes)
            begin_main_content = last_id3 + 10 + size  # 3 + 3 + 4
            # add extra amount of data to the last block
            last_id3v2.size = begin_main_content - last_id3v2.start_pos
        elif dupe_id3_string and not after_v2_tag:
            # another 'ID3' string found inside first id3v2 tag
            if begin_main_content > self._file_length:
                # first tag is corrupt by definition
                # assume latter tag to be the good one: parse it
                # skip 'ID3' + ID3v2 version (2 bytes) and flags (1 byte)
                self._mp3_stream.seek(last_id3 + 6, os.SEEK_SET)
                sbytes = self._mp3_stream.read(4)
                size = decode_id3_size(sbytes)
                tag_size = 10 + size  # 3 + 3 + 4
                last_id3v2 = Block(tag_size, "ID3", last_id3)
                self.blocks.append(last_id3v2)
                begin_main_content = last_id3 + tag_size

        # read the marker again: the start of the main data may have moved
        self._mp3_stream.seek(begin_main_content, os.SEEK_SET)
        marker = self._mp3_stream.read(4)

    if not len(marker):
        # there still is something horribly wrong
        # (unless you think that an mp3 without any music data is possible)
        raise InvalidDataException("Tagging f****d up big time!")
    (sync,) = BE_SHORT.unpack(marker[:2])

    main_size = end_meta_data_offset - begin_main_content
    if marker[:3] == b"SRS":
        # SRS data blocks
        cur_pos = begin_main_content
        while cur_pos < begin_main_content + main_size:
            self._mp3_stream.seek(cur_pos, os.SEEK_SET)
            # SRSF, SRST and SRSP
            try:
                marker = self._mp3_stream.read(4)
                # size includes the 8 bytes header
                (size,) = S_LONG.unpack(self._mp3_stream.read(4))
            # NOTE(review): the bare except also converts unrelated errors
            # (even KeyboardInterrupt) into InvalidDataException.
            except:
                raise InvalidDataException("Not enough SRS data")
            srs_block = Block(size, marker.decode("ascii"), cur_pos)
            self.blocks.append(srs_block)
            cur_pos += size
            if size == 0:
                raise InvalidDataException("SRS size field is zero")
            if size > begin_main_content + main_size:
                raise InvalidDataException("Broken SRS")
    elif sync & 0xFFE0 == 0xFFE0 or marker == b"RIFF":
        # first 11 bits all 1 for MP3 frame marker
        mp3_data_block = Block(main_size, "MP3", begin_main_content)
        self.blocks.append(mp3_data_block)
    else:
        # unrecognised data is still recorded as one "MP3" block
        print("WARNING: MP3 file is not valid!")
        data_block = Block(main_size, "MP3", begin_main_content)
        self.blocks.append(data_block)

    # the order of which we add blocks doesn't matter this way
    self.blocks.sort(key=lambda block: block.start_pos)
def test_error_compressed_rar(self):
    """A compressed archive is refused unless ``compressed=True`` is
    passed explicitly."""
    path_parts = (os.pardir, os.pardir,
                  "test_files", "best_little", "best_little.rar")
    compressed_archive = os.path.join(*path_parts)
    # AttributeError: Compressed RARs are not supported
    self.assertRaises(AttributeError, RarStream, compressed_archive)
    # opting in must not raise
    RarStream(compressed_archive, compressed=True)
def test_read_nothing(self):
    """Asking for zero bytes must return an empty bytes object."""
    archive = os.path.join(self.path, "store_little", "store_little.rar")
    stream = RarStream(archive)
    nothing = stream.read(0)
    self.assertEqual(b"", nothing)
def osohash_from(rar_archive, enclosed_file=None, middle=False):
    """Return the result of ``_osorg_hash`` for a file stored in a RAR set.

    The archived data is read through a ``RarStream``.

    rar_archive: the archive that contains the file.
    enclosed_file: the archived file to hash. If it is not supplied, the
        hash will be calculated based on the first file in the archive(s).
        To get a list of the files inside the archive, use
        RarReader.list_files().
    middle: not the first RAR archive from the set is expected
        in the stream.
    """
    archived_stream = RarStream(rar_archive, enclosed_file, middle)
    return _osorg_hash(archived_stream)