def iter_test_file_objects(fileobj):
    """Given a file object, yields the same file object, which fails
    differently each time
    """

    t = _TestFileObj(fileobj)
    # first figure out how much a successful attempt reads and how many
    # file object operations it executes.
    yield t
    for i in xrange(t.dataread):
        yield _TestFileObj(fileobj, stop_after=i)
    for i in xrange(t.operations):
        yield _TestFileObj(fileobj, fail_after=i)

def test_random_data_roundtrip(self):
    try:
        random_file = open("/dev/urandom", "rb")
    except (IOError, OSError):
        print("WARNING: Random data round trip test disabled.")
        return
    try:
        for i in xrange(10):
            num_packets = random.randrange(2, 100)
            lengths = [random.randrange(10, 10000)
                       for i in xrange(num_packets)]
            packets = list(map(random_file.read, lengths))
            self.failUnlessEqual(
                packets, OggPage.to_packets(OggPage.from_packets(packets)))
    finally:
        random_file.close()

def test_find_last(self):
    pages = [OggPage() for i in xrange(10)]
    for i, page in enumerate(pages):
        page.sequence = i
    data = BytesIO(b"".join([page.write() for page in pages]))
    self.failUnlessEqual(
        OggPage.find_last(data, pages[0].serial), pages[-1])

def _parse_adif(self, fileobj):
    r = BitReader(fileobj)
    try:
        copyright_id_present = r.bits(1)
        if copyright_id_present:
            r.skip(72)  # copyright_id
        r.skip(1 + 1)  # original_copy, home
        bitstream_type = r.bits(1)
        self.bitrate = r.bits(23)
        npce = r.bits(4)
        if bitstream_type == 0:
            r.skip(20)  # adif_buffer_fullness
        pce = ProgramConfigElement(r)
        try:
            self.sample_rate = _FREQS[pce.sampling_frequency_index]
        except IndexError:
            pass
        self.channels = pce.channels
        # other pces..
        for i in xrange(npce):
            ProgramConfigElement(r)
        r.align()
    except BitReaderError as e:
        raise AACError(e)

    # use bitrate + data size to guess length
    start = fileobj.tell()
    fileobj.seek(0, 2)
    length = fileobj.tell() - start
    if self.bitrate != 0:
        self.length = (8.0 * length) / self.bitrate

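# The length estimate above is plain arithmetic: stream bytes times 8 gives
# bits, divided by the bitrate in bits per second gives seconds. A minimal
# standalone sketch of the same calculation (the helper name and numbers
# are made up for illustration, not part of the parser):

def _estimate_length(stream_bytes, bitrate):
    """Return the approximate duration in seconds, or None if unknown."""
    if bitrate == 0:
        return None
    return (8.0 * stream_bytes) / bitrate

# e.g. a 1 MiB ADIF payload at 128000 bits/s:
# _estimate_length(1024 * 1024, 128000) -> ~65.5 seconds
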
def test_renumber_muxed(self):
    pages = [OggPage() for i in xrange(10)]
    for seq, page in enumerate(pages[0:1] + pages[2:]):
        page.serial = 0
        page.sequence = seq
    pages[1].serial = 2
    pages[1].sequence = 100
    data = BytesIO(b"".join([page.write() for page in pages]))
    OggPage.renumber(data, 0, 20)
    data.seek(0)
    pages = [OggPage(data) for i in xrange(10)]
    self.failUnlessEqual(pages[1].serial, 2)
    self.failUnlessEqual(pages[1].sequence, 100)
    pages.pop(1)
    self.failUnlessEqual(
        [page.sequence for page in pages], list(xrange(20, 29)))

def test_renumber(self):
    self.failUnlessEqual(
        [page.sequence for page in self.pages], [0, 1, 2])
    fileobj = BytesIO()
    for page in self.pages:
        fileobj.write(page.write())
    fileobj.seek(0)
    OggPage.renumber(fileobj, 1, 10)
    fileobj.seek(0)
    pages = [OggPage(fileobj) for i in xrange(3)]
    self.failUnlessEqual([page.sequence for page in pages], [10, 11, 12])

    fileobj.seek(0)
    OggPage.renumber(fileobj, 1, 20)
    fileobj.seek(0)
    pages = [OggPage(fileobj) for i in xrange(3)]
    self.failUnlessEqual([page.sequence for page in pages], [20, 21, 22])

def test_to_packets_empty_page(self):
    pages = [OggPage(), OggPage()]
    for i in xrange(len(pages)):
        pages[i].sequence = i
    assert OggPage.to_packets(pages, True) == []
    assert OggPage.to_packets(pages, False) == []

    pages = [OggPage(), OggPage(), OggPage()]
    pages[0].packets = [b"foo"]
    pages[0].complete = False
    pages[1].continued = True
    pages[1].complete = False
    pages[2].packets = [b"bar"]
    pages[2].continued = True
    for i in xrange(len(pages)):
        pages[i].sequence = i
    assert OggPage.to_packets(pages, True) == [b'foobar']

def test_bits(self):
    data = b"\x12\x34\x56\x78\x89\xAB\xCD\xEF"
    ref = cdata.uint64_be(data)
    for i in xrange(64):
        fo = cBytesIO(data)
        r = BitReader(fo)
        v = r.bits(i) << (64 - i) | r.bits(64 - i)
        self.assertEqual(v, ref)

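# The test above checks an invariant of the bit reader: however you split a
# 64 bit big-endian value into an i-bit prefix and a (64 - i)-bit suffix,
# shifting the prefix back up and OR-ing in the suffix must reproduce the
# value. A minimal pure-Python model of the same read (hypothetical helper,
# not the BitReader implementation):

def _bits_from_int(value, total, count, pos):
    """Read `count` bits starting at bit offset `pos` from the left."""
    shift = total - pos - count
    return (value >> shift) & ((1 << count) - 1)

# For any i in range(64), with v the 64-bit reference value:
# _bits_from_int(v, 64, i, 0) << (64 - i) | _bits_from_int(v, 64, 64 - i, i)
# equals v, which is exactly what the loop asserts.
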
def test_apic_duplicate_hash(self):
    id3 = ID3Tags()
    for i in xrange(10):
        apic = APIC(encoding=0, mime=u"b", type=3, desc=u"", data=b"a")
        id3._add(apic, False)
    self.assertEqual(len(id3), 10)
    for key, value in id3.items():
        self.assertEqual(key, value.HashKey)

def test_delete_6106_79_51760(self):
    # This appears to be due to ANSI C limitations in read/write on rb+
    # files. The problematic behavior only showed up in our mmap fallback
    # code for transfers of this or similar sizes.
    data = u''.join(map(text_type, xrange(12574)))  # 51760 bytes
    data = data.encode("ascii")
    with self.file(data[:6106 + 79] + data[79:]) as o:
        delete_bytes(o, 6106, 79)
        self.failUnless(data == self.read(o))

def test_many_changes(self, num_runs=5, num_changes=300,
                      min_change_size=500, max_change_size=1000,
                      min_buffer_size=1, max_buffer_size=2000):
    self.failUnless(
        min_buffer_size < min_change_size and
        max_buffer_size > max_change_size and
        min_change_size < max_change_size and
        min_buffer_size < max_buffer_size,
        "Given testing parameters make this test useless")

    for j in xrange(num_runs):
        data = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" * 1024
        with self.file(data) as fobj:
            filesize = len(data)

            # Generate the list of changes to apply
            changes = []
            for i in xrange(num_changes):
                change_size = random.randrange(
                    min_change_size, max_change_size)
                change_offset = random.randrange(0, filesize)
                filesize += change_size
                changes.append((change_offset, change_size))

            # Apply the changes, and make sure they all took.
            for offset, size in changes:
                buffer_size = random.randrange(
                    min_buffer_size, max_buffer_size)
                insert_bytes(fobj, size, offset, BUFFER_SIZE=buffer_size)
            fobj.seek(0)
            self.failIfEqual(fobj.read(len(data)), data)
            fobj.seek(0, 2)
            self.failUnlessEqual(fobj.tell(), filesize)

            # Then, undo them.
            changes.reverse()
            for offset, size in changes:
                buffer_size = random.randrange(
                    min_buffer_size, max_buffer_size)
                delete_bytes(fobj, size, offset, BUFFER_SIZE=buffer_size)
            fobj.seek(0)
            self.failUnless(fobj.read() == data)

def _parse_adts(self, fileobj, start_offset):
    max_initial_read = 512
    max_resync_read = 10
    max_sync_tries = 10

    frames_max = 100
    frames_needed = 3

    # Try up to X times to find a sync word and read up to Y frames.
    # If more than Z frames are valid we assume a valid stream
    offset = start_offset
    for i in xrange(max_sync_tries):
        fileobj.seek(offset)
        s = _ADTSStream.find_stream(fileobj, max_initial_read)
        if s is None:
            raise AACError("sync not found")
        # start right after the last found offset
        offset += s.offset + 1

        for i in xrange(frames_max):
            if not s.parse_frame():
                break
            if not s.sync(max_resync_read):
                break

        if s.parsed_frames >= frames_needed:
            break
    else:
        raise AACError(
            "no valid stream found (only %d frames)" % s.parsed_frames)

    self.sample_rate = s.frequency
    self.channels = s.channels
    self.bitrate = s.bitrate

    # size from stream start to end of file
    fileobj.seek(0, 2)
    stream_size = fileobj.tell() - (offset + s.offset)
    # approx
    self.length = 0.0
    if s.frequency != 0:
        self.length = float(s.samples * stream_size) / (s.size * s.frequency)

def setUp(self):
    self.fileobj = open(os.path.join(DATA_DIR, "empty.ogg"), "rb")
    self.page = OggPage(self.fileobj)

    pages = [OggPage(), OggPage(), OggPage()]
    pages[0].packets = [b"foo"]
    pages[1].packets = [b"bar"]
    pages[2].packets = [b"baz"]
    for i in xrange(len(pages)):
        pages[i].sequence = i
    for page in pages:
        page.serial = 1
    self.pages = pages

def _item_sort_key(key, value):
    # iTunes always writes the tags in order of "relevance", try
    # to copy it as closely as possible.
    order = ["\xa9nam", "\xa9ART", "\xa9wrt", "\xa9alb",
             "\xa9gen", "gnre", "trkn", "disk",
             "\xa9day", "cpil", "pgap", "pcst", "tmpo",
             "\xa9too", "----", "covr", "\xa9lyr"]
    order = dict(izip(order, xrange(len(order))))
    last = len(order)
    # If there's no key-based way to distinguish, order by length.
    # If there's still no way, go by string comparison on the
    # values, so we at least have something deterministic.
    return (order.get(key[:4], last), len(repr(value)), repr(value))

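# A quick sketch of how a sort key like this would be used when writing
# tags back out; the `items` dict below is made up for illustration:

items = {"trkn": [(1, 10)], "\xa9nam": [u"Title"], "covr": [b"..."]}
for key, value in sorted(items.items(),
                         key=lambda kv: _item_sort_key(*kv)):
    pass  # "\xa9nam" sorts first, then "trkn", then "covr"
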
def test_renumber_extradata(self):
    fileobj = BytesIO()
    for page in self.pages:
        fileobj.write(page.write())
    fileobj.write(b"left over data")
    fileobj.seek(0)
    # Trying to rewrite should raise an error...
    self.failUnlessRaises(Exception, OggPage.renumber, fileobj, 1, 10)
    fileobj.seek(0)
    # But the already written data should remain valid,
    pages = [OggPage(fileobj) for i in xrange(3)]
    self.failUnlessEqual([page.sequence for page in pages], [10, 11, 12])
    # And the garbage that caused the error should be okay too.
    self.failUnlessEqual(fileobj.read(), b"left over data")

def test_find_last_last_empty(self):
    # https://github.com/quodlibet/mutagen/issues/308
    pages = [OggPage() for i in xrange(10)]
    for i, page in enumerate(pages):
        page.sequence = i
        page.position = i
    pages[-1].last = True
    pages[-1].position = -1
    data = BytesIO(b"".join([page.write() for page in pages]))

    page = OggPage.find_last(data, pages[-1].serial, finishing=True)
    assert page is not None
    assert page.position == 8

    page = OggPage.find_last(data, pages[-1].serial, finishing=False)
    assert page is not None
    assert page.position == -1

def parse(self, asf, data):
    super(ExtendedContentDescriptionObject, self).parse(asf, data)
    num_attributes, = struct.unpack("<H", data[0:2])
    pos = 2
    for i in xrange(num_attributes):
        name_length, = struct.unpack("<H", data[pos:pos + 2])
        pos += 2
        name = data[pos:pos + name_length]
        name = name.decode("utf-16-le").strip("\x00")
        pos += name_length
        value_type, value_length = struct.unpack("<HH", data[pos:pos + 4])
        pos += 4
        value = data[pos:pos + value_length]
        pos += value_length
        attr = ASFBaseAttribute._get_type(value_type)(data=value)
        asf._tags.setdefault(self.GUID, []).append((name, attr))

def _parse_desc_length_file(cls, fileobj):
    """May raise ValueError"""

    value = 0
    for i in xrange(4):
        try:
            b = cdata.uint8(fileobj.read(1))
        except cdata.error as e:
            raise ValueError(e)
        value = (value << 7) | (b & 0x7F)
        if not b >> 7:
            break
    else:
        raise ValueError("invalid descriptor length")

    return value

def __init__(self, r):
    """Reads the program_config_element()

    Raises BitReaderError
    """

    self.element_instance_tag = r.bits(4)
    self.object_type = r.bits(2)
    self.sampling_frequency_index = r.bits(4)
    num_front_channel_elements = r.bits(4)
    num_side_channel_elements = r.bits(4)
    num_back_channel_elements = r.bits(4)
    num_lfe_channel_elements = r.bits(2)
    num_assoc_data_elements = r.bits(3)
    num_valid_cc_elements = r.bits(4)

    mono_mixdown_present = r.bits(1)
    if mono_mixdown_present == 1:
        r.skip(4)
    stereo_mixdown_present = r.bits(1)
    if stereo_mixdown_present == 1:
        r.skip(4)
    matrix_mixdown_idx_present = r.bits(1)
    if matrix_mixdown_idx_present == 1:
        r.skip(3)

    elms = (num_front_channel_elements + num_side_channel_elements +
            num_back_channel_elements)
    channels = 0
    for i in xrange(elms):
        channels += 1
        element_is_cpe = r.bits(1)
        if element_is_cpe:
            channels += 1
        r.skip(4)
    channels += num_lfe_channel_elements
    self.channels = channels

    r.skip(4 * num_lfe_channel_elements)
    r.skip(4 * num_assoc_data_elements)
    r.skip(5 * num_valid_cc_elements)
    r.align()
    comment_field_bytes = r.bits(8)
    r.skip(8 * comment_field_bytes)

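# The channel count above follows the AAC element types: a single channel
# element (SCE) contributes one channel, a channel pair element (CPE) two,
# and each LFE element adds one more. A standalone sketch of the same
# arithmetic (hypothetical helper, not part of the parser):

def _count_channels(element_is_cpe_flags, num_lfe):
    channels = 0
    for is_cpe in element_is_cpe_flags:
        channels += 2 if is_cpe else 1
    return channels + num_lfe

# e.g. one front SCE plus two CPEs and one LFE element:
# _count_channels([False, True, True], 1) -> 6 (a 5.1 layout)
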
def parse(self, asf, data):
    super(MetadataLibraryObject, self).parse(asf, data)
    num_attributes, = struct.unpack("<H", data[0:2])
    pos = 2
    for i in xrange(num_attributes):
        (language, stream, name_length, value_type,
         value_length) = struct.unpack("<HHHHI", data[pos:pos + 12])
        pos += 12
        name = data[pos:pos + name_length]
        name = name.decode("utf-16-le").strip("\x00")
        pos += name_length
        value = data[pos:pos + value_length]
        pos += value_length
        args = {'data': value, 'language': language, 'stream': stream}
        if value_type == 2:
            args['dword'] = False
        attr = ASFBaseAttribute._get_type(value_type)(**args)
        asf._tags.setdefault(self.GUID, []).append((name, attr))

def _parse_desc_length(data, offset):
    """Returns the decoded value and the new offset in data after the value.

    Can raise ValueError in case the value is too long or data too short.
    """

    value = 0
    for i in xrange(4):
        try:
            b, offset = cdata.uint8_from(data, offset)
        except cdata.error as e:
            raise ValueError(e)
        value = (value << 7) | (b & 0x7f)
        if not b >> 7:
            break
    else:
        raise ValueError("invalid descriptor length")

    return value, offset

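# Both descriptor length parsers above decode the same variable-length
# integer used in MPEG-4 descriptors: each byte contributes its low seven
# bits, and a set high bit means another byte follows (at most four bytes).
# A worked example:
#
#   b"\x81\x02" -> first byte 0x81: value = 0x01, high bit set, continue
#                  second byte 0x02: value = (0x01 << 7) | 0x02 = 130
#
# so _parse_desc_length(b"\x81\x02", 0) would return (130, 2).
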
def test_find_sync(self):

    def get_syncs(fileobj, max_read):
        start = fileobj.tell()
        pos = []
        for i in iter_sync(fileobj, max_read):
            pos.append(fileobj.tell() - start)
        return pos

    self.assertEqual(get_syncs(cBytesIO(b"abc"), 100), [])
    self.assertEqual(get_syncs(cBytesIO(b""), 100), [])
    self.assertEqual(get_syncs(cBytesIO(b"a\xff\xe0"), 1), [])
    self.assertEqual(get_syncs(cBytesIO(b"a\xff\xc0\xff\xe0"), 100), [3])
    self.assertEqual(
        get_syncs(cBytesIO(b"a\xff\xe0\xff\xe0\xff\xe0"), 100), [1, 3, 5])

    for i in xrange(400):
        fileobj = cBytesIO(b"\x00" * i + b"\xff\xe0")
        self.assertEqual(get_syncs(fileobj, 100 + i), [i])

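# The pattern these tests look for is the MPEG frame sync: eleven set bits,
# i.e. a 0xFF byte followed by a byte whose top three bits are set. A
# minimal check of the same condition on a two-byte window (a sketch, not
# the iter_sync implementation itself):

def _is_frame_sync(b0, b1):
    return b0 == 0xFF and (b1 & 0xE0) == 0xE0

# e.g. _is_frame_sync(0xFF, 0xE0) -> True, while _is_frame_sync(0xFF, 0xC0)
# -> False, which is why the b"a\xff\xc0\xff\xe0" case syncs at offset 3.
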
def load(self, fileobj, errors='replace', framing=True):
    """Parse a Vorbis comment from a file-like object.

    Keyword arguments:

    * errors -- 'strict', 'replace', or 'ignore'. This affects Unicode
      decoding and how other malformed content is interpreted.
    * framing -- if true, fail if a framing bit is not present

    Framing bits are required by the Vorbis comment specification,
    but are not used in FLAC Vorbis comment blocks.
    """

    try:
        vendor_length = cdata.uint_le(fileobj.read(4))
        self.vendor = fileobj.read(vendor_length).decode('utf-8', errors)
        count = cdata.uint_le(fileobj.read(4))
        for i in xrange(count):
            length = cdata.uint_le(fileobj.read(4))
            try:
                string = fileobj.read(length).decode('utf-8', errors)
            except (OverflowError, MemoryError):
                raise error("cannot read %d bytes, too large" % length)
            try:
                tag, value = string.split('=', 1)
            except ValueError as err:
                if errors == "ignore":
                    continue
                elif errors == "replace":
                    tag, value = u"unknown%d" % i, string
                else:
                    raise VorbisEncodingError(str(err)).with_traceback(
                        sys.exc_info()[2])
            try:
                tag = tag.encode('ascii', errors).decode('ascii')
            except UnicodeEncodeError:
                raise VorbisEncodingError("invalid tag name %r" % tag)
            else:
                if is_valid_key(tag):
                    self.append((tag, value))

        if framing and not ord_(fileobj.read(1)) & 0x01:
            raise VorbisUnsetFrameError("framing bit was unset")
    except (cdata.error, TypeError):
        raise error("file is not a valid Vorbis comment")

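# The binary layout parsed above is, per the Vorbis comment spec: a
# little-endian uint32 vendor length, the UTF-8 vendor string, a uint32
# comment count, then for each comment a uint32 length followed by a UTF-8
# "KEY=value" string, and finally the framing bit. A minimal sketch that
# builds such a blob for the parser (assumed helper, for illustration only):

import struct

def _make_vcomment_blob(vendor=b"test", comments=(b"TITLE=foo",)):
    parts = [struct.pack("<I", len(vendor)), vendor,
             struct.pack("<I", len(comments))]
    for c in comments:
        parts.append(struct.pack("<I", len(c)))
        parts.append(c)
    parts.append(b"\x01")  # framing bit set
    return b"".join(parts)
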
def parse(self, asf, data):
    super(CodecListObject, self).parse(asf, data)

    offset = 16
    count, offset = cdata.uint32_le_from(data, offset)
    for i in xrange(count):
        try:
            offset, type_, name, desc, codec = \
                self._parse_entry(data, offset)
        except cdata.error:
            raise ASFError("invalid codec entry")

        # go with the first audio entry
        if type_ == 2:
            name = name.strip()
            desc = desc.strip()
            asf.info.codec_type = codec
            asf.info.codec_name = name
            asf.info.codec_description = desc
            return

def parse_full(cls, asf, fileobj):
    """Raises ASFHeaderError"""

    header = cls()
    remaining_header, num_objects = cls.parse_size(fileobj)
    remaining_header -= 30

    for i in xrange(num_objects):
        obj_header_size = 24
        if remaining_header < obj_header_size:
            raise ASFHeaderError("invalid header size")
        data = fileobj.read(obj_header_size)
        if len(data) != obj_header_size:
            raise ASFHeaderError("truncated")
        remaining_header -= obj_header_size

        guid, size = struct.unpack("<16sQ", data)
        obj = BaseObject._get_object(guid)

        payload_size = size - obj_header_size
        if remaining_header < payload_size:
            raise ASFHeaderError("invalid object size")
        remaining_header -= payload_size

        try:
            data = fileobj.read(payload_size)
        except (OverflowError, MemoryError):
            # read doesn't take 64bit values
            raise ASFHeaderError("invalid header size")
        if len(data) != payload_size:
            raise ASFHeaderError("truncated")

        try:
            obj.parse(asf, data)
        except struct.error:
            raise ASFHeaderError("truncated")

        header.objects.append(obj)

    return header

def __init__(self, fileobj):
    """Reads the VBRI header or raises VBRIHeaderError.

    The file position is undefined after this returns
    """

    data = fileobj.read(26)
    if len(data) != 26 or not data.startswith(b"VBRI"):
        raise VBRIHeaderError("Not a VBRI header")

    offset = 4
    self.version, offset = cdata.uint16_be_from(data, offset)
    if self.version != 1:
        raise VBRIHeaderError(
            "Unsupported header version: %r" % self.version)

    offset += 2  # float16.. can't do

    self.quality, offset = cdata.uint16_be_from(data, offset)
    self.bytes, offset = cdata.uint32_be_from(data, offset)
    self.frames, offset = cdata.uint32_be_from(data, offset)

    toc_num_entries, offset = cdata.uint16_be_from(data, offset)
    self.toc_scale_factor, offset = cdata.uint16_be_from(data, offset)
    toc_entry_size, offset = cdata.uint16_be_from(data, offset)
    self.toc_frames, offset = cdata.uint16_be_from(data, offset)
    toc_size = toc_entry_size * toc_num_entries
    toc_data = fileobj.read(toc_size)
    if len(toc_data) != toc_size:
        raise VBRIHeaderError("VBRI header truncated")

    if toc_entry_size == 2:
        unpack = partial(cdata.uint16_be_from, toc_data)
    elif toc_entry_size == 4:
        unpack = partial(cdata.uint32_be_from, toc_data)
    else:
        raise VBRIHeaderError("Invalid TOC entry size")

    self.toc = [unpack(i)[0] for i in xrange(0, toc_size, toc_entry_size)]

def _read_midi_length(fileobj):
    """Returns the duration in seconds. Can raise all kinds of errors..."""

    TEMPO, MIDI = range(2)

    def read_chunk(fileobj):
        info = fileobj.read(8)
        if len(info) != 8:
            raise SMFError("truncated")
        chunklen = struct.unpack(">I", info[4:])[0]
        data = fileobj.read(chunklen)
        if len(data) != chunklen:
            raise SMFError("truncated")
        return info[:4], data

    identifier, chunk = read_chunk(fileobj)
    if identifier != b"MThd":
        raise SMFError("Not a MIDI file")

    if len(chunk) != 6:
        raise SMFError("truncated")

    format_, ntracks, tickdiv = struct.unpack(">HHH", chunk)
    if format_ > 1:
        raise SMFError("Not supported format %d" % format_)

    if tickdiv >> 15:
        # fps = (-(tickdiv >> 8)) & 0xFF
        # subres = tickdiv & 0xFF
        # never saw one of those
        raise SMFError("Not supported timing interval")

    # get a list of events and tempo changes for each track
    tracks = []
    first_tempos = None
    for tracknum in xrange(ntracks):
        identifier, chunk = read_chunk(fileobj)
        if identifier != b"MTrk":
            continue
        events, tempos = _read_track(chunk)

        # In case of format == 1, copy the first tempo list to all tracks
        first_tempos = first_tempos or tempos
        if format_ == 1:
            tempos = list(first_tempos)
        events += tempos
        events.sort()
        tracks.append(events)

    # calculate the duration of each track
    durations = []
    for events in tracks:
        tempo = 500000
        parts = []
        deltasum = 0
        for (dummy, type_, data) in events:
            if type_ == TEMPO:
                parts.append((deltasum, tempo))
                tempo = data
                deltasum = 0
            else:
                deltasum += data
        parts.append((deltasum, tempo))

        duration = 0
        for (deltasum, tempo) in parts:
            quarter, tpq = deltasum / float(tickdiv), tempo
            duration += (quarter * tpq)
        duration /= 10 ** 6
        durations.append(duration)

    # return the longest one
    return max(durations)

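# The per-track duration math above converts ticks to seconds: deltasum
# ticks divided by tickdiv gives quarter notes, and multiplying by the
# tempo (microseconds per quarter note) gives microseconds. A worked
# example:
#
#   tickdiv = 480, deltasum = 960 ticks, tempo = 500000 (120 bpm)
#   960 / 480 = 2 quarter notes
#   2 * 500000 = 1000000 microseconds = 1.0 second
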
class MPEGFrame(object):

    # Map (version, layer) tuples to bitrates.
    __BITRATE = {
        (1, 1): [0, 32, 64, 96, 128, 160, 192, 224,
                 256, 288, 320, 352, 384, 416, 448],
        (1, 2): [0, 32, 48, 56, 64, 80, 96, 112,
                 128, 160, 192, 224, 256, 320, 384],
        (1, 3): [0, 32, 40, 48, 56, 64, 80, 96,
                 112, 128, 160, 192, 224, 256, 320],
        (2, 1): [0, 32, 48, 56, 64, 80, 96, 112,
                 128, 144, 160, 176, 192, 224, 256],
        (2, 2): [0, 8, 16, 24, 32, 40, 48, 56,
                 64, 80, 96, 112, 128, 144, 160],
    }

    __BITRATE[(2, 3)] = __BITRATE[(2, 2)]
    for i in xrange(1, 4):
        __BITRATE[(2.5, i)] = __BITRATE[(2, i)]

    # Map version to sample rates.
    __RATES = {
        1: [44100, 48000, 32000],
        2: [22050, 24000, 16000],
        2.5: [11025, 12000, 8000]
    }

    sketchy = False

    def __init__(self, fileobj):
        """Raises HeaderNotFoundError"""

        self.frame_offset = fileobj.tell()

        r = BitReader(fileobj)
        try:
            if r.bits(11) != 0x7ff:
                raise HeaderNotFoundError("invalid sync")
            version = r.bits(2)
            layer = r.bits(2)
            protection = r.bits(1)
            bitrate = r.bits(4)
            sample_rate = r.bits(2)
            padding = r.bits(1)
            r.skip(1)  # private
            self.mode = r.bits(2)
            r.skip(6)
        except BitReaderError:
            raise HeaderNotFoundError("truncated header")

        assert r.get_position() == 32 and r.is_aligned()

        # try to be strict here to reduce the chance of a false positive
        if version == 1 or layer == 0 or sample_rate == 0x3 or \
                bitrate == 0xf or bitrate == 0:
            raise HeaderNotFoundError("invalid header")

        self.channels = 1 if self.mode == MONO else 2

        self.version = [2.5, None, 2, 1][version]
        self.layer = 4 - layer
        self.protected = not protection
        self.padding = bool(padding)

        self.bitrate = self.__BITRATE[(self.version, self.layer)][bitrate]
        self.bitrate *= 1000
        self.sample_rate = self.__RATES[self.version][sample_rate]

        if self.layer == 1:
            frame_size = 384
            slot = 4
        elif self.version >= 2 and self.layer == 3:
            frame_size = 576
            slot = 1
        else:
            frame_size = 1152
            slot = 1

        frame_length = (
            ((frame_size // 8 * self.bitrate) // self.sample_rate) +
            padding) * slot

        self.sketchy = True

        # Try to find/parse the Xing header, which trumps the above length
        # and bitrate calculation.
        if self.layer == 3:
            self._parse_vbr_header(fileobj, self.frame_offset, frame_size,
                                   frame_length)

        fileobj.seek(self.frame_offset + frame_length, 0)

    def _parse_vbr_header(self, fileobj, frame_offset, frame_size,
                          frame_length):
        """Does not raise"""

        # Xing
        xing_offset = XingHeader.get_offset(self)
        fileobj.seek(frame_offset + xing_offset, 0)
        try:
            xing = XingHeader(fileobj)
        except XingHeaderError:
            pass
        else:
            lame = xing.lame_header
            self.sketchy = False
            self.bitrate_mode = _guess_xing_bitrate_mode(xing)
            self.encoder_settings = xing.get_encoder_settings()
            if xing.frames != -1:
                samples = frame_size * xing.frames
                if xing.bytes != -1 and samples > 0:
                    # the first frame is only included in xing.bytes but
                    # not in xing.frames, skip it.
                    audio_bytes = max(0, xing.bytes - frame_length)
                    self.bitrate = intround((
                        audio_bytes * 8 * self.sample_rate) / float(samples))
                if lame is not None:
                    samples -= lame.encoder_delay_start
                    samples -= lame.encoder_padding_end
                    if samples < 0:
                        # older lame versions wrote bogus delay/padding
                        # for short files with low bitrate
                        samples = 0
                self.length = float(samples) / self.sample_rate
            if xing.lame_version_desc:
                self.encoder_info = u"LAME %s" % xing.lame_version_desc
            if lame is not None:
                self.track_gain = lame.track_gain_adjustment
                self.track_peak = lame.track_peak
                self.album_gain = lame.album_gain_adjustment
            return

        # VBRI
        vbri_offset = VBRIHeader.get_offset(self)
        fileobj.seek(frame_offset + vbri_offset, 0)
        try:
            vbri = VBRIHeader(fileobj)
        except VBRIHeaderError:
            pass
        else:
            self.bitrate_mode = BitrateMode.VBR
            self.encoder_info = u"FhG"
            self.sketchy = False
            self.length = float(frame_size * vbri.frames) / self.sample_rate
            if self.length:
                self.bitrate = int((vbri.bytes * 8) / self.length)

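# The Xing-based figures above are plain arithmetic: frame_size *
# xing.frames gives total samples, dividing by the sample rate gives the
# duration, and audio bytes times 8 over that duration gives the bitrate.
# A worked example with made-up but realistic numbers:
#
#   frame_size = 1152, xing.frames = 1000, sample_rate = 44100
#   samples = 1152000 -> length = 1152000 / 44100 ~= 26.12 s
#   audio_bytes = 417959 -> bitrate ~= 417959 * 8 / 26.12 ~= 128000 bits/s
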
def __init__(self, fileobj, offset=None):
    """Parse MPEG stream information from a file-like object.

    If an offset argument is given, it is used to start looking
    for stream information and Xing headers; otherwise, ID3v2 tags
    will be skipped automatically. A correct offset can make
    loading files significantly faster.

    Raises HeaderNotFoundError, error
    """

    if offset is None:
        fileobj.seek(0, 0)
    else:
        fileobj.seek(offset, 0)

    # skip anyway, because wmp stacks multiple id3 tags
    skip_id3(fileobj)

    # find a sync in the first 1024K, give up after some invalid syncs
    max_read = 1024 * 1024
    max_syncs = 1000
    enough_frames = 4
    min_frames = 2

    self.sketchy = True
    frames = []
    first_frame = None

    for _ in iter_sync(fileobj, max_read):
        max_syncs -= 1
        if max_syncs <= 0:
            break

        for _ in xrange(enough_frames):
            try:
                frame = MPEGFrame(fileobj)
            except HeaderNotFoundError:
                break
            frames.append(frame)
            if not frame.sketchy:
                break

        # if we have min frames, save it in case this is all we get
        if len(frames) >= min_frames and first_frame is None:
            first_frame = frames[0]

        # if the last frame was a non-sketchy one (has a valid vbr header)
        # we use that
        if frames and not frames[-1].sketchy:
            first_frame = frames[-1]
            self.sketchy = False
            break

        # if we have enough valid frames, use the first
        if len(frames) >= enough_frames:
            first_frame = frames[0]
            self.sketchy = False
            break

        # otherwise start over with the next sync
        del frames[:]

    if first_frame is None:
        raise HeaderNotFoundError("can't sync to MPEG frame")

    assert first_frame
    self.length = -1
    sketchy = self.sketchy
    self.__dict__.update(first_frame.__dict__)
    self.sketchy = sketchy

    # no length, estimate based on file size
    if self.length == -1:
        fileobj.seek(0, 2)
        content_size = fileobj.tell() - first_frame.frame_offset
        self.length = 8 * content_size / float(self.bitrate)

            return BitrateMode.VBR
        # everything else undefined, continue guessing

    # info tags get only written by lame for cbr files
    if xing.is_info:
        return BitrateMode.CBR

    # older lame and non-lame with some variant of vbr
    if xing.vbr_scale != -1 or xing.lame_version_desc:
        return BitrateMode.VBR

    return BitrateMode.UNKNOWN


# Mode values.
STEREO, JOINTSTEREO, DUALCHANNEL, MONO = xrange(4)

def test_parened_num(self):
    for i in xrange(len(self.GENRES)):
        self.assertEquals(self._g("(%02d)" % i), [self.GENRES[i]])

def test_getitem(self):
    for i in xrange(len(self.value)):
        self.failUnlessEqual(self.sample[i], self.value[i])
