def parseSTRF(self, t, strh):
    """Decode a stream-format payload and register the stream it describes.

    ``strh['fccType']`` selects how ``t`` is interpreted:

    * ``'auds'`` -- ``t`` is read as a WAVEFORMATEX-style header and a
      ``mediainfo.AudioInfo`` is appended to ``self.audio``.
    * ``'vids'`` -- ``t`` is read as a 40-byte BITMAPINFOHEADER-style header
      and a ``mediainfo.VideoInfo`` is appended to ``self.video``; ``strh``
      must then also provide ``dwRate``, ``dwScale`` and ``dwLength``.

    Returns a dict holding the raw header fields (empty for any other
    ``fccType``).  Raises ``struct.error`` if ``t`` is too short.
    """
    fccType = strh['fccType']
    retval = {}

    if fccType == 'auds':
        names = ('wFormatTag', 'nChannels', 'nSamplesPerSec',
                 'nAvgBytesPerSec', 'nBlockAlign', 'nBitsPerSample')
        # nSamplesPerSec and nAvgBytesPerSec are 32-bit DWORDs; reading them
        # as 16-bit words shifts every following field.
        if len(t) >= 16:
            fields = struct.unpack('<HHIIHH', t[0:16])
        else:
            # 14-byte variant without the bits-per-sample member.
            fields = struct.unpack('<HHIIH', t[0:14]) + (0,)
        retval.update(zip(names, fields))

        ai = mediainfo.AudioInfo()
        ai.samplerate = retval['nSamplesPerSec']
        ai.channels = retval['nChannels']
        ai.samplebits = retval['nBitsPerSample']
        ai.bitrate = retval['nAvgBytesPerSec'] * 8
        # TODO: set code if possible
        # http://www.stats.uwa.edu.au/Internal/Specs/DXALL/FileSpec/Languages
        # ai.language = strh['wLanguage']
        try:
            ai.codec = fourcc.RIFFWAVE[retval['wFormatTag']]
        except LookupError:
            ai.codec = "Unknown"
        self.audio.append(ai)

    elif fccType == 'vids':
        head_names = ('biSize', 'biWidth', 'biHeight', 'biPlanes',
                      'biBitCount')
        retval.update(zip(head_names, struct.unpack('<IIIHH', t[0:16])))
        retval['fourcc'] = t[16:20]
        tail_names = ('biSizeImage', 'biXPelsPerMeter', 'biYPelsPerMeter',
                      'biClrUsed', 'biClrImportant')
        # Explicit little-endian: the native-order format used before would
        # byte-swap these fields on big-endian hosts.
        retval.update(zip(tail_names, struct.unpack('<IIIII', t[20:40])))

        vi = mediainfo.VideoInfo()
        try:
            vi.codec = fourcc.RIFFCODEC[retval['fourcc']]
        except LookupError:
            vi.codec = "Unknown"
        vi.width = retval['biWidth']
        vi.height = retval['biHeight']
        vi.bitrate = strh['dwRate']
        # A zero scale or rate would divide by zero; in that case fps and
        # length are simply left at the VideoInfo defaults.
        if strh['dwScale']:
            vi.fps = round(float(strh['dwRate'] * 100) / strh['dwScale']) / 100
            if vi.fps:
                vi.length = strh['dwLength'] / vi.fps
        self.video.append(vi)

    return retval
def _read_header(self, object_id, s):
    """Interpret the payload ``s`` of one header object.

    ``object_id`` selects the handler:

    * ``'PROP'`` -- the fixed 40-byte property record is decoded and passed
      to ``_print``.
    * ``'MDPR'`` -- sets ``self.length`` from the last of the eight leading
      integers and appends an ``AudioInfo`` / ``VideoInfo`` (with ``id`` and
      ``bitrate``) when the embedded mime type starts with ``audio`` or
      ``video``.
    * ``'CONT'`` -- reads four consecutive strings, each prefixed with a
      big-endian 16-bit length, into ``self.title``, ``self.artist``,
      ``self.copyright`` and ``self.comment``.

    Any other ``object_id`` is ignored.  Raises ``struct.error`` when ``s``
    is too short for the fields that are read.
    """
    if object_id == 'PROP':
        # Slice to the fixed record size so trailing bytes do not make
        # struct.unpack fail.
        prop = struct.unpack('>9IHH', s[:40])
        _print(prop)

    elif object_id == 'MDPR':
        mdpr = struct.unpack('>H7I', s[:30])
        _print(mdpr)
        self.length = mdpr[7] / 1000

        # A length-prefixed stream name precedes the mime type; only its
        # size is needed to locate the mime type.
        (stream_name_size, ) = struct.unpack('>B', s[30:31])
        pos = 31 + stream_name_size
        (mime_type_size, ) = struct.unpack('>B', s[pos:pos + 1])
        mime = s[pos + 1:pos + 1 + mime_type_size]

        if mime[:5] == 'audio':
            ai = mediainfo.AudioInfo()
            ai.id = mdpr[0]
            ai.bitrate = mdpr[2]
            self.audio.append(ai)
        elif mime[:5] == 'video':
            vi = mediainfo.VideoInfo()
            vi.id = mdpr[0]
            vi.bitrate = mdpr[2]
            self.video.append(vi)
        else:
            _print("Unknown: %s" % mime)

    elif object_id == 'CONT':

        def read_field(start):
            """Return (string, next position) for the field at ``start``."""
            (size, ) = struct.unpack('>H', s[start:start + 2])
            end = start + 2 + size
            return s[start + 2:end], end

        self.title, pos = read_field(0)
        self.artist, pos = read_field(pos)
        self.copyright, pos = read_field(pos)
        self.comment, pos = read_field(pos)
def _readatom(self, file):
    """Read one atom (8-byte header plus payload) from ``file``.

    Only ``udta`` and ``trak`` atoms are interpreted here:

    * ``udta`` -- child entries are collected into tables and handed to
      ``self.appendtable('QTUDTA', ...)``; entries whose type starts with
      byte 169 are treated as language-tagged strings.
    * ``trak`` -- walks ``tkhd`` / ``mdia`` children to fill a
      ``VideoInfo`` / ``AudioInfo`` pair (size, id, length, language,
      codec), updates ``self.date`` and ``self.length`` and appends the
      info object matching the handler type to ``self.video`` /
      ``self.audio``.

    Returns 0 when fewer than 8 header bytes are available or the atom type
    is not alphanumeric; in every other case the code shown here falls off
    the end (returns None).  ``struct.error`` is raised when a child header
    is truncated.
    """
    s = file.read(8)
    if len(s) < 8:
        return 0

    atomsize, atomtype = struct.unpack('>I4s', s)
    if not atomtype.decode('latin1').isalnum():
        # stop at nonsense data
        return 0

    if mediainfo.DEBUG or ATOM_DEBUG:
        print("%s [%X]" % (atomtype, atomsize))

    if atomtype == 'udta':
        # Userdata (Metadata)
        pos = 0
        tabl = {}
        i18ntabl = {}
        atomdata = file.read(atomsize - 8)
        while pos < atomsize - 12:
            (datasize, datatype) = struct.unpack('>I4s', atomdata[pos:pos + 8])
            if datasize < 8:
                # A size smaller than its own header is malformed (or a
                # zero terminator); continuing would never advance ``pos``.
                break
            if ord(datatype[0:1]) == 169:
                # i18n Metadata...
                mypos = 8 + pos
                while mypos < datasize + pos:
                    # first 4 Bytes are i18n header
                    (tlen, lang) = struct.unpack('>HH', atomdata[mypos:mypos + 4])
                    i18ntabl.setdefault(lang, {})[datatype[1:]] = \
                        atomdata[mypos + 4:mypos + tlen + 4]
                    mypos += tlen + 4
            elif datatype == 'WLOC':
                # Drop Window Location
                pass
            else:
                payload = atomdata[pos + 8:pos + datasize]
                # An empty payload used to raise IndexError here.
                if payload and ord(payload[0:1]) > 1:
                    tabl[datatype] = payload
            pos += datasize

        if i18ntabl:
            for lang, table in i18ntabl.items():
                if lang in QTLANGUAGES:
                    self.appendtable('QTUDTA', table, QTLANGUAGES[lang])
                    self.appendtable('QTUDTA', tabl, QTLANGUAGES[lang])
        else:
            self.appendtable('QTUDTA', tabl)

    elif atomtype == 'trak':
        atomdata = file.read(atomsize - 8)
        pos = 0
        vi = None
        ai = None
        info = None
        while pos < atomsize - 8:
            (datasize, datatype) = struct.unpack('>I4s', atomdata[pos:pos + 8])
            if datasize < 8:
                # Malformed child size: stop instead of looping forever.
                break

            if datatype == 'tkhd':
                tkhd = struct.unpack('>6I8x4H36xII',
                                     atomdata[pos + 8:pos + datasize])
                vi = mediainfo.VideoInfo()
                vi.width = tkhd[10] >> 16
                vi.height = tkhd[11] >> 16
                vi.id = tkhd[3]

                ai = mediainfo.AudioInfo()
                ai.id = tkhd[3]

                try:
                    # XXX Date number of Seconds is since January 1st 1904!!!
                    # XXX 2082844800 is the difference between Unix and Apple time
                    # XXX Fix me to work on Apple, too
                    self.date = int(tkhd[1]) - 2082844800
                    self.date = time.strftime('%y/%m/%d',
                                              time.gmtime(self.date))
                except Exception as e:
                    print('ex %s' % (e,))

            elif datatype == 'mdia':
                # Step into the container.  Inside the loop below
                # ``pos + datasize`` always equals the end of the mdia atom,
                # so leaving the loop early still lets the outer
                # ``pos += datasize`` land on the next sibling.
                pos += 8
                datasize -= 8
                if ATOM_DEBUG:
                    print('--> mdia information')

                while datasize:
                    mdia = struct.unpack('>I4s', atomdata[pos:pos + 8])
                    if mdia[0] < 8:
                        # Malformed size; would not make progress.
                        break

                    if mdia[1] == 'mdhd':
                        mdhd = struct.unpack('>IIIIIhh',
                                             atomdata[pos + 8:pos + 8 + 24])
                        # duration / time scale (skipped when the time
                        # scale is zero to avoid a division by zero)
                        if mdhd[3]:
                            length = mdhd[4] / mdhd[3]
                            if vi:
                                vi.length = length
                            if ai:
                                ai.length = length
                            self.length = max(self.length, length)
                        if ai and mdhd[5] in QTLANGUAGES:
                            ai.language = QTLANGUAGES[mdhd[5]]
                        # mdhd[6] == quality
                    elif mdia[1] in ('minf', 'stbl'):
                        # Containers hold only atoms: undo the size skip
                        # below so that only the 8-byte header is consumed.
                        pos -= (mdia[0] - 8)
                        datasize += (mdia[0] - 8)
                    elif mdia[1] == 'hdlr':
                        hdlr = struct.unpack('>I4s4s',
                                             atomdata[pos + 8:pos + 8 + 12])
                        if hdlr[1] == 'mhlr':
                            if hdlr[2] == 'vide' and vi not in self.video:
                                self.video.append(vi)
                                info = vi
                            if hdlr[2] == 'soun' and ai not in self.audio:
                                self.audio.append(ai)
                                info = ai
                    elif mdia[1] == 'stsd':
                        stsd = struct.unpack('>2I',
                                             atomdata[pos + 8:pos + 8 + 8])
                        if stsd[1] > 0 and info:
                            codec = struct.unpack(
                                '>I4s', atomdata[pos + 16:pos + 16 + 8])
                            info.codec = codec[1]
                            if info.codec == 'jpeg':
                                # jpeg is no video, remove it from the list
                                if vi in self.video:
                                    self.video.remove(vi)
                                info = None
                    elif mdia[1] == 'dinf':
                        if ATOM_DEBUG:
                            dref = struct.unpack(
                                '>I4s', atomdata[pos + 8:pos + 8 + 8])
                            print(' --> %s, %s' % mdia)
                            print(' --> %s, %s (reference)' % dref)
                    elif ATOM_DEBUG:
                        if mdia[1].startswith('st'):
                            print(' --> %s, %s (sample)' % mdia)
                        elif mdia[1] in ('vmhd', 'smhd'):
                            print(' --> %s, %s (media information header)' % mdia)
                        else:
                            print(' --> %s, %s (unknown)' % mdia)

                    pos += mdia[0]
                    datasize -= mdia[0]

            elif datatype == 'udta' and ATOM_DEBUG:
                print(struct.unpack('>I4s', atomdata[:8]))
            elif ATOM_DEBUG:
                if datatype == 'edts':
                    print("--> %s [%d] (edit list)" % (datatype, datasize))
                else:
                    print("--> %s [%d] (unknown)" % (datatype, datasize))

            pos += datasize
def ReadPESHeader(self, offset, buffer, id=0):
    """
    Parse a PES header.
    Since it starts with 0x00 0x00 0x01 like 'normal' mpegs, this
    function will return (0, -1) when it is no PES header or
    (packet length, timestamp position (maybe -1))

    ``offset`` is the position of ``buffer`` in the stream; it is only used
    to remember ``self.sequence_header_offset``.  A non-zero ``id``
    overrides the stream id derived from the header.  Streams not seen
    before are appended to ``self.audio`` / ``self.video``.  A buffer too
    short to hold the fixed header fields is reported as (0, -1) as well.

    http://dvd.sourceforge.net/dvdinfo/pes-hdr.html
    """
    if not buffer[0:3] == '\x00\x00\x01':
        return 0, -1
    if len(buffer) < 9:
        # Truncated header: bytes 4..8 are read unconditionally below.
        return 0, -1

    packet_length = (ord(buffer[4]) << 8) + ord(buffer[5]) + 6
    align = ord(buffer[6]) & 4
    header_length = ord(buffer[8])
    stream = ord(buffer[3])
    # First byte after the optional header fields.
    payload = header_length + 9

    # PES ID (starting with 001)
    if stream & 0xE0 == 0xC0:
        id = id or stream & 0x1F
        for a in self.audio:
            if a.id == id:
                break
        else:
            self.audio.append(mediainfo.AudioInfo())
            self.audio[-1].id = id
            self.audio[-1].keys.append('id')

    elif stream & 0xF0 == 0xE0:
        id = id or stream & 0xF
        for v in self.video:
            if v.id == id:
                break
        else:
            self.video.append(mediainfo.VideoInfo())
            self.video[-1].id = id
            self.video[-1].keys.append('id')

        # new mpeg starting
        if buffer[payload:payload + 4] == '\x00\x00\x01\xB3' and \
                not self.sequence_header_offset:
            # yes, remember offset for later use
            self.sequence_header_offset = offset + payload

    elif stream == 189 or stream == 191:
        # private stream. we don't know, but maybe we can guess later
        id = id or stream & 0xF
        if align and buffer[payload:payload + 2] == '\x0b\x77':
            # AC3 stream
            for a in self.audio:
                if a.id == id:
                    break
            else:
                self.audio.append(mediainfo.AudioInfo())
                self.audio[-1].id = id
                self.audio[-1].codec = 'AC3'
                self.audio[-1].keys.append('id')

    # else: unknown content, nothing to register

    # A timestamp is reported only when the PTS/DTS flag bits of byte 7
    # match the marker nibble of byte 9.  (The former "bad PTS/DTS" warning
    # sat inside this very condition and could never be reached.)
    ptsdts = ord(buffer[7]) >> 6
    if ptsdts and len(buffer) > 9 and ptsdts == ord(buffer[9]) >> 4:
        return packet_length, 9

    return packet_length, -1
def ReadHeader(self, buffer, offset):
    """
    Handle MPEG header in buffer on position offset
    Return -1 on error, new offset or 0 if the new offset can't be scanned

    Errors are a missing 0x00 0x00 0x01 start code, an unrecognised pack
    header, or a buffer that ends before the bytes a header needs.  As a
    side effect pack headers set ``self.type`` / ``self.get_time``,
    audio/video/AC3 stream ids are registered in ``self.audio`` /
    ``self.video`` and a sequence header stores
    ``self.sequence_header_offset``.
    """
    if buffer[offset:offset + 3] != '\x00\x00\x01':
        return -1

    size = len(buffer)
    if offset + 3 >= size:
        # Start code present but no stream id byte after it.
        return -1
    stream_id = ord(buffer[offset + 3])

    if stream_id == PADDING_PKT:
        if offset + 5 >= size:
            return -1
        return offset + (ord(buffer[offset + 4]) << 8) + \
            ord(buffer[offset + 5]) + 6

    if stream_id == PACK_PKT:
        if offset + 4 >= size:
            return -1
        marker = ord(buffer[offset + 4])
        if marker & 0xF0 == 0x20:
            self.type = 'MPEG1 video'
            self.get_time = self.ReadSCRMpeg1
            return offset + 12
        if (marker & 0xC0) == 0x40:
            if offset + 13 >= size:
                return -1
            self.type = 'MPEG2 video'
            self.get_time = self.ReadSCRMpeg2
            # The low three bits of byte 13 are added to the fixed size.
            return offset + (ord(buffer[offset + 13]) & 0x07) + 14
        # WTF? Very strange
        return -1

    if 0xC0 <= stream_id <= 0xDF:
        # code for audio stream
        for a in self.audio:
            if a.id == stream_id:
                break
        else:
            self.audio.append(mediainfo.AudioInfo())
            self.audio[-1].id = stream_id
            self.audio[-1].keys.append('id')
        return 0

    if 0xE0 <= stream_id <= 0xEF:
        # code for video stream
        for v in self.video:
            if v.id == stream_id:
                break
        else:
            self.video.append(mediainfo.VideoInfo())
            self.video[-1].id = stream_id
            self.video[-1].keys.append('id')
        return 0

    if stream_id == SEQ_HEAD:
        # sequence header, remember that position for later use
        self.sequence_header_offset = offset
        return 0

    if stream_id in (PRIVATE_STREAM1, PRIVATE_STREAM2):
        # private stream. we don't know, but maybe we can guess later
        if offset + 8 >= size:
            return -1
        add = ord(buffer[offset + 8])
        if '\x0b\x77' in buffer[offset + 11 + add:offset + 15 + add]:
            # AC3 stream
            for a in self.audio:
                if a.id == stream_id:
                    break
            else:
                self.audio.append(mediainfo.AudioInfo())
                self.audio[-1].id = stream_id
                self.audio[-1].codec = 'AC3'
                self.audio[-1].keys.append('id')
        return 0

    # Every remaining start code (system and extension headers included)
    # carries nothing of interest: let the caller search for the next one.
    return 0
def _getnextheader(self, s):
    """Parse the header object at the start of ``s`` and record its data.

    ``s`` begins with a 16-byte GUID followed by a little-endian 64-bit
    object size.  Depending on the GUID the object updates ``self.length``,
    ``self.title`` / ``artist`` / ``copyright`` / ``caption``,
    ``self.language``, appends stream infos to ``self.video`` /
    ``self.audio`` or passes descriptor tables to ``self.appendtable``.
    Header extension objects are walked recursively; unhandled objects are
    only reported through ``_print``.

    Returns the tuple ``(raw GUID string, object size)`` so that the caller
    can skip to the next object.  Raises ``struct.error`` when ``s`` is too
    short for the fields of the object.
    """
    r = struct.unpack('<16sQ', s[:24])
    (guidstr, objsize) = r
    guid = self._parseguid(guidstr)

    if guid == GUIDS['ASF_File_Properties_Object']:
        _print("File Properties Object")
        val = struct.unpack('<16s6Q4I', s[24:24 + 80])
        # Field 4 is the play duration; the divisor suggests 100 ns units.
        duration = val[4]
        self.length = duration / 10000000

    elif guid == GUIDS['ASF_Stream_Properties_Object']:
        _print("Stream Properties Object [%d]" % objsize)
        streamtype = self._parseguid(s[24:40])
        # All ASF integers are little-endian; reading the flags word
        # big-endian put the stream number into the wrong byte.
        flags = struct.unpack('<QIIH4x', s[56:78])[3]
        # The stream number occupies the low seven bits of the flags.
        strno = flags & 0x7F

        if streamtype == GUIDS['ASF_Video_Media']:
            vi = mediainfo.VideoInfo()
            vi.width, vi.height, depth, codec = struct.unpack(
                '<4xII2xH4s', s[89:89 + 20])
            try:
                vi.codec = fourcc.RIFFCODEC[codec]
            except KeyError:
                # An unlisted fourcc must not abort parsing of the file.
                vi.codec = "Unknown"
            vi.id = strno
            self.video.append(vi)
        elif streamtype == GUIDS['ASF_Audio_Media']:
            ai = mediainfo.AudioInfo()
            (twocc, ai.channels, ai.samplerate, bitrate, block,
             ai.samplebits) = struct.unpack('<HHIIHH', s[78:78 + 16])
            ai.bitrate = 8 * bitrate  # XXX Is this right?
            try:
                ai.codec = fourcc.RIFFWAVE[twocc]
            except KeyError:
                ai.codec = "Unknown"
            ai.id = strno
            self.audio.append(ai)

    elif guid == GUIDS['ASF_Header_Extension_Object']:
        _print("ASF_Header_Extension_Object %d" % objsize)
        size = struct.unpack('<I', s[42:46])[0]
        data = s[46:46 + size]
        # Each sub-object needs at least a 24-byte GUID/size header.
        while len(data) >= 24:
            _print("Sub:")
            h = self._getnextheader(data)
            if not h[1]:
                # A zero object size would never consume any data.
                break
            data = data[h[1]:]

    elif guid == GUIDS['ASF_Codec_List_Object']:
        _print("List Object")

    elif guid == GUIDS['ASF_Error_Correction_Object']:
        _print("Error Correction")

    elif guid == GUIDS['ASF_Content_Description_Object']:
        _print("Content Description Object")
        # Five 16-bit lengths followed by the five strings themselves.
        val = struct.unpack('<5H', s[24:24 + 10])
        pos = 34
        strings = []
        for size in val:
            strings.append(s[pos:pos + size].replace('\0', '').strip())
            pos += size
        (self.title, self.artist, self.copyright, self.caption,
         rating) = tuple(strings)

    elif guid == GUIDS['ASF_Extended_Content_Description_Object']:
        (count, ) = struct.unpack('<H', s[24:26])
        pos = 26
        descriptor = {}
        for i in range(count):
            # Read additional content descriptors
            d = self._parsekv(s[pos:])
            pos += d[0]
            descriptor[d[1]] = d[2]
        self.appendtable('ASFDESCRIPTOR', descriptor)

    elif guid == GUIDS['ASF_Metadata_Object']:
        (count, ) = struct.unpack('<H', s[24:26])
        pos = 26
        descriptor = {}
        stream = None
        for i in range(count):
            # Read additional content descriptors
            d = self._parsekv2(s[pos:])
            pos += d[0]
            descriptor[d[1]] = d[2]
            stream = d[3]
        # TODO: Find the stream in self.audio and self.video and
        #       append it there instead of here
        # With no descriptors there is no stream number to name the table
        # after (this used to fail on the unbound loop variable).
        if stream is not None:
            self.appendtable('ASFMETADATA%d' % stream, descriptor)

    elif guid == GUIDS['ASF_Language_List_Object']:
        count = struct.unpack('<H', s[24:26])[0]
        pos = 26
        lang = []
        for i in range(count):
            idlen = struct.unpack('<B', s[pos:pos + 1])[0]
            idstring = s[pos + 1:pos + 1 + idlen]
            _print("Language: %d/%d: %s" % (i + 1, count, idstring))
            lang.append(idstring)
            pos += 1 + idlen
        if len(lang) == 1:
            self.language = lang[0]
        else:
            self.language = tuple(lang)
        # TODO: Find the stream in self.audio and self.video and
        #       set it there instead of here

    elif guid == GUIDS['ASF_Stream_Bitrate_Properties_Object']:
        (count, ) = struct.unpack('<H', s[24:26])
        pos = 26
        for i in range(count):
            strno, avbitrate = struct.unpack('<HI', s[pos:pos + 6])
            strno &= 0x7F
            _print("Stream %d Bitrate: %d" % (strno, avbitrate))
            # Advance to the next 6-byte record; without this every
            # iteration re-read the first one.
            pos += 6
        # TODO: Find the stream in self.audio and self.video and
        #       set it there instead of here

    else:
        # Just print the type:
        known = False
        for name, value in GUIDS.items():
            if value == guid:
                _print("Unparsed %s [%d]" % (name, objsize))
                known = True
        if not known:
            _print("unknown: %s [%d]" % (self._printguid(guid), objsize))

    return r
def _parseHeader(self, header, granule):
    """Identify the stream announced by the packet ``header``.

    The packet is matched against, in this order: a ``vorbis`` header, a
    ``theora`` header, the old "Direct Show Samples embedded in Ogg"
    header and the new DirectShow stream header (video, audio or text).
    A matching ``AudioInfo`` / ``VideoInfo`` / subtitle ``MediaInfo`` is
    appended to ``self.all_streams`` and, for audio and video, also to
    ``self.audio`` / ``self.video``.  Anything else is reported through
    ``_print``.

    ``granule`` is currently unused.  Nothing is returned; an empty
    ``header`` is ignored.
    """
    if not header:
        # Nothing to inspect; reading the flag byte would fail.
        return

    headerlen = len(header)
    flags = ord(header[0:1])

    if headerlen >= 30 and header[1:7] == 'vorbis':
        ai = mediainfo.AudioInfo()
        (ai.version, ai.channels, ai.samplerate, bitrate_max, ai.bitrate,
         bitrate_min, blocksize, framing) = \
            struct.unpack('<IBIiiiBB', header[7:7 + 23])
        ai.codec = 'Vorbis'
        #ai.granule = granule
        #ai.length = granule / ai.samplerate
        self.audio.append(ai)
        self.all_streams.append(ai)

    elif headerlen >= 7 and header[1:7] == 'theora':
        # Theora Header
        # XXX Finish Me
        vi = mediainfo.VideoInfo()
        vi.codec = 'theora'
        self.video.append(vi)
        self.all_streams.append(vi)

    elif headerlen >= 142 and \
            header[1:36] == 'Direct Show Samples embedded in Ogg':
        # Old Directshow format
        # XXX Finish Me
        vi = mediainfo.VideoInfo()
        vi.codec = 'dshow'
        self.video.append(vi)
        self.all_streams.append(vi)

    elif (flags & PACKET_TYPE_BITS) == PACKET_TYPE_HEADER and \
            headerlen >= struct.calcsize(STREAM_HEADER_VIDEO) + 1:
        # New Directshow Format
        htype = header[1:9]

        if htype[:5] == 'video':
            streamheader = struct.unpack(
                STREAM_HEADER_VIDEO,
                header[9:struct.calcsize(STREAM_HEADER_VIDEO) + 9])
            vi = mediainfo.VideoInfo()
            (codec_id, ssize, timeunit, samplerate, vi.length, buffersize,
             vi.bitrate, vi.width, vi.height) = streamheader

            vi.width /= 65536
            vi.height /= 65536
            # XXX length, bitrate are very wrong
            try:
                vi.codec = fourcc.RIFFCODEC[codec_id]
            except LookupError:
                vi.codec = 'Unknown (%s)' % codec_id
            # A zero time unit would divide by zero; fps then keeps its
            # default value.
            if timeunit:
                vi.fps = 10000000 / timeunit
            self.video.append(vi)
            self.all_streams.append(vi)

        elif htype[:5] == 'audio':
            streamheader = struct.unpack(
                STREAM_HEADER_AUDIO,
                header[9:struct.calcsize(STREAM_HEADER_AUDIO) + 9])
            ai = mediainfo.AudioInfo()
            # The seventh field used to be stored in ai.bitrate as well and
            # was immediately overwritten by the last field, so it is read
            # into a scratch name instead.
            # NOTE(review): it may be the bits-per-sample value - confirm
            # against STREAM_HEADER_AUDIO before mapping it to an attribute.
            (codec_id, ssize, timeunit, ai.samplerate, ai.length,
             buffersize, overwritten, ai.channels, bloc,
             ai.bitrate) = streamheader
            self.samplerate = ai.samplerate
            _print("Samplerate %d" % self.samplerate)
            self.audio.append(ai)
            self.all_streams.append(ai)

        elif htype[:4] == 'text':
            subtitle = mediainfo.MediaInfo()
            subtitle.keys.append('language')
            subtitle.type = 'subtitle'
            subtitle.length = 0
            self.all_streams.append(subtitle)

    else:
        _print("Unknown Header")
def process_one_track(self, track):
    """Turn one track entry into a video, audio or subtitle record.

    The track's child elements are obtained with
    ``self.process_one_level(track)`` and looked up by element id:

    * video tracks append a ``VideoInfo`` (codec, fps, width, height) to
      ``self.video``;
    * audio tracks append an ``AudioInfo`` (language, codec, samplerate,
      channels) to ``self.audio``;
    * subtitle tracks append their language to ``self.subtitles``.

    Every optional element is read on a best-effort basis: when it is
    missing or cannot be decoded a default is used ('Unknown' codec, fps 0,
    language 'en') or the attribute is left untouched.  The track type
    element itself is mandatory.
    """
    # Process all the items at the track level
    tabelem = self.process_one_level(track)

    # We have the dict of track eleme, now build the MMPYTHON information
    track_type = tabelem[MATROSKA_TRACK_TYPE_ID].get_value()
    _print("Track type found with UID %d" % track_type)

    if track_type == MATROSKA_VIDEO_TRACK:
        _print("VIDEO TRACK found !!")
        #VIDEOCORE = ['length', 'encoder', 'bitrate', 'samplerate', 'codec', 'samplebits',
        #             'width', 'height', 'fps', 'aspect']
        vi = mediainfo.VideoInfo()
        try:
            vi.codec = tabelem[MATROSKA_CODEC_ID].get_data()
        except Exception:
            vi.codec = 'Unknown'
        try:
            elem = tabelem[MATROSKA_FRAME_DURATION_ID]
            # The element value is scaled by 1e-9 before inverting it,
            # i.e. it is treated as a frame duration in nanoseconds.
            vi.fps = 1 / (pow(10, -9) * (elem.get_value()))
        except Exception:
            vi.fps = 0
        try:
            vinfo = tabelem[MATROSKA_VIDEO_SETTINGS_ID]
            vidtab = self.process_one_level(vinfo)
            vi.width = vidtab[MATROSKA_VID_WIDTH_ID].get_value()
            vi.height = vidtab[MATROSKA_VID_HEIGHT_ID].get_value()
        except Exception:
            _print("No other info about video track !!!")
        self.video.append(vi)

    elif track_type == MATROSKA_AUDIO_TRACK:
        _print("AUDIO TRACK found !!")
        #AUDIOCORE = ['channels', 'samplerate', 'length', 'encoder', 'codec', 'samplebits',
        #             'bitrate', 'language']
        ai = mediainfo.AudioInfo()
        try:
            elem = tabelem[MATROSKA_TRACK_LANGUAGE_ID]
            ai.language = elem.get_data()
            ai['language'] = elem.get_data()
        except Exception:
            ai.language = 'en'
            ai['language'] = 'en'
        try:
            ai.codec = tabelem[MATROSKA_CODEC_ID].get_data()
        except Exception:
            ai.codec = "Unknown"

        incomplete = False
        try:
            ainfo = tabelem[MATROSKA_AUDIO_SETTINGS_ID]
            # This used to pass ``vinfo``, which is not defined in the
            # audio branch, so the audio settings were never read.
            audtab = self.process_one_level(ainfo)
        except Exception:
            audtab = None
            incomplete = True
        if audtab is not None:
            # Read the two settings independently so that a failure on one
            # does not discard the other.
            try:
                ai.samplerate = unpack(
                    '!f',
                    audtab[MATROSKA_AUDIO_SAMPLERATE_ID].get_value())[0]
            except Exception:
                incomplete = True
            try:
                ai.channels = audtab[MATROSKA_AUDIO_CHANNELS_ID].get_value()
            except Exception:
                incomplete = True
        if incomplete:
            _print("No other info about audio track !!!")
        self.audio.append(ai)

    elif track_type == MATROSKA_SUBTITLES_TRACK:
        try:
            elem = tabelem[MATROSKA_TRACK_LANGUAGE_ID]
            language = elem.get_data()
            _print("Subtitle language found : %s" % elem.get_data())
        except Exception:
            language = "en"  # By default
        self.subtitles.append(language)