def extractAVI(self, headers, **kwargs): audio_index = 1 for stream in headers.array("stream"): if "stream_hdr/stream_type" not in stream: continue stream_type = stream["stream_hdr/stream_type"].value if stream_type == "vids": if "stream_hdr" in stream: meta = Metadata(self) self.extractAVIVideo(stream["stream_hdr"], meta) self.addGroup("video", meta, "Video stream") elif stream_type == "auds": if "stream_fmt" in stream: meta = Metadata(self) self.extractAVIAudio(stream["stream_fmt"], meta) self.addGroup("audio[%u]" % audio_index, meta, "Audio stream") audio_index += 1 if "avi_hdr" in headers: self.useAviHeader(headers["avi_hdr"]) # Compute global bit rate if self.has("duration") and "/movie/size" in headers: self.bit_rate = float( headers["/movie/size"].value) * 8 / timedelta2seconds( self.get('duration')) # Video has index? scan_index = (True, kwargs['scan_index'])['scan_index' in kwargs] if scan_index and "/index" in headers: self.comment = "Has audio/video index (%s)" \ % humanFilesize(headers["/index"].size // 8)
def processSubtitle(self, track): sub = Metadata(self) self.trackCommon(track, sub) try: sub.compression = track["CodecID/string"].value except MissingField: pass self.addGroup("subtitle[]", sub, "Subtitle")
def processVideo(self, track): video = Metadata(self) self.trackCommon(track, video) try: video.compression = track["CodecID/string"].value if "Video" in track: video.width = track["Video/PixelWidth/unsigned"].value video.height = track["Video/PixelHeight/unsigned"].value except MissingField: pass self.addGroup("video[]", video, "Video stream")
def processFile(self, field): meta = Metadata(self) meta.filename = field["filename"].value meta.creation_date = field["last_mod"].value meta.compression = field["compression"].display if "data_desc" in field: meta.file_size = field["data_desc/file_uncompressed_size"].value if field["data_desc/file_compressed_size"].value: meta.compr_size = field["data_desc/file_compressed_size"].value else: meta.file_size = field["uncompressed_size"].value if field["compressed_size"].value: meta.compr_size = field["compressed_size"].value computeCompressionRate(meta) self.addGroup(field.name, meta, "File \"%s\"" % meta.get('filename'))
def extract(self, tar): max_nb = maxNbFile(self) for index, field in enumerate(tar.array("file")): if max_nb is not None and max_nb <= index: self.warning("TAR archive contains many files, " "but only first %s files are processed" % max_nb) break meta = Metadata(self) self.extractFile(field, meta) if meta.has("filename"): title = 'File "%s"' % meta.getText('filename') else: title = "File" self.addGroup(field.name, meta, title)
def extract(self, flv): if "video[0]" in flv: meta = Metadata(self) self.extractVideo(flv["video[0]"], meta) self.addGroup("video", meta, "Video stream") if "audio[0]" in flv: meta = Metadata(self) self.extractAudio(flv["audio[0]"], meta) self.addGroup("audio", meta, "Audio stream") # TODO: Computer duration # One technic: use last video/audio chunk and use timestamp # But this is very slow self.format_version = flv.description if "metadata/entry[1]" in flv: self.extractAMF(flv["metadata/entry[1]"]) if self.has('duration'): self.bit_rate = flv.size / timedelta2seconds(self.get('duration'))
def useFile(self, field): meta = Metadata(self) meta.filename = field["filename"].value meta.file_size = field["filesize"].value meta.creation_date = field["timestamp"].value attr = field["attributes"].value if attr != "(none)": meta.file_attr = attr if meta.has("filename"): title = "File \"%s\"" % meta.getText('filename') else: title = "File" self.addGroup(field.name, meta, title)
def extract(self, ogg): granule_quotient = None for index, page in enumerate(ogg.array("page")): if "segments" not in page: continue page = page["segments"] if "vorbis_hdr" in page: meta = Metadata(self) self.vorbisHeader(page["vorbis_hdr"], meta) self.addGroup("audio[]", meta, "Audio") if not granule_quotient and meta.has("sample_rate"): granule_quotient = meta.get('sample_rate') if "theora_hdr" in page: meta = Metadata(self) self.theoraHeader(page["theora_hdr"], meta) self.addGroup("video[]", meta, "Video") if "video_hdr" in page: meta = Metadata(self) self.videoHeader(page["video_hdr"], meta) self.addGroup("video[]", meta, "Video") if not granule_quotient and meta.has("frame_rate"): granule_quotient = meta.get('frame_rate') if "comment" in page: readVorbisComment(self, page["comment"]) if 3 <= index: # Only process pages 0..3 break # Compute duration if granule_quotient and QUALITY_NORMAL <= self.quality: page = ogg.createLastPage() if page and "abs_granule_pos" in page: try: self.duration = timedelta( seconds=float(page["abs_granule_pos"].value) / granule_quotient) except OverflowError: pass
def useStreamProp(self, stream, index): meta = Metadata(self) meta.comment = "Start: %s" % stream["stream_start"].value if getValue(stream, "mime_type") == "logical-fileinfo": for prop in stream.array("file_info/prop"): self.useFileInfoProp(prop) else: meta.bit_rate = stream["avg_bit_rate"].value meta.duration = timedelta(milliseconds=stream["duration"].value) meta.mime_type = getValue(stream, "mime_type") meta.title = getValue(stream, "desc") self.addGroup("stream[%u]" % index, meta, "Stream #%u" % (1 + index))
def processAudio(self, track): audio = Metadata(self) self.trackCommon(track, audio) if "Audio" in track: frequency = self.getDouble(track, "Audio/SamplingFrequency") if frequency is not None: audio.sample_rate = frequency if "Audio/Channels/unsigned" in track: audio.nb_channel = track["Audio/Channels/unsigned"].value if "Audio/BitDepth/unsigned" in track: audio.bits_per_sample = track["Audio/BitDepth/unsigned"].value if "CodecID/string" in track: audio.compression = track["CodecID/string"].value self.addGroup("audio[]", audio, "Audio stream")
def extract(self, mar): self.comment = "Contains %s files" % mar["nb_file"].value self.format_version = ("Microsoft Archive version %s" % mar["version"].value) max_nb = maxNbFile(self) for index, field in enumerate(mar.array("file")): if max_nb is not None and max_nb <= index: self.warning("MAR archive contains many files, " "but only first %s files are processed" % max_nb) break meta = Metadata(self) meta.filename = field["filename"].value meta.compression = "None" meta.file_size = field["filesize"].value self.addGroup(field.name, meta, "File \"%s\"" % meta.getText('filename'))
def processHeader(self, header): compression = [] is_vbr = None if "ext_desc/content" in header: # Extract all data from ext_desc data = {} for desc in header.array("ext_desc/content/descriptor"): self.useExtDescItem(desc, data) # Have ToolName and ToolVersion? If yes, group them to producer key if "ToolName" in data and "ToolVersion" in data: self.producer = "%s (version %s)" % (data["ToolName"], data["ToolVersion"]) del data["ToolName"] del data["ToolVersion"] # "IsVBR" key if "IsVBR" in data: is_vbr = (data["IsVBR"] == 1) del data["IsVBR"] # Store data for key, value in data.items(): if key in self.EXT_DESC_TO_ATTR: key = self.EXT_DESC_TO_ATTR[key] else: if isinstance(key, str): key = makePrintable(key, "ISO-8859-1") value = "%s=%s" % (key, value) key = "comment" setattr(self, key, value) if "file_prop/content" in header: self.useFileProp(header["file_prop/content"], is_vbr) if "codec_list/content" in header: for codec in header.array("codec_list/content/codec"): if "name" in codec: text = codec["name"].value if "desc" in codec and codec["desc"].value: text = "%s (%s)" % (text, codec["desc"].value) compression.append(text) audio_index = 1 video_index = 1 for index, stream_prop in enumerate(header.array("stream_prop")): if "content/audio_header" in stream_prop: meta = Metadata(self) self.streamProperty(header, index, meta) self.streamAudioHeader(stream_prop["content/audio_header"], meta) if self.addGroup("audio[%u]" % audio_index, meta, "Audio stream #%u" % audio_index): audio_index += 1 elif "content/video_header" in stream_prop: meta = Metadata(self) self.streamProperty(header, index, meta) self.streamVideoHeader(stream_prop["content/video_header"], meta) if self.addGroup("video[%u]" % video_index, meta, "Video stream #%u" % video_index): video_index += 1 if "metadata/content" in header: info = header["metadata/content"] try: self.title = info["title"].value self.author = info["author"].value self.copyright = info["copyright"].value except MissingField: pass
def extract(self, icon): for index, header in enumerate(icon.array("icon_header")): image = Metadata(self) # Read size and colors from header image.width = header["width"].value image.height = header["height"].value bpp = header["bpp"].value nb_colors = header["nb_color"].value if nb_colors != 0: image.nb_colors = nb_colors if bpp == 0 and nb_colors in self.color_to_bpp: bpp = self.color_to_bpp[nb_colors] elif bpp == 0: bpp = 8 image.bits_per_pixel = bpp image.setHeader( "Icon #%u (%sx%s)" % (1 + index, image.get("width", "?"), image.get("height", "?"))) # Read compression from data (if available) key = "icon_data[%u]/header/codec" % index if key in icon: image.compression = icon[key].display key = "icon_data[%u]/pixels" % index if key in icon: computeComprRate(image, icon[key].size) # Store new image self.addGroup("image[%u]" % index, image)